Minimum Spinning Tree – FighterNan's Blog

Summary

A spanning tree of some graph G is a subgraph that is a tree which includes all of the vertices of G, with a minimum possible number of edges.

A Minimum Spanning Tree (MST) is a spanning tree whose sum of edge weights is as small as possible.

Kruskal’s Algorithm

Please refer to this page for its concepts and pseudo codes.

The following code is implemented with disjoint-set (Union Find) data structure. The time complexity is O(ElogE).

struct UnionFind {
    vector<int> parent;
    vector<int> size;
    int total_group;
    UnionFind(int n) {
        total_group = n;
        for (int i = 0; i < n; ++i) {
            parent.push_back(i);
            size.push_back(1);
        }
    }
    int Find(int i) {
        if (i != parent[i]) {
            parent[i] = Find(parent[i]);
        }
        return parent[i];
    }
    void Union(int i, int j) {
        int root1 = Find(i), root2 = Find(j);
        if (root1 != root2) {
            total_group--;
            size[root1] += size[root2];
            size[root2] = 0;
            parent[root2] = root1;
        }
    }
};

struct Edge {
    int from = 0, to = 0, cost = 0;
    bool operator< (Edge const &e) const {
        return cost < e.cost;
    }
};

pair<int, vector<Edge>> Kruskal(vector<Edge> &edges, int n) {
    sort(edges.begin(), edges.end());

    UnionFind uf(n);

    int cost = 0;
    vector<Edge> choosed;

    for (auto const &e : edges) {
        if (uf.Find(e.from) != uf.Find(e.to)) {
            uf.Union(e.from, e.to);
            cost += e.cost;
            choosed.push_back(e);
        }
    }
    return {cost, choosed};
}

Prim’s Algorithm

This algorithm is very similar to the Dijkstra algorithm I introduced in this post.

We use a priority queue to stores the potential nodes we could visit and its current cost . Note that the cost here is not from the initial point to the node. It represents the cost to include the node into the MST.

We randomly choose a point to start and greedily choose the node with lowest cost to visit.

In each iteration, the cost we get is guaranteed to be the lowest cost to bring in the current node, which is a property of greedy (the first visit will always have smaller cost than other costs in queue). Note that: Not like BFS, where we can mark node as visited when we push them in the queue, a node is marked as visited (gains its lowest cost) only when it gets out of the queue.

We then visit the newly introduced edges regarding the current node and update the lowest cost in the queue (to guarantee there are no duplicate nodes in the queue).

The size of queue will be at most O(V); each node might have at most O(V) edges; the graph has O(E) edges, we visit each node once. So the time complexity will be O((E+V)logV).

pair<int, vector<Edge>> Prim(unordered_map<int, vector<Edge>> adj_list, int n, int start_point) {

    multiset<Edge> s;
    unordered_map<int, int> node_cheapest_cost;
    unordered_map<int, multiset<Edge>::iterator> node_s_iter;

    int cost = 0;
    vector<Edge> choosed;

    node_cheapest_cost[start_point] = 0;
    for (auto const &e : adj_list[start_point]) {
        node_s_iter[e.to] = s.insert(e);
    }

    while (s.size() > 0) {
        auto curr_e = *s.begin();
        s.erase(s.begin());
        // update iterator map when updating multiset
        node_s_iter.erase(curr_e.to);

        cost += curr_e.cost;
        node_cheapest_cost[curr_e.to] = curr_e.cost;
        choosed.push_back(curr_e);
        for (auto const &next_e : adj_list[curr_e.to]) {
            // not a must
            if (node_cheapest_cost.count(next_e.to)) {
                continue;
            }
            if (node_s_iter.count(next_e.to)) {
                if (node_s_iter[next_e.to]->cost > next_e.cost) {
                    s.erase(node_s_iter[next_e.to]);
                    node_s_iter[next_e.to] = s.insert(next_e);
                }
            } else {
                node_s_iter[next_e.to] = s.insert(next_e);
            }
        }
    }

    return {cost, choosed};
}

Many lazy deletion implementation actually has O((E+V)logE) complexity, but it is simpler to implement.

pair<int, vector<Edge>> Prim(unordered_map<int, vector<Edge>> adj_list, int n, int start_point) {

    multiset<Edge> s;
    vector<int> node_cheapest_cost(n, INT_MAX);

    int cost = 0;
    vector<Edge> choosed;

    node_cheapest_cost[start_point] = 0;
    for (auto const &e : adj_list[start_point]) {
        s.insert(e);
    }

    while (s.size() > 0) {
        auto curr_e = *s.begin();
        s.erase(s.begin());
        // lazy deletion
        if (node_cheapest_cost[curr_e.to] < INT_MAX) {
            continue;
        }
        cost += curr_e.cost;
        node_cheapest_cost[curr_e.to] = curr_e.cost;
        choosed.push_back(curr_e);
        for (auto const &next_e : adj_list[curr_e.to]) {
            // This is a must, otherwise it leads to an infinite queue in any undirected graphs
            if (next_e.cost < node_cheapest_cost[next_e.to]) {
                s.insert(next_e);
            }
        }
    }

    return {cost, choosed};
}

Note that, if the number of edges are much larger than the number of nodes (e.g. complete graph). Brute force searching the cheapest edge O(V^2) might be better than using priority queues.

int minCostConnectPoints(vector<vector<int>>& points) {
    int n = points.size();
    ll cost = 0;

    vector<bool> visited(n, false);
    vector<int> min_cost_to_join(n, INT_MAX);
    vector<vector<int>> cost_table(n, vector<int>(n, 0));
    int node_joined = 0;

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            cost_table[i][j] = GetCost(points[i], points[j]);
        }
    }
    min_cost_to_join[0] = 0;

    while (node_joined < n) {
        int curr_min_dist = INT_MAX;
        int curr_min_node = -1;
        for (int i = 0; i < n; ++i) {
            if (!visited[i] && min_cost_to_join[i] < curr_min_dist) {
                curr_min_dist = min_cost_to_join[i];
                curr_min_node = i;
            }
        }

        cost += curr_min_dist;
        visited[curr_min_node] = true;
        node_joined++;

        for (int i = 0; i < n; ++i) {
            if (!visited[i]) {
                min_cost_to_join[i] = min(min_cost_to_join[i], cost_table[curr_min_node][i]);
            }
        }
    }
    return cost;
}

Summary

Kruskal’s Algorithm

Prim’s Algorithm

Leave a Reply Cancel reply