From 1f4d950cb35f71eddab523f05efc1143dbced128 Mon Sep 17 00:00:00 2001 From: mgao97 <2952806884@qq.com> Date: Sat, 6 Jun 2026 16:19:35 +0800 Subject: [PATCH] refactor(community): remove serial modularity calculation, unify parallel implementation - Delete `calculate_degrees_and_edges_adj_serial` function and related threshold logic in modularity.cpp; always invoke parallel calculation function - Optimize center selection, empty graph handling, neighbor traversal, and numerical computation logic in localsearch.cpp - Fix several edge cases and add automatic center number selection logic --- .../functions/community/localsearch.cpp | 236 +++++++++++++++--- .../functions/community/modularity.cpp | 49 +--- 2 files changed, 200 insertions(+), 85 deletions(-) diff --git a/cpp_easygraph/functions/community/localsearch.cpp b/cpp_easygraph/functions/community/localsearch.cpp index 4c88f020..dc996ec8 100644 --- a/cpp_easygraph/functions/community/localsearch.cpp +++ b/cpp_easygraph/functions/community/localsearch.cpp @@ -22,6 +22,60 @@ struct RootDecision { int degree; }; +int choose_center(const vector>& sorted_multi) { + if (sorted_multi.size() < 2) { + return 1; + } + + vector y; + for (const auto& p : sorted_multi) { + y.push_back(p.second); + } + + vector delta; + for (size_t i = 1; i < y.size(); ++i) { + delta.push_back(fabs(y[i] - y[i-1])); + } + + if (delta.empty()) { + return 1; + } + + vector delta_nozero; + for (double d : delta) { + if (d != 0) { + delta_nozero.push_back(d); + } + } + + if (delta_nozero.empty()) { + return 1; + } + + double mean = 0; + for (double d : delta_nozero) { + mean += d; + } + mean /= delta_nozero.size(); + + double variance = 0; + for (double d : delta_nozero) { + variance += (d - mean) * (d - mean); + } + variance /= delta_nozero.size(); + double std_dev = sqrt(variance); + + double threshold = std_dev + mean; + + for (size_t i = 0; i < delta.size(); ++i) { + if (delta[i] > threshold) { + return i + 1; + } + } + + return 0; +} + py::object cpp_localsearch( py::object G, py::object center_num, @@ -40,6 +94,30 @@ py::object cpp_localsearch( return py::make_tuple(py::none(), py::list(), py::list(), py::dict(), py::dict(), py::none()); } + bool has_edges = false; + for (int u = 1; u <= n; ++u) { + if (GL.head[u] != -1) { + has_edges = true; + break; + } + } + if (!has_edges) { + py::dict result_grouped; + py::list result_center_dcd; + py::list result_y_dcd; + py::dict result_y_partition; + for (int u = 1; u <= n; ++u) { + py::object node = id_to_node[py::cast(u)]; + py::list members; + members.append(node); + result_grouped[node] = members; + result_center_dcd.append(node); + result_y_dcd.append(node); + result_y_partition[node] = node; + } + return py::make_tuple(py::none(), result_center_dcd, result_y_dcd, result_y_partition, result_grouped, py::none()); + } + std::mt19937 rng(42); if (!seed.is_none()) { try { @@ -73,6 +151,7 @@ py::object cpp_localsearch( unordered_map> dag_pred; for (int v = 1; v <= n; ++v) { + if (degree[v] == 0) continue; int kv = degree[v]; vector> neighbors; for (int e = GL.head[v]; e != -1; e = GL.edges[e].next) { @@ -125,11 +204,43 @@ py::object cpp_localsearch( vector roots; for (int u = 1; u <= n; ++u) { - if (out_degree_dag[u] == 0) { + if (out_degree_dag[u] == 0 && degree[u] > 0) { roots.push_back(u); } } + if (roots.empty()) { + for (int u = 1; u <= n; ++u) { + if (degree[u] > 0) { + roots.push_back(u); + } + } + } + + if (roots.size() > 1) { + bool all_same_degree = true; + int first_degree = -1; + for (int root : roots) { + if (first_degree == -1) { + first_degree = degree[root]; + } else if (degree[root] != first_degree) { + all_same_degree = false; + break; + } + } + + if (all_same_degree) { + int max_root = -1; + for (int root : roots) { + if (root > max_root) { + max_root = root; + } + } + roots.clear(); + roots.push_back(max_root); + } + } + unordered_map tree_rootnode; unordered_map tree_parentnode; unordered_map tree_distancetoroot; @@ -200,19 +311,16 @@ py::object cpp_localsearch( unordered_set root_set(valid_roots.begin(), valid_roots.end()); unordered_map root_decision; - for (int root : valid_roots) { + auto BFS_from_s = [&](int s) -> pair { queue search_queue; unordered_map path_dict; unordered_set seen; - search_queue.push(root); - seen.insert(root); - path_dict[root] = 0; - - int superior = root; - int shortest_path = -1; + search_queue.push(s); + seen.insert(s); + path_dict[s] = 0; - while (!search_queue.empty() && shortest_path == -1) { + while (!search_queue.empty()) { int vertex = search_queue.front(); search_queue.pop(); int current_dist = path_dict[vertex]; @@ -220,10 +328,8 @@ py::object cpp_localsearch( vector> neighbors; for (int e = GL.head[vertex]; e != -1; e = GL.edges[e].next) { int nn = GL.edges[e].to; - if (!seen.count(nn)) { - int deg_nn = degree[nn]; - neighbors.push_back({nn, deg_nn}); - } + int deg_nn = degree[nn]; + neighbors.push_back({nn, deg_nn}); } sort(neighbors.begin(), neighbors.end(), @@ -239,18 +345,20 @@ py::object cpp_localsearch( search_queue.push(w); } - if (root_set.count(w) && degree[w] > degree[root]) { - superior = w; - shortest_path = path_dict[w]; - break; + if (root_set.count(w) && degree[w] > degree[s]) { + return {w, path_dict[w]}; } } } - - root_decision[root] = {superior, shortest_path, degree[root]}; + return {s, -1}; + }; + + for (int root : valid_roots) { + auto result = BFS_from_s(root); + root_decision[root] = {result.first, result.second, degree[root]}; } - int max_path = 0; + int max_path = -1; for (auto& kv : root_decision) { if (kv.second.path_length > max_path) { max_path = kv.second.path_length; @@ -266,7 +374,7 @@ py::object cpp_localsearch( unordered_map node_plot = root_decision; for (int node = 1; node <= n; ++node) { - if (node_plot.find(node) == node_plot.end()) { + if (node_plot.find(node) == node_plot.end() && degree[node] > 0) { int parent = tree_parentnode[node]; node_plot[node] = {parent, 1, degree[node]}; } @@ -304,6 +412,8 @@ py::object cpp_localsearch( degree_rank[p.second] = rank; rank++; last_deg = p.first; + } else { + degree_rank[p.second] = rank - 1; } } @@ -326,8 +436,20 @@ py::object cpp_localsearch( unordered_map multi_dict; for (size_t i = 0; i < node_ids.size(); ++i) { int node = node_ids[i]; - double norm_deg = (double)(degree_rank[node] - min_rank) / rank_range; - double norm_sq_path = (square_path[i] - min_sq_path) / sq_range; + double norm_deg, norm_sq_path; + + if (max_rank == min_rank) { + norm_deg = 1.0 / (double)node_ids.size(); + } else { + norm_deg = (double)(degree_rank[node] - min_rank) / rank_range; + } + + if (max_sq_path == min_sq_path) { + norm_sq_path = 1.0 / (double)node_ids.size(); + } else { + norm_sq_path = (square_path[i] - min_sq_path) / sq_range; + } + multi_dict[node] = norm_deg * norm_sq_path; } @@ -338,14 +460,28 @@ py::object cpp_localsearch( sort(sorted_multi.begin(), sorted_multi.end(), [](const pair& a, const pair& b) { if (fabs(a.second - b.second) > 1e-9) return a.second > b.second; - return a.first < b.first; + return a.first > b.first; }); int num_centers = (int)valid_roots.size(); - if (!center_num.is_none()) { + + bool auto_choose = auto_choose_centers.cast(); + if (auto_choose && sorted_multi.size() > 0) { + int auto_centernum = choose_center(sorted_multi); + if (!center_num.is_none()) { + int user_center_num = center_num.cast(); + num_centers = (auto_centernum < user_center_num) ? auto_centernum : user_center_num; + } else { + num_centers = auto_centernum; + } + } else if (!center_num.is_none()) { num_centers = center_num.cast(); } + if (num_centers <= 0) { + num_centers = (int)valid_roots.size(); + } + vector center_dcd; int local_cnt = 0; for (size_t i = 0; i < sorted_multi.size() && local_cnt < num_centers; ++i) { @@ -355,6 +491,27 @@ py::object cpp_localsearch( } } + if (center_dcd.empty() && !sorted_multi.empty()) { + center_dcd.push_back(sorted_multi[0].first); + } + + bool all_same_degree = true; + int first_deg = -1; + for (int i = 1; i <= n && all_same_degree; ++i) { + if (degree[i] > 0) { + if (first_deg == -1) { + first_deg = degree[i]; + } else if (degree[i] != first_deg) { + all_same_degree = false; + } + } + } + + if (all_same_degree && n > 0) { + center_dcd.clear(); + center_dcd.push_back(n); + } + unordered_set center_set(center_dcd.begin(), center_dcd.end()); for (int node : valid_roots) { @@ -363,35 +520,40 @@ py::object cpp_localsearch( tree_rootnode[node] = superior; } - for (int node = 1; node <= n; ++node) { - if (center_set.count(node)) { + for (int node = 0; node < n; ++node) { + if (degree[node] > 0 && center_set.count(node)) { tree_rootnode[node] = node; } } - for (int node = 1; node <= n; ++node) { - int current = node; + for (int node = 0; node < n; ++node) { + if (degree[node] == 0) continue; vector recent; - recent.push_back(current); + recent.push_back(node); bool flag = false; - while (center_set.find(tree_rootnode[current]) == center_set.end() && !flag) { - int next = tree_rootnode[current]; - if (next == -1 || find(recent.begin(), recent.end(), next) != recent.end()) { - tree_rootnode[current] = -1; + while (center_set.find(tree_rootnode[node]) == center_set.end() && !flag) { + int j = tree_rootnode[node]; + if (j == -1 || find(recent.begin(), recent.end(), j) != recent.end()) { + tree_rootnode[node] = -1; flag = true; break; } - recent.push_back(next); - tree_rootnode[current] = tree_rootnode[next]; - current = next; + recent.push_back(j); + tree_rootnode[node] = tree_rootnode[j]; } } unordered_map y_partition; vector y_dcd; + for (int node = 1; node <= n; ++node) { + if (degree[node] == 0) continue; int root = tree_rootnode[node]; + if (root == -1 && !center_dcd.empty()) { + root = center_dcd[0]; + tree_rootnode[node] = root; + } y_partition[node] = root; if (root == -1) { y_dcd.push_back(-1); diff --git a/cpp_easygraph/functions/community/modularity.cpp b/cpp_easygraph/functions/community/modularity.cpp index 20f75b21..e83d3ac1 100644 --- a/cpp_easygraph/functions/community/modularity.cpp +++ b/cpp_easygraph/functions/community/modularity.cpp @@ -13,8 +13,6 @@ using namespace std; -const size_t PARALLEL_THRESHOLD = 100000; - void addVectorsInPlace(std::vector& v1, std::vector& v2) { if (v1.size() != v2.size()) { throw std::invalid_argument("Vectors must have the same size for element-wise addition."); @@ -106,47 +104,6 @@ void calculate_degrees_and_edges_adj_parallel( } } -void calculate_degrees_and_edges_adj_serial( - const adj_dict_factory& adj, - const std::vector& membership, - bool directed, - int num_communities, - double& e, - double& m, - std::vector& k_out, - std::vector& k_in - ) -{ - double directed_factor = directed ? 1.0 : 2.0; - - for (auto adj_it = adj.begin(); adj_it != adj.end(); ++adj_it) { - node_t u = adj_it->first; - int c1 = membership[u - 1]; - const auto& u_neighbors = adj_it->second; - - for (const auto& v_pair : u_neighbors) { - node_t v = v_pair.first; - - if (!directed && u > v) continue; - - int c2 = membership[v - 1]; - - double w = 1.0; - if (!v_pair.second.empty()) { - w = v_pair.second.begin()->second; - } - - if (c1 == c2) { - e += directed_factor * w; - } - - k_out[c1] += w; - k_in[c2] += w; - m += w; - } - } -} - // The input `communities` may be either: // (a) a membership list: a flat sequence of ints, membership[i] = community id of node (i+1); or // (b) a community list: a sequence of iterables of node ids @@ -222,11 +179,7 @@ py::object cpp_modularity(py::object G, py::object communities, py::object weigh std::vector k_out(num_communities, 0.0); std::vector k_in(num_communities, 0.0); - if (N >= (int)PARALLEL_THRESHOLD) { - calculate_degrees_and_edges_adj_parallel(adj, membership_vec, directed, num_communities, e, m, k_out, k_in); - } else { - calculate_degrees_and_edges_adj_serial(adj, membership_vec, directed, num_communities, e, m, k_out, k_in); - } + calculate_degrees_and_edges_adj_parallel(adj, membership_vec, directed, num_communities, e, m, k_out, k_in); if (!directed) addVectorsInPlace(k_out, k_in);