Skip to content

Commit 39868a2

Browse files
committed
added return for the number of reachable nodes
1 parent d076638 commit 39868a2

2 files changed

Lines changed: 136 additions & 41 deletions

File tree

experimental/algorithm/LAGr_HarmonicCentrality.c

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
// The LAGr_HarmonicCentrality algorithm calculates an estimate of the "harmonic
1818
// centrality" of all nodes in a graph. Harmonic Centrality is defined on a non-
1919
// weighted graph as follows:
20-
// HC(u) = \sum_{v\in neighbors(u)} d(u,v)
20+
// HC(u) = \sum_{v \ne u} 1/d(u,v)
2121
// where d(u,v) is the shortest path distance from u to v.
2222
//
2323
// HyperLogLog allows us to estimate the cardinality of the subsets which each
@@ -47,16 +47,10 @@
4747
#include "LG_internal.h"
4848

4949
#define CENTRALITY_MAX_ITER 100
50-
51-
// WARNING: these macros are not recognized by the JIT, so have to be changed
52-
// manually in the functions below
5350
#define HLL_P 10 // Precision
5451
#define HLL_REGISTERS (1 << HLL_P) // number of registers
5552

56-
// register a function (or type) and the jit string for its definition
57-
#define JIT_STR(f, var) static char *var = #f; f
58-
59-
JIT_STR(
53+
LG_JIT_STRING(
6054
typedef struct {
6155
uint8_t registers[(1 << 10)];
6256
} HLL;
@@ -84,8 +78,9 @@ static __inline void _hll_add_hash(HLL *hll, uint32_t hash) {
8478
}
8579
}
8680

87-
double _hll_count(const HLL *hll) {
88-
uint32_t i;
81+
LG_JIT_STRING (
82+
void lg_hll_count(double *z, const HLL *x) {
83+
const HLL *hll = x;
8984

9085
double alpha_mm = 0.7213 / (1.0 + 1.079 / (double)HLL_REGISTERS);
9186

@@ -101,7 +96,7 @@ double _hll_count(const HLL *hll) {
10196
if (estimate <= 5.0 / 2.0 * (double)HLL_REGISTERS) {
10297
int zeros = 0;
10398

104-
for (i = 0; i < HLL_REGISTERS; i++)
99+
for (uint32_t i = 0; i < HLL_REGISTERS; i++)
105100
zeros += (hll->registers[i] == 0);
106101

107102
if (zeros)
@@ -111,8 +106,8 @@ double _hll_count(const HLL *hll) {
111106
estimate = -4294967296.0 * log(1.0 - (estimate / 4294967296.0)) ;
112107
}
113108

114-
return estimate;
115-
}
109+
*z = estimate;
110+
}, LG_HLL_COUNT)
116111

117112
//------------------------------------------------------------------------------
118113
// GraphBLAS Ops
@@ -145,41 +140,41 @@ void lg_hll_init(HLL *z, const uint64_t *x, GrB_Index i, GrB_Index j,
145140
}
146141
}
147142

148-
JIT_STR(
143+
LG_JIT_STRING(
149144
void lg_hll_merge(HLL *z, const HLL *x, const HLL *y) {
150-
for (uint32_t i = 0; i < (1 << 10); i++) {
151-
z->registers[i] = y->registers[i] > x->registers[i] ? y->registers[i]
152-
: x->registers[i];
145+
for (uint32_t i = 0; i < HLL_REGISTERS; i++) {
146+
z->registers[i] = y->registers[i] > x->registers[i] ?
147+
y->registers[i] : x->registers[i];
153148
}
154149
},
155150
LG_HLL_MERGE_STR)
156151

157-
JIT_STR(
152+
LG_JIT_STRING(
158153
void lg_hll_delta(double *z, const HLL *x, const HLL *y) {
159154
*z = 0;
160-
bool diff = 0 != memcmp(x->registers, y->registers, (1 << 10)) ;
155+
bool diff = 0 != memcmp(x->registers, y->registers, HLL_REGISTERS) ;
161156
if (diff) {
162157
uint32_t i;
163158

164159
double alpha_mm = 0.7213 / (1.0 + 1.079 / (double)(1 << 10)) ;
165160

166-
alpha_mm *= ((double)(1 << 10) * (double)(1 << 10)) ;
161+
alpha_mm *= ((double)(HLL_REGISTERS) * (double)(HLL_REGISTERS)) ;
167162

168163
double sum = 0;
169-
for (uint32_t i = 0; i < (1 << 10); i++) {
164+
for (uint32_t i = 0; i < (HLL_REGISTERS); i++) {
170165
sum += 1.0 / (1 << x->registers[i]);
171166
}
172167

173168
double estimate = alpha_mm / sum;
174169

175-
if (estimate <= 5.0 / 2.0 * (double)(1 << 10)) {
170+
if (estimate <= 5.0 / 2.0 * (double)(HLL_REGISTERS)) {
176171
int zeros = 0;
177172

178-
for (i = 0; i < (1 << 10); i++)
173+
for (i = 0; i < (HLL_REGISTERS); i++)
179174
zeros += (x->registers[i] == 0);
180175

181176
if (zeros)
182-
estimate = (double)(1 << 10) * log((double)(1 << 10) / zeros);
177+
estimate = (double)(HLL_REGISTERS) * log((double)(HLL_REGISTERS) / zeros);
183178

184179
} else if (estimate > (1.0 / 30.0) * 4294967296.0) {
185180
estimate = -4294967296.0 * log(1.0 - (estimate / 4294967296.0)) ;
@@ -188,20 +183,20 @@ JIT_STR(
188183
*z = estimate;
189184

190185
sum = 0;
191-
for (uint32_t i = 0; i < (1 << 10); i++) {
186+
for (uint32_t i = 0; i < (HLL_REGISTERS); i++) {
192187
sum += 1.0 / (1 << y->registers[i]);
193188
}
194189

195190
estimate = alpha_mm / sum;
196191

197-
if (estimate <= 5.0 / 2.0 * (double)(1 << 10)) {
192+
if (estimate <= 5.0 / 2.0 * (double)(HLL_REGISTERS)) {
198193
int zeros = 0;
199194

200-
for (i = 0; i < (1 << 10); i++)
195+
for (i = 0; i < (HLL_REGISTERS); i++)
201196
zeros += (y->registers[i] == 0);
202197

203198
if (zeros)
204-
estimate = (double)(1 << 10) * log((double)(1 << 10) / zeros);
199+
estimate = (double)(HLL_REGISTERS) * log((double)(HLL_REGISTERS) / zeros);
205200

206201
} else if (estimate > (1.0 / 30.0) * 4294967296.0) {
207202
estimate = -4294967296.0 * log(1.0 - (estimate / 4294967296.0)) ;
@@ -211,11 +206,9 @@ JIT_STR(
211206
},
212207
LG_HLL_DELTA_STR)
213208

214-
JIT_STR(
215-
void lg_hll_second(HLL *z,
216-
bool *x, // unused
217-
const HLL *y) {
218-
memcpy(z->registers, y->registers, 1 << 10);
209+
LG_JIT_STRING(
210+
void lg_hll_second(HLL *z, bool *x, const HLL *y) {
211+
memcpy(z->registers, y->registers, sizeof(z->registers));
219212
},
220213
LG_HLL_SECOND_STR)
221214

@@ -252,12 +245,14 @@ GrB_free(&shallow_second); \
252245
GrB_free(&merge_hll_biop); \
253246
GrB_free(&merge_hll); \
254247
GrB_free(&merge_second); \
255-
GrB_free(&delta_hll);
248+
GrB_free(&delta_hll); \
249+
GrB_free(&count_hll);
256250

257251
#undef LG_FREE_ALL
258252
#define LG_FREE_ALL \
259253
LG_FREE_WORK ; \
260-
GrB_free(scores) ;
254+
GrB_free(scores) ; \
255+
GrB_free(reachable_nodes) ;
261256

262257
// compute harmonic closeness centrality estimates using HLL BFS propagation
263258
//
@@ -291,15 +286,13 @@ int LAGr_HarmonicCentrality(
291286

292287
GrB_Descriptor desc = NULL;
293288
GrB_IndexUnaryOp init_hlls = NULL;
289+
GrB_UnaryOp count_hll = NULL;
294290
GrB_BinaryOp shallow_second = NULL;
295291
GrB_BinaryOp merge_hll_biop = NULL;
296292
GrB_Monoid merge_hll = NULL;
297293
GrB_Semiring merge_second = NULL;
298294
GrB_BinaryOp delta_hll = NULL;
299295

300-
// TODO:
301-
LG_ASSERT(reachable_nodes == NULL, GrB_NOT_IMPLEMENTED);
302-
303296
LG_ASSERT(G != NULL, GrB_NULL_POINTER);
304297
LG_ASSERT(G->A != NULL, GrB_NULL_POINTER);
305298
LG_ASSERT(scores != NULL, GrB_NULL_POINTER);
@@ -327,6 +320,7 @@ int LAGr_HarmonicCentrality(
327320
node_weights, NULL)) ;
328321
LG_ASSERT_MSG(min_w > 0, GrB_INVALID_VALUE,
329322
"Negative node weights not supported");
323+
330324
// TODO: is this cap reasonable?
331325
LG_ASSERT_MSG(max_w < 1000000, GrB_NOT_IMPLEMENTED,
332326
"Node weights over 1000000 not supported");
@@ -367,6 +361,9 @@ int LAGr_HarmonicCentrality(
367361
GRB_TRY (GxB_Vector_extractTuples_Vector (
368362
NULL, flat_weight, node_weights, NULL)) ;
369363

364+
// count op
365+
GRB_TRY (GxB_UnaryOp_new (&count_hll, (GxB_unary_function) lg_hll_count,
366+
GrB_FP64, hll_t, "lg_hll_count", LG_HLL_COUNT)) ;
370367

371368
// init op: weight (INT64) at row index i → HLL seeded with 'weight' hashes
372369
GRB_TRY(GrB_IndexUnaryOp_new(
@@ -459,6 +456,17 @@ int LAGr_HarmonicCentrality(
459456
score_cont->iso = false;
460457
score_cont->x = flat_scores;
461458
flat_scores = NULL ;
459+
460+
if (reachable_nodes) {
461+
// make reachable_nodes vector with same sparsity pattern as scores
462+
GRB_TRY (GrB_Vector_new (reachable_nodes, GrB_FP64, nrows)) ;
463+
GRB_TRY (GrB_apply (delta_vec, NULL, NULL, count_hll, new_sets, NULL)) ;
464+
GrB_Vector I_vec = (score_cont->format == GxB_FULL) ?
465+
NULL : score_cont->i;
466+
GRB_TRY (GxB_Vector_assign_Vector (
467+
*reachable_nodes, NULL, NULL, delta_vec, I_vec, NULL)) ;
468+
}
469+
462470
GRB_TRY (GxB_load_Vector_from_Container (*scores, score_cont, NULL)) ;
463471

464472
//--------------------------------------------------------------------------
@@ -470,6 +478,7 @@ int LAGr_HarmonicCentrality(
470478
LG_FREE_WORK;
471479
return GrB_SUCCESS;
472480
}
481+
473482
#else
474483
int LAGr_HarmonicCentrality(
475484
// outputs:
@@ -493,6 +502,7 @@ int LAGr_HarmonicCentrality(
493502
GrB_free(&it); \
494503
GrB_free(&score); \
495504
GrB_free(&desc); \
505+
GrB_free(&node_compact); \
496506
LAGraph_Delete(&G_compact, NULL);
497507

498508
#undef LG_FREE_ALL
@@ -519,6 +529,7 @@ int LAGr_HarmonicCentrality_exact(
519529

520530
GrB_Vector level = NULL;
521531
GrB_Vector flat_weight = NULL;
532+
GrB_Vector node_compact = NULL;
522533
GxB_Iterator it = NULL;
523534
GrB_Scalar score = NULL;
524535

@@ -564,7 +575,6 @@ int LAGr_HarmonicCentrality_exact(
564575
GRB_TRY(GrB_Matrix_new(&_A, GrB_BOOL, nvals, nvals)) ;
565576
GRB_TRY(GxB_Matrix_extract_Vector(
566577
_A, NULL, NULL, G->A, node_weights, node_weights, desc)) ;
567-
GRB_TRY(GrB_free(&desc)) ;
568578
LG_TRY (LAGraph_New (&G_compact, &_A, LAGraph_ADJACENCY_DIRECTED, msg)) ;
569579

570580
GRB_TRY(GrB_Vector_new(&flat_weight, GrB_INT64, nvals)) ;
@@ -575,8 +585,13 @@ int LAGr_HarmonicCentrality_exact(
575585
//--------------------------------------------------------------------------
576586

577587
GRB_TRY (GxB_Scalar_new (&score, GrB_FP64)) ;
578-
GRB_TRY(GxB_Iterator_new(&it)) ;
579-
GRB_TRY(GxB_Vector_Iterator_attach(it, nodes, NULL)) ;
588+
GRB_TRY (GxB_Iterator_new(&it)) ;
589+
GRB_TRY (GrB_Vector_new (&node_compact, GrB_INT64, nvals));
590+
GRB_TRY (GxB_Vector_extract_Vector(
591+
node_compact, NULL, NULL, nodes, node_weights, desc)) ;
592+
GRB_TRY(GrB_free(&desc)) ;
593+
594+
GRB_TRY(GxB_Vector_Iterator_attach(it, node_compact, NULL)) ;
580595
GrB_Info info = GxB_Vector_Iterator_seek(it, 0);
581596
while (info != GxB_EXHAUSTED) {
582597
GrB_Index nodeIdx = GxB_Vector_Iterator_getIndex(it);

experimental/test/test_HarmonicCentrality.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
// NOTE: these tests require SuiteSparse:GraphBLAS
2020

21+
#include "LG_internal.h"
2122
#include <stdio.h>
2223
#include <acutest.h>
2324

@@ -30,6 +31,7 @@ LAGraph_Graph G = NULL ;
3031
GrB_Vector scores_approx = NULL ;
3132
GrB_Vector scores_exact = NULL ;
3233
GrB_Vector node_weights = NULL ;
34+
GrB_Vector reachable_approx = NULL ;
3335
#define LEN 512
3436
char filename [LEN+1] ;
3537

@@ -189,6 +191,83 @@ void test_HarmonicCentrality_empty (void)
189191
#endif
190192

191193

194+
//------------------------------------------------------------------------------
195+
// test_HarmonicCentrality_reachable: compare reachable_nodes to BFS nvals
196+
//------------------------------------------------------------------------------
197+
198+
#if LG_SUITESPARSE_GRAPHBLAS_V10
199+
void test_HarmonicCentrality_reachable (void)
200+
{
201+
LAGraph_Init (msg) ;
202+
203+
for (int k = 0 ; ; k++)
204+
{
205+
const char *aname = files [k] ;
206+
if (strlen (aname) == 0) break ;
207+
printf ("\n================================== %s:\n", aname) ;
208+
TEST_CASE (aname) ;
209+
snprintf (filename, LEN, LG_DATA_DIR "%s", aname) ;
210+
FILE *f = fopen (filename, "r") ;
211+
TEST_CHECK (f != NULL) ;
212+
OK (LAGraph_MMRead (&A, f, msg)) ;
213+
OK (fclose (f)) ;
214+
215+
GrB_Index n ;
216+
OK (GrB_Matrix_nrows (&n, A)) ;
217+
218+
OK (LAGraph_New (&G, &A, LAGraph_ADJACENCY_DIRECTED, msg)) ;
219+
TEST_CHECK (A == NULL) ;
220+
221+
OK (GrB_Vector_new (&node_weights, GrB_BOOL, n)) ;
222+
OK (GrB_Vector_assign_BOOL (
223+
node_weights, NULL, NULL, true, GrB_ALL, n, NULL)) ;
224+
225+
// compute approximate harmonic centrality with reachable_nodes output
226+
GrB_Info info = LAGr_HarmonicCentrality (
227+
&scores_approx, &reachable_approx, G, node_weights, msg);
228+
OK (info) ;
229+
230+
// for each node, run BFS and compare nvals(level) to reachable_approx
231+
double max_err = 0 ;
232+
for (GrB_Index i = 0 ; i < n ; i++)
233+
{
234+
double approx_count = 0 ;
235+
GrB_Info info = GrB_Vector_extractElement_FP64 (
236+
&approx_count, reachable_approx, i) ;
237+
if (info != GrB_SUCCESS) continue ;
238+
239+
GrB_Vector level = NULL ;
240+
OK (LAGr_BreadthFirstSearch (&level, NULL, G, i, msg)) ;
241+
242+
GrB_Index bfs_nvals = 0 ;
243+
OK (GrB_Vector_nvals (&bfs_nvals, level)) ;
244+
OK (GrB_free (&level)) ;
245+
246+
// bfs_nvals includes the source node (level 0)
247+
if (bfs_nvals > 0)
248+
{
249+
double rel = fabs (approx_count - (double) bfs_nvals)
250+
/ (double) bfs_nvals ;
251+
if (rel > max_err) max_err = rel ;
252+
}
253+
}
254+
255+
printf ("max relative error (reachable): %.2f%%\n", max_err * 100) ;
256+
TEST_CHECK (max_err < 0.5) ;
257+
TEST_MSG ("Reachable node count error too large: %.2f%%",
258+
max_err * 100) ;
259+
260+
OK (LAGraph_Delete (&G, msg)) ;
261+
OK (GrB_free (&scores_approx)) ;
262+
OK (GrB_free (&reachable_approx)) ;
263+
OK (GrB_free (&node_weights)) ;
264+
}
265+
266+
LAGraph_Finalize (msg) ;
267+
}
268+
#endif
269+
270+
192271
//------------------------------------------------------------------------------
193272
// test_errors: test error handling
194273
//------------------------------------------------------------------------------
@@ -258,6 +337,7 @@ TEST_LIST = {
258337
#if LG_SUITESPARSE_GRAPHBLAS_V10
259338
{"HarmonicCentrality", test_HarmonicCentrality},
260339
{"HarmonicCentrality_empty", test_HarmonicCentrality_empty},
340+
{"HarmonicCentrality_reachable", test_HarmonicCentrality_reachable},
261341
#endif
262342
{"HarmonicCentrality_errors", test_errors},
263343
{NULL, NULL}

0 commit comments

Comments
 (0)