/* Copyright (C) 2010 The Trustees of Indiana University.                  */
/*                                                                         */
/* Use, modification and distribution is subject to the Boost Software     */
/* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */
/* http://www.boost.org/LICENSE_1_0.txt)                                   */
/*                                                                         */
/*  Authors: Jeremiah Willcock                                             */
/*           Andrew Lumsdaine                                              */

#include "common.h"
#include <mpi.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>

/* Number of edge targets stored in one edge_page. */
enum { EDGE_PAGE_SIZE = 16 };

/* STINGER-like group of edges, forming a linked list of pages. */
typedef struct edge_page {
  int64_t targets[EDGE_PAGE_SIZE]; /* Unused elements filled with -1 */
  struct edge_page* next; /* NULL if no more */
} edge_page;

/* Allocate a page with no successor and every target slot marked unused
 * (-1).  The caller owns the returned page. */
static inline edge_page* new_edge_page(void) {
  edge_page* ep = (edge_page*)xmalloc(sizeof(edge_page));
  int i;
  ep->next = NULL;
  for (i = 0; i < EDGE_PAGE_SIZE; ++i) ep->targets[i] = -1;
  return ep;
}

/* Free ep and every page reachable from it through next.  Accepts NULL.
 * Written as a loop so that the stack depth does not grow with the length
 * of the chain (one page per EDGE_PAGE_SIZE edges of a vertex). */
static inline void delete_edge_page(edge_page* ep) {
  while (ep) {
    edge_page* next = ep->next;
    free(ep);
    ep = next;
  }
}

typedef struct adjacency_list {
  size_t nvertices; /* User-visible number of vertices */
  size_t nvertices_allocated; /* Actual allocated size of data */
  edge_page** data; /* Indexed by vertex */
} adjacency_list;

/* Make sure al has at least min_nvertices user-visible vertices.  Capacity
 * starts at 16 slots and doubles until it is large enough; newly exposed
 * vertices get an empty (NULL) page chain.  Does nothing when the list is
 * already big enough. */
static void grow_adj_list(adjacency_list* al, size_t min_nvertices) {
  if (min_nvertices <= al->nvertices) return;
  while (min_nvertices > al->nvertices_allocated) {
    al->nvertices_allocated = (al->nvertices_allocated == 0) ? 16 : (al->nvertices_allocated * 2);
    al->data = (edge_page**)xrealloc(al->data, al->nvertices_allocated * sizeof(edge_page*));
  }
  size_t i;
  for (i = al->nvertices; i < min_nvertices; ++i) {
    al->data[i] = NULL;
  }
  al->nvertices = min_nvertices;
}

/* Append tgt to the edge list of local vertex src, growing the adjacency
 * list if src is not yet covered.  tgt must not be -1, since -1 marks an
 * unused slot.
 *
 * NOTE(review): the page chain is walked from its head on every insertion,
 * so the cost of one insertion grows with the vertex's current degree; a
 * per-vertex tail pointer would avoid the walk. */
static void add_adj_list_edge(adjacency_list* al, size_t src, int64_t tgt) {
  grow_adj_list(al, src + 1);
  edge_page** p = al->data + src;
  /* Each page is filled before we allocate another one, so we only need to
   * check the last one in the chain. */
  while (*p && (*p)->next) {p = &((*p)->next);}
  if (*p) {
    assert (!(*p)->next);
    int i;
    for (i = 0; i < EDGE_PAGE_SIZE; ++i) {
      if ((*p)->targets[i] == -1) {
        (*p)->targets[i] = tgt;
        return;
      }
    }
    /* Last page is full: continue at its (empty) next link. */
    p = &((*p)->next);
    assert (!*p);
  }
  assert (!*p);
  *p = new_edge_page();
  (*p)->targets[0] = tgt;
}

/* Free every page chain and the vertex array, leaving al empty. */
static void clear_adj_list(adjacency_list* al) {
  size_t i;
  for (i = 0; i < al->nvertices; ++i) delete_edge_page(al->data[i]);
  free(al->data);
  al->data = NULL;
  al->nvertices = al->nvertices_allocated = 0;
}

/* Build the local part of a distributed CSR graph from an edge list.
 *
 *   nedges  number of (src, tgt) pairs in edges
 *   edges   2 * nedges global vertex indexes; a pair with either element
 *           equal to -1 is skipped
 *   g       output; this function sets nglobalverts, nlocalverts,
 *           rowstarts, nlocaledges and column.  rowstarts and column are
 *           allocated here with xmalloc and are not freed by this function.
 *
 * Every rank of MPI_COMM_WORLD must call this function: it exchanges edges
 * with all other ranks and finishes with an MPI_Allreduce. */
void convert_graph_to_csr(const int64_t nedges, const int64_t* const edges, csr_graph* const g) {
  adjacency_list adj_list = {0, 0, NULL}; /* Adjacency list being built up with
                                           * received data */
  {
    /* Redistribute each input undirected edge (a, b) to create two directed
     * copies: (a -> b) on VERTEX_OWNER(a) and (b -> a) on VERTEX_OWNER(b)
     * [except for self-loops, of which only one copy is kept]. */
    const size_t edge_buffer_size = (1 << 27) / (2 * sizeof(int64_t)) / size; /* 128 MiB */
    /* Note that these buffers are edge pairs (src, tgt), where both elements
     * are global vertex indexes. */
    int64_t* recvbuf = (int64_t*)xmalloc(edge_buffer_size * 2 * sizeof(int64_t));
    MPI_Request recvreq;
    int recvreq_active = 0;
    int64_t* coalescing_buf = (int64_t*)xmalloc(size * edge_buffer_size * 2 * sizeof(int64_t));
    size_t* coalescing_counts = (size_t*)xcalloc(size, sizeof(size_t)); /* Uses zero-init */
    MPI_Request* sendreqs = (MPI_Request*)xmalloc(size * sizeof(MPI_Request));
    int* sendreqs_active = (int*)xcalloc(size, sizeof(int)); /* Uses zero-init */
    int num_sendreqs_active = 0;
    int num_senders_done = 1; /* Number of ranks that have said that they will
                               * not send to me again; I will never send to
                               * myself at all (see test in SEND). */

    /* The heavy use of macros here is to create the equivalent of nested
     * functions with proper access to variables from the enclosing scope. */

    /* Do the handling for one received edge. */
#define PROCESS_EDGE(src, tgt) \
    do { \
      assert (VERTEX_OWNER((src)) == rank); \
      add_adj_list_edge(&adj_list, VERTEX_LOCAL((src)), (tgt)); \
    } while (0)

    /* Start/restart the receive operation to wait for blocks of edges, if
     * needed. */
#define START_IRECV \
    do { \
      if (num_senders_done < size) { \
        MPI_Irecv(recvbuf, edge_buffer_size * 2, INT64_T_MPI_TYPE, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &recvreq); \
        recvreq_active = 1; \
      } \
    } while (0)

    /* Handle all outstanding MPI requests and progress the MPI
     * implementation. */
#define PROCESS_REQS \
    do { \
      int flag; \
      /* Test receive request. */ \
      while (recvreq_active) { \
        MPI_Status st; \
        MPI_Test(&recvreq, &flag, &st); \
        if (!flag) break; \
        /* A message arrived. */ \
        recvreq_active = 0; \
        int count; \
        MPI_Get_count(&st, INT64_T_MPI_TYPE, &count); \
        count /= 2; \
        if (count == 0 /* This count is used as a flag when each sender is done */ ) { \
          ++num_senders_done; \
        } else { \
          /* Process the edges in the received message. */ \
          int c; \
          for (c = 0; c < count; ++c) { \
            PROCESS_EDGE(recvbuf[c * 2], recvbuf[c * 2 + 1]); \
          } \
        } \
        START_IRECV; \
      } \
      /* Test send requests to determine when their buffers are available for reuse. */ \
      int c; \
      for (c = 0; c < size; ++c) { \
        if (sendreqs_active[c]) { \
          MPI_Test(&sendreqs[c], &flag, MPI_STATUS_IGNORE); \
          if (flag) {sendreqs_active[c] = 0; --num_sendreqs_active;} \
        } \
      } \
    } while (0)

    /* Route one directed edge to the owner of src: handled immediately when
     * this rank owns it, otherwise appended to the per-destination
     * coalescing buffer, which is sent once it is full. */
#define SEND(src, tgt) \
    do { \
      int dest = VERTEX_OWNER((src)); \
      if (dest == rank) { \
        /* Process self-sends locally. */ \
        PROCESS_EDGE((src), (tgt)); \
      } else { \
        while (sendreqs_active[dest]) PROCESS_REQS; /* Wait for send buffer to be available */ \
        /* Push onto coalescing buffer. */ \
        size_t c = coalescing_counts[dest]; \
        coalescing_buf[dest * edge_buffer_size * 2 + c * 2] = (src); \
        coalescing_buf[dest * edge_buffer_size * 2 + c * 2 + 1] = (tgt); \
        ++coalescing_counts[dest]; \
        /* Send if the buffer is full. */ \
        if (coalescing_counts[dest] == edge_buffer_size) { \
          FLUSH_COALESCING_BUFFER(dest); \
        } \
      } \
    } while (0)

    /* Send whatever is currently buffered for dest (possibly nothing, which
     * produces a zero-length message) and reset that buffer's count. */
#define FLUSH_COALESCING_BUFFER(dest) \
    do { \
      while (sendreqs_active[(dest)]) PROCESS_REQS; /* Wait for previous sends to finish */ \
      /* Ssend plus only having one request to a given destination active at a time should act as flow control. */ \
      MPI_Issend(coalescing_buf + (dest) * edge_buffer_size * 2, coalescing_counts[(dest)] * 2, INT64_T_MPI_TYPE, (dest), 0, MPI_COMM_WORLD, &sendreqs[(dest)]); \
      sendreqs_active[(dest)] = 1; \
      ++num_sendreqs_active; \
      /* Clear the buffer for the next user. */ \
      coalescing_counts[(dest)] = 0; \
    } while (0)

    START_IRECV;
    size_t i;
    for (i = 0; i < (size_t)nedges; ++i) {
      /* Poll MPI once every 2^16 input edges. */
      if ((i % (1 << 16)) == 0) PROCESS_REQS;
      if (edges[i * 2 + 0] == -1 || edges[i * 2 + 1] == -1) {
        continue;
      }
      SEND(edges[i * 2 + 0], edges[i * 2 + 1]);
      if (edges[i * 2 + 0] != edges[i * 2 + 1]) {
        /* Only send reverse for non-self-loops. */
        SEND(edges[i * 2 + 1], edges[i * 2 + 0]);
      }
    }
    int offset;
    for (offset = 1; offset < size; ++offset) {
      int dest = MOD_SIZE(rank + offset);
      if (coalescing_counts[dest] != 0) {
        /* Send actual data, if any. */
        FLUSH_COALESCING_BUFFER(dest);
      }
      /* Send empty message to indicate that we won't send anything else to
       * this rank (takes advantage of MPI non-overtaking rules). */
      FLUSH_COALESCING_BUFFER(dest);
    }
    /* Keep progressing until every other rank has signalled completion and
     * all of our own sends have finished. */
    while (num_senders_done < size || num_sendreqs_active > 0) PROCESS_REQS;
    free(recvbuf);
    free(coalescing_buf);
    free(coalescing_counts);
    free(sendreqs);
    free(sendreqs_active);

#undef PROCESS_REQS
#undef PROCESS_EDGE
#undef FLUSH_COALESCING_BUFFER
#undef SEND
#undef START_IRECV
  }

  /* Compute global number of vertices and count the degrees of the local
   * vertices. */
  int64_t nverts_known = 0; /* We only count vertices touched by at least one
                             * edge, and because of edge doubling each vertex
                             * incident to an edge must be the target of some
                             * copy of that edge. */
  size_t nlocalverts_orig = adj_list.nvertices;
  size_t* degrees = (size_t*)xcalloc(nlocalverts_orig, sizeof(size_t)); /* Uses zero-init */
  size_t i, j;
  for (i = 0; i < nlocalverts_orig; ++i) {
    size_t deg = 0;
    edge_page* p;
    for (p = adj_list.data[i]; p; p = p->next) {
      for (j = 0; j < EDGE_PAGE_SIZE; ++j) {
        if (p->targets[j] != -1) {
          ++deg;
          if (p->targets[j] >= nverts_known) nverts_known = p->targets[j] + 1;
        }
      }
    }
    degrees[i] = deg;
  }
  int64_t nglobalverts = 0;
  MPI_Allreduce(&nverts_known, &nglobalverts, 1, INT64_T_MPI_TYPE, MPI_MAX, MPI_COMM_WORLD);
  g->nglobalverts = nglobalverts;
  /* Compute the final number of local vertices based on the global maximum
   * vertex number. */
  size_t nlocalverts = VERTEX_LOCAL(nglobalverts + size - 1 - rank);
  g->nlocalverts = nlocalverts;
  grow_adj_list(&adj_list, nlocalverts);

  /* Build CSR data structure. */
  size_t *rowstarts = (size_t*)xmalloc((nlocalverts + 1) * sizeof(size_t));
  g->rowstarts = rowstarts;
  /* Compute offset to start of each row. */
  rowstarts[0] = 0;
  for (i = 0; i < nlocalverts; ++i) {
    rowstarts[i + 1] = rowstarts[i] + (i >= nlocalverts_orig ? 0 : degrees[i]);
  }
  size_t nlocaledges = rowstarts[nlocalverts];
  g->nlocaledges = nlocaledges;
  int64_t* column = (int64_t*)xmalloc(nlocaledges * sizeof(int64_t));
  g->column = column;
  /* Append outgoing edges for each vertex to the column array, in order. */
  for (i = 0; i < nlocalverts; ++i) {
    /* Vertices added by the grow_adj_list call above have no degree entry. */
    const size_t deg = (i >= nlocalverts_orig ? 0 : degrees[i]);
    /* Number of targets already copied for this vertex; size_t so it cannot
     * overflow or mix signedness with deg. */
    size_t offset = 0;
    edge_page* p;
    for (p = adj_list.data[i]; p; p = p->next, offset += EDGE_PAGE_SIZE) {
      /* Pages are filled front to back, so only the last page of a chain
       * can hold fewer than EDGE_PAGE_SIZE valid targets. */
      size_t nelts = (deg - offset > EDGE_PAGE_SIZE) ? EDGE_PAGE_SIZE : deg - offset;
      memcpy(column + rowstarts[i] + offset, p->targets, nelts * sizeof(int64_t));
    }
  }
  free(degrees); degrees = NULL;
  clear_adj_list(&adj_list);
}