-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalgorithms.tex
More file actions
286 lines (235 loc) · 17.6 KB
/
algorithms.tex
File metadata and controls
286 lines (235 loc) · 17.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
\begin{figure*}[!t]
\centering
\includegraphics[scale=.45]{figures/example}
\caption{An illustrative example of the the Awerbuch-Shiloach algorithm. Edges in the graph are shown in solid black edges. The parent of a vertex is shown with a dashed arrowhead directed from a child to its parent. (a) An undirected and unweighted graph with seven vertices and five edges. (b) Initially, every vertex forms a singleton tree. (c) After conditional hooking of stars. Here we only show edges connected vertices in different trees. (d) Identification of vertices in stars (see Figure~\ref{fig:starcheck}). (e) After unconditional hooking of stars rooted at vertex 1. (f) After shortcutting in the only remaining tree. }
\label{fig:example}
\end{figure*}
\begin{figure*}[!t]
\centering
\includegraphics[scale=.45]{figures/starcheck}
\caption{Finding all vertices belonging stars. A Boolean array $star$ is maintained where $star[v]$ is $true$ if $v$ belongs to a star and $star[v]$ is $false$ otherwise. In this figure, a vertex belonging to a star is shown with an unfilled circle and a vertex not belonging to a star is shown with a filled circle. The parent of a vertex is shown with a dashed arrowhead directed from a child to its parent. (a) Initially, every vertex is assumed to be part a star. (b) In a nonstar tree, we mark all vertices at level not equal to 1. (c) In a nonstar tree, we mark all vertices at level equal to 1.}
\label{fig:starcheck}
\end{figure*}
\begin{algorithm}[!t]
\begin{algorithmic}[1]
\begin{small}
\Procedure{Awerbuch-Shiloach}{$G(V,E)$}
\LineComment{\textcolor{blue}{Initialization }}
\For {every vertex $v$ in $V$}
\State $f[v] \gets u$
\EndFor
\Repeat
\LineComment{\textcolor{blue}{Step1: Conditional star hooking }}
\For {every edge \{$u,v$\} in $E$} {\bf in parallel }
\If {$u$ belongs to a star and $f[u] > f[v]$}
\State $f[f[u]] \gets f[v]$
\EndIf
\EndFor
\LineComment{\textcolor{blue}{Step2: Unconditional star hooking }}
\For {every edge \{$u,v$\} in $E$} {\bf in parallel }
\If {$u$ belongs to a star and $f[u] \neq f[v]$}
\State $f[f[u]] \gets f[v]$
\EndIf
\EndFor
\LineComment{\textcolor{blue}{ Step3: Shortcutting}}
\For {every vertex $v$ in $V$} {\bf in parallel }
\State \id{gf}$[v] \gets f[f[v]]$
\EndFor
\For {every vertex $v$ in $V$} {\bf in parallel }
\If {$v$ does not belongs to a star}
\State $f[v] \gets \id{gf}[v]]$
\EndIf
\EndFor
\Until{$f$ reamins unchanged}
\State \Return $f$
\EndProcedure
\end{small}
\end{algorithmic}
\caption{The skeleton of the Awerbuch-Shiloach algorithm. {\bf Inputs:} an undirected graph $G(V, E)$.
{\bf Output:} An array $f$ storing the final parent of every vertex.}
\label{algo:Awerbuch-Shiloach}
\end{algorithm}
\begin{algorithm}[!t]
\begin{algorithmic}[1]
\begin{small}
\Procedure{Starcheck}{$G(V,E), f$}
\LineComment{\textcolor{blue}{Initialization }}
\For {every vertex $v$ in $V$} {\bf in parallel }
\State $star[v] \gets true$
\State \id{gf}$[v] \gets f[f[v]]$
\EndFor
\LineComment{\textcolor{blue}{Exclude vertices at depth greater than 1 and roots of nonstars }}
\For {every vertex $v$ in $V$} {\bf in parallel }
\If {$f[v] \neq \id{gf}[v]$}
\State $star[v] \gets false$
\State $star[\id{gf}[v]] \gets false$
\EndIf
\EndFor
\LineComment{\textcolor{blue}{Exclude vertices at depth =1 in nonstar trees}}
\For {every vertex $v$ in $V$} {\bf in parallel }
\State $star[v] \gets star[f[v]]$
\EndFor
\State \Return $star$
\EndProcedure
\end{small}
\end{algorithmic}
\caption{Finding vertices belonging to stars. {\bf Inputs:} an undirected graph $G(V, E)$ and an array $f$ storing the parent of every vertex.
{\bf Output:} A Boolean array $star$ where $star[v]$ is $true$ if $v$ belongs to a star.}
\label{algo:Starcheck}
\end{algorithm}
\section{The Awerbuch-Shiloach algorithm}
The Awerbuch-Shiloach algorithm maintains a forest data structure where each tree represents a connected component at the current stage of the algorithm.
A tree is called a \emph{star} if every vertex is a child of the root (i.e., the height of three is at most one).
The Awerbuch-Shiloach algorithm begins with singleton trees (stars) corresponding to each vertex in the graph.
In every iteration, the algorithm merges stars with other trees (both stars and nonstars) until no such merging is possible.
This merging is called ``hooking" in the Awerbuch-Shiloach algorithm and is performed in two steps: (a) conditional hooking and (b) unconditional hooking. These steps are described below.
Between two subsequent iterations, the algorithm reduces the hight of trees by pointer jumping, a process known as shortcutting.
Note that the Awerbuch-Shiloach algorithm is a simplification of the Shiloach-Vishkin algorithm~\cite{shiloach1980log}. While Shiloach-Vishkin allows hooking a tree onto another tree, Awerbuch-Shiloach only allows hooking a star onto another tree. Hooking stars instead of trees simplifies the hooking process and eliminate the need of some auxiliary data structures needed by the Shiloach-Vishkin algorithm.
In the Awerbuch-Shiloach algorithm, vertex $u$ maintains a parent $f[u]$ which is another vertex or itself.
This parent-child relationship forms a directed forest where the root $u$ of a tree satisfies $f[u]=u$.
Algorithm \ref{algo:Awerbuch-Shiloach} describes the overview of the Awerbuch-Shiloach algorithm.
Initially, every vertex forms a separate star.
In each iteration, the algorithm performs three operations: (a) conditional hooking, (b) unconditional hooking and (c) shortcutting.
Each of these three steps can be done in parallel as shown in Algorithm~\ref{algo:Awerbuch-Shiloach}.
Figure~\ref{fig:example} shows the execution of different steps of the Awerbuch-Shiloach algorithm.
Algorithm~\ref{algo:Awerbuch-Shiloach} and Figure~\ref{fig:example} assumed that the set of vertices belonging to stars is available.
To track the star memberships, the algorithm maintains a Boolean array $star$ where $star[v]$ is $true$ if $v$ belongs to a star.
However, it is not straightforward to identify those vertices (in stars) given the parent-child relationship in the array $f$.
Figure~\ref{fig:starcheck} shows a running example of finding vertices in stars in a trees obtained from Figure~\ref{fig:example}(d).
As shown in Figure~\ref{fig:starcheck}, three steps are needed to populate the $star$ array.
Algorithm~\ref{algo:Starcheck} describes these steps.
The Awerbuch-Shiloach algorithm terminates when every tree becomes a star and the parent array $f$ is not updated in the latest iteration.
Awerbuch and Shiloach showed that the algorithm terminates in $O(\log n)$ iterations. Hence, using $m+n$ processors, the algorithm has $O(\log n)$ complexity in a PRAM model.
Here $n$ and $m$ denote the number of vertices and edges, respectively.
\section{The Awerbuch-Shiloach algorithm using GraphBLAS primitives}
\label{sec:LACC}
At first, we design the Awerbuch-Shiloach algorithm in the language of sparse linear algebra.
For this purpose, we relied upon several GraphBLAS primitives as defined in its C API~\cite{bulucc2017design}.
The implementation details of these primitives (using the Combinatorial BLAS library) will be discussed in the next section.
We use three GraphBLAS operations to design the Awerbuch-Shiloach algorithm: (a) GrB\_mxv to perform sparse matrix-vector multiplication, (b) GrB\_extract for extracting some elements of a vector and (c) GrB\_assign for assigning elements of a vector to a destination vector.
The conditional and unconditional hooking operations in the Awerbuch-Shiloach algorithm traverse the neighbors of a subset of vertices in the graph. This operation can be captured by sparse matrix-vector (SpMV) product on an appropriate semiring.
We refer to a semiring by listing its scaler operations, such as the (multiply, add) semiring.
In LACC, we use Select2nd as semiring multiply, which returns the second value it is passed.
We use min as semiring add, which returns the minimum of the two operands it is passed.
Hence, we use (Select2nd, min) semiring in our SpMV algorithms.
Algorithm~\ref{algo:Sel2ndMin} describes how this semiring can be created using GraphBLAS C API.
\begin{algorithm}[htbp]
\begin{algorithmic}[1]
\begin{small}
\Procedure{Semiring}{$\lvert V \rvert$}
\State GrB\_Monoid Min
\State GrB\_Monoid\_new(\&Min, GrB\_MIN, $\lvert V \rvert$)
\State GrB\_Semiring Sel2ndMin
\State GrB\_Semiring\_new(\&Sel2ndMin, Min, GrB\_SECOND)
\State \Return Sel2ndMin
\EndProcedure
\end{small}
\end{algorithmic}
\caption{Create a semiring with (multiply, add) operations replaced by (Select2nd, min)}
\label{algo:Sel2ndMin}
\end{algorithm}
\subsection{Conditional hooking}
Algorithm~\ref{algo:condHook} describes how we designed conditional hooking using three GraphBLAS operations.
GrB\_mxv in Algorithm~\ref{algo:condHook} multiplies the adjacency matrix $\mA$ of the graph by the parent vector $f$ on a (Select2nd, min) semiring. Since we only care about output vertices belonging to stars, we masked the output in GrB\_mxv by the Boolean vector \id{star}.
The output of GrB\_mxv is stored in $f_n$. $f_n[v]$ stores the minimum parent of a vertex adjacent to $v$ such that $v$ belongs to a star.
Next, the GrB\_extract function extracts the parents of vertices in stars. Here, we again use \id{star} as an output mask.
Finally, the GrB\_assign function updates the parents of the roots of stars when possible.
\begin{algorithm}[htbp]
\begin{algorithmic}[1]
\begin{small}
\Procedure{CondHook}{$\mA$, $f$, \id{star}}
\State Sel2ndMin $\gets$ \Call{Semiring}{$\lvert V \rvert$}
\LineComment{\textcolor{blue}{Step1: For every vertex in a star, find a neighbor with the minimum parent id. SpMV($\mA$, $f$) with the Sel2ndMin semiring and the output masked by \id{star} does the job. $f_n$ is the output vector. }}
\State GrB\_mxv ($f_n$, \id{star}, GrB\_NULL, Sel2ndMin, $\mA$, $f$, GrB\_NULL) \li
\LineComment{\textcolor{blue}{Step2: Extract parents of vertices in stars ($f_{star} = f[star]$). We use GrB\_extract to extract from the parent array $f$ using \id{star} as a mask.}}
\State GrB\_extract ($f_{star}$, \id{star},GrB\_NULL, $f$, GrB\_ALL, 0, GrB\_NULL)
\LineComment{\textcolor{blue}{Step3: Hook root of a star to a neighboring tree ($f[f_{star}] = f_n$}).}
\State GrB\_assign ($f$, GrB\_NULL, GrB\_NULL, $f_n$, $f_{star}$, size($f_{star}$), GrB\_NULL)
%$F_n \gets$ \Call{SpMV}{$\mA, F $, SR=(select2nd, min)} \tcp*{minimum father among all neighbors}
%F[V_star] = F_n[V_star]
%GrB_extract(, star, GrB_NULL, GrB_ALL)
\EndProcedure
\end{small}
\end{algorithmic}
\caption{Conditional hooking of stars onto other trees (both stars and non stars). {\bf Inputs:} a sparse adjacency matrix $\mA$, a dense vector $f$ storing the parents of all vertices, a dense Boolean vector $stars$ storing whether the $i$th vertex belongs to a star.
{\bf Output:} Updated parent array $f$.}
\label{algo:condHook}
\end{algorithm}
\subsection{Unconditional hooking}
Algorithm~\ref{algo:uncondHook} describes how we designed unconditional hooking using three GraphBLAS operations.
As shown in Algorithm~\ref{algo:Awerbuch-Shiloach}, unconditional hooking is very similar to conditional hooking. The only difference between these two hooking operations is that unconditional hooking allows a star to hook with any other tree without any restriction on the order of vertices.
However, unlike conditional hooking where a star can hook to another star or nonstar, conditional hooking enables hooking a star onto a nonstar only.
This allows us to use a sparse vector $f_{nonstar}$ as an input to GrB\_mxv, where $f_{nonstar}$ is the subset of vertices belonging to nonstar trees.
Hence, GrB\_mxv in Algorithm~\ref{algo:uncondHook} multiplies the adjacency matrix $\mA$ of the graph by the sparse vector $f_{nonstar}$ on a (Select2nd, min) semiring.
In fact, (Select2nd, min) semiring is not strictly required here since the order of parents does not matter in unconditional hooking.
As before, we masked the output in GrB\_mxv by \id{star}.
The output of GrB\_mxv is stored in $f_n$.
$f_n[v]$ stores the minimum parent of a vertex $w$ adjacent to $v$ such that $v$ belongs to a star but $v$ does not belong to a star.
Next, the GrB\_extract function extracts the parents of vertices in stars.
Here, we again use \id{star} as an output mask.
Finally, the GrB\_assign function updates the parents of the roots of stars when possible.
\begin{algorithm}[htbp]
\begin{algorithmic}[1]
\begin{small}
\Procedure{UncondHook}{$\mA$, $f$, \id{star}}
\State Sel2ndMin $\gets$ \Call{Semiring}{$\lvert V \rvert$}
\LineComment{\textcolor{blue}{Step1: Extract parents of vertices not in stars. We use GrB\_extract to extract from the parent array $f$ using the structural complement of \id{star} as a mask.}}
\State GrB\_extract($f_{nonstar}$, \id{star},GrB\_NULL, $f$, GrB\_ALL, 0, GrB\_SCMP)
\LineComment{\textcolor{blue}{Step2: For every vertex in a star, find a neighbor in a nonstar. SpMV($\mA$, $f_{nonstar}$) with the Sel2ndMin semiring and the output masked by \id{star} does the job. $f_n$ is the output vector. Min is used just to break ties. Selecting any of the two operands is sufficient here. }}
\State GrB\_mxv ($f_n$, \id{star}, GrB\_NULL, Sel2ndMin, $\mA$, $f_{nonstar}$, GrB\_NULL)
\LineComment{\textcolor{blue}{Step2: Extract parents of vertices in stars ($f_{star} = f[star]$). We use GrB\_extract to extract from the parent array $f$ using \id{star} as a mask.}}
\State GrB\_extract ($f_{star}$, \id{star},GrB\_NULL, $f$, GrB\_ALL, 0, GrB\_NULL)
\LineComment{\textcolor{blue}{Step3: Hook root of a star to a neighboring tree ($f[f_{star}] = f_n$}).}
\State GrB\_assign ($f$, GrB\_NULL, GrB\_NULL, $f_n$, $f_{star}$, size($f_{star}$), GrB\_NULL)
%$F_n \gets$ \Call{SpMV}{$\mA, F $, SR=(select2nd, min)} \tcp*{minimum father among all neighbors}
%F[V_star] = F_n[V_star]
%GrB_extract(, star, GrB_NULL, GrB_ALL)
\EndProcedure
\end{small}
\end{algorithmic}
\caption{Unconditional hooking of stars onto other trees (only non stars). {\bf Inputs:} a sparse adjacency matrix $\mA$, a dense vector $f$ storing the parents of all vertices, a dense Boolean vector $stars$ storing whether the $i$th vertex belongs to a star.
{\bf Output:} Updated parent array $f$.}
\label{algo:uncondHook}
\end{algorithm}
\begin{algorithm}[htbp]
\begin{algorithmic}[1]
\begin{small}
\Procedure{Shortcut}{$f$}
\LineComment{\textcolor{blue}{Step1: find grandfathers ($\id{gf} \gets f[f] $) }}
\State GrB\_extract (\id{gf}, GrB\_NULL, GrB\_NULL, $f$, $f$, 0, GrB\_NULL)
\LineComment{\textcolor{blue}{Step2: $f \gets \id{gf}$}).}
\State GrB\_assign ($f$, GrB\_NULL, GrB\_NULL, GrB\_ALL, \id{gf}, size($f$), GrB\_NULL)
\EndProcedure
\end{small}
\end{algorithmic}
\caption{Shortcutting using GrB\_extract and GrB\_assign. {\bf Inputs:} a dense vector $f$ storing the parents of all vertices.
{\bf Output:} Updated parent array $f$.}
\label{algo:shortcut}
\end{algorithm}
\subsection{Shortcutting}
Algorithm~\ref{algo:shortcut} describes how we designed shortcutting using two GraphBLAS operations.
At first, we use GrB\_extract to obtain the grand parent \id{gf} of every vertex. Next, we assign \id{gf} to the parent array by using the GrB\_assign operation.
\section{Implementing the Awerbuch-Shiloach algorithm in CombBLAS}
We use the CombBLAS framework~\cite{bulucc2011combinatorial} to implement the GraphBLAS primitives needed to implement the Awerbuch-Shiloach algorithm.
Since CombBLAS does not directly support the masking operations, we use element-wise filtering after performing an operation when masking is needed.
CombBLAS distributes its sparse matrices on a 2D $p_r \times p_c$ processor grid.
Processor $P(i,j)$ stores the submatrix $\mA_{ij}$ of dimensions $(m/p_r)\times (n/p_c)$ in its local memory.
The CombBLAS uses the doubly compressed sparse columns (DCSC) format to store its local submatrices for scalability,
and uses a vector of \{index, value\} pairs for storing sparse vectors.
To balance load across processors, we randomly permute the input matrix $\mA$ before running the matching algorithms.
Vectors are also distributed on the same 2D processor grid.
For a distributed vector $v$, the syntax $v_{ij}$ denotes the local $n/p$ length piece of the vector owned by the $P(i, j)$th processor.
The syntax $v_i$ denotes the hypothetical $n/p_r$ or $n/p_c$ length piece of the vector collectively owned by all the processors along the $i$th processor row $P(i, :)$ or column $P(:, i)$.
%\subsection{Analysis of the distributed algorithm}
%We measure communication by the number of {\em words} moved ($W$) and
%the number of {\em messages} sent ($S$). The cost of communicating a length $m$ message is $\alpha + \beta m$ where $\alpha$ is the
%latency and $\beta$ is the inverse bandwidth, both defined relative to the cost of a single arithmetic operation. Hence, an algorithm that
%performs $F$ arithmetic operations, sends $S$ messages, and moves $W$ words takes $T= F + \alpha S + \beta W$ time.
%
%We previously analyzed~\cite{azad2016distributed} the complexity of SpMV because it is also a building block of parallel maximal matching algorithms.
%For ease of analysis, we assume that nonzeros are i.i.d. distributed in matrices and vectors. %, $x$ is $\id{f}$ percent full, and $\mA$ has on average $d$ nonzeros per column.
%We also assume a square processor grid $p_c{=}p_r{=}\sqrt{p}$. Number of iterations is denoted by $\left\vert{\mathrm{iters}} \right\vert$.
%We leverage the 2D SpMV algorithms implemented in CombBLAS.
%The complexity of the parallel SpMSpV algorithm has been analyzed in this context recently~\cite{matchingipdps16}, hence we just state
%the result here:
% $$ T_{ \Call{SpMV}{}} = O \Bigl( \frac{m}{p} + \beta \bigl ( \frac{m}{p} + \frac{n}{\sqrt{p}} \bigr ) + \left\vert{\mathrm{iters}} \right\vert \alpha \sqrt{p} \Bigr) .$$