From 879e42f6b90f68b8943f384f25ea97424b92010b Mon Sep 17 00:00:00 2001 From: Kailash Joshi Date: Sun, 27 Jul 2014 14:25:05 -0600 Subject: [PATCH 1/4] SSSP that collects path and distance --- apps/sssp_getPath/CMakeLists.txt | 2 + apps/sssp_getPath/sssp_getpath.cpp | 352 +++++++++++++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100644 apps/sssp_getPath/CMakeLists.txt create mode 100644 apps/sssp_getPath/sssp_getpath.cpp diff --git a/apps/sssp_getPath/CMakeLists.txt b/apps/sssp_getPath/CMakeLists.txt new file mode 100644 index 0000000000..83619b5dd8 --- /dev/null +++ b/apps/sssp_getPath/CMakeLists.txt @@ -0,0 +1,2 @@ +project(sssp_getpath) +add_graphlab_executable(sssp_getpath sssp_getpath.cpp) diff --git a/apps/sssp_getPath/sssp_getpath.cpp b/apps/sssp_getPath/sssp_getpath.cpp new file mode 100644 index 0000000000..162ff1f0ba --- /dev/null +++ b/apps/sssp_getPath/sssp_getpath.cpp @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2009 Carnegie Mellon University. + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * For more about this software visit: + * + * http://www.graphlab.ml.cmu.edu + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/** +* \brief The type used to measure distances in the graph. +*/ +typedef float distance_type; + +struct vertex_data { + distance_type dist; + distance_type parent_node; + std::vector paths; + + vertex_data(distance_type dist = std::numeric_limits::max()) : + dist(dist), parent_node(dist) { + } + + void save(graphlab::oarchive& oarc) const { + oarc << paths << dist << parent_node; + } + + void load(graphlab::iarchive& iarc) { + iarc >> paths >> dist >> parent_node; + } +};// end of vertex data + +/** +* \brief The distance associated with the edge. +*/ +struct edge_data { + distance_type dist; + edge_data(distance_type dist = 1) : + dist(dist) { + } + + void save(graphlab::oarchive& oarc) const { + oarc << dist; + } + + void load(graphlab::iarchive& iarc) { + iarc >> dist; + } +}; + + +/** +* \brief The graph type encodes the distances between vertices and +* edges +*/ +typedef graphlab::distributed_graph graph_type; + + +/** +* \brief Get the other vertex in the edge. +*/ +inline graph_type::vertex_type +get_other_vertex(const graph_type::edge_type& edge, + const graph_type::vertex_type& vertex) { + return vertex.id() == edge.source().id()? edge.target() : edge.source(); +} + +/** +* \brief Collect Parent node id. +*/ +inline graph_type::vertex_type +get_path(const graph_type::edge_type& edge, + const graph_type::vertex_type& vertex) { + return vertex.id() == edge.source().id()? edge.source() : edge.target(); +} + + +/** +* \brief Use directed or undireced edges. +*/ +bool DIRECTED_SSSP = false; + + +/** +* \brief This class is used as the gather type. +*/ +struct min_distance_type : graphlab::IS_POD_TYPE { + distance_type dist; + distance_type parent_node; + min_distance_type(distance_type dist = + std::numeric_limits::max(), + distance_type parent_node = std::numeric_limits::max()) : + dist(dist),parent_node(parent_node) { } + min_distance_type& operator+=(const min_distance_type& other) { + dist = std::min(dist, other.dist); + return *this; + } +}; + + +/** +* \brief The single source shortest path vertex program. +*/ +class sssp : + public graphlab::ivertex_program, + public graphlab::IS_POD_TYPE { + distance_type min_dist; + distance_type parent_node; + bool changed; +public: + + + void init(icontext_type& context, const vertex_type& vertex, + const min_distance_type& msg) { + min_dist = msg.dist; + parent_node=msg.parent_node; + } + + /** + * \brief We use the messaging model to compute the SSSP update + */ + edge_dir_type gather_edges(icontext_type& context, + const vertex_type& vertex) const { + return graphlab::NO_EDGES; + }; // end of gather_edges + + /** + * \brief If the distance is smaller then update + */ + void apply(icontext_type& context, vertex_type& vertex, + const graphlab::empty& empty) { + changed = false; + if(vertex.data().dist > min_dist) { + changed = true; + vertex.data().dist = min_dist; + vertex.data().paths.push_back(parent_node); + } + } + + /** + * \brief Determine if SSSP should run on all edges or just in edges + */ + edge_dir_type scatter_edges(icontext_type& context, + const vertex_type& vertex) const { + if(changed) + return DIRECTED_SSSP? graphlab::OUT_EDGES : graphlab::ALL_EDGES; + else return graphlab::NO_EDGES; + }; // end of scatter_edges + + /** + * \brief The scatter function just signal adjacent pages + */ + void scatter(icontext_type& context, const vertex_type& vertex, + edge_type& edge) const { + const vertex_type other = get_other_vertex(edge, vertex); + const vertex_type path = get_path(edge, vertex); + distance_type newd = vertex.data().dist + edge.data().dist; + if (other.data().dist > newd) { + const min_distance_type msg(newd,path.id()); + context.signal(other, msg); + } + } // end of scatter + +}; // end of shortest path vertex program + +/** +* \brief We want to save the final graph so we define a write which will be +* used in graph.save("path/prefix", pagerank_writer()) to save the graph. +*/ +struct shortest_path_writer { + std::string save_vertex(const graph_type::vertex_type& vtx) { + std::stringstream strm; + + if (vtx.data().dist == 0) { + strm << vtx.id() << "\t" << vtx.data().dist << "\t" <<"'"<< vtx.id()<<"'" + << std::endl; + } else { + strm << vtx.id() << "\t" << vtx.data().dist << "\t"; + for (size_t i = 0; i < vtx.data().paths.size(); ++i) { + strm << "'" << vtx.data().paths[i] << "'"; + } + strm << std::endl; + } + return strm.str(); + } + std::string save_edge(graph_type::edge_type e) { + return ""; + } +}; + +struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { + size_t degree; + graphlab::vertex_id_type vid; + max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { + if (degree < other.degree) { + (*this) = other; + } + return (*this); + } +}; + +max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { + max_deg_vertex_reducer red; + red.degree = vtx.num_in_edges() + vtx.num_out_edges(); + red.vid = vtx.id(); + return red; +} + +int main(int argc, char** argv) { + // Initialize control plain using mpi + graphlab::mpi_tools::init(argc, argv); + graphlab::distributed_control dc; + global_logger().set_log_level(LOG_INFO); + + // Parse command line options ----------------------------------------------- + graphlab::command_line_options + clopts("Single Source Shortest Path Algorithm."); + std::string graph_dir; + std::string format = "adj"; + std::string exec_type = "synchronous"; + size_t powerlaw = 0; + std::vector sources; + bool max_degree_source = false; + clopts.attach_option("graph", graph_dir, + "The graph file. If none is provided " + "then a toy graph will be created"); + clopts.add_positional("graph"); + clopts.attach_option("format", format, + "graph format"); + clopts.attach_option("source", sources, + "The source vertices"); + clopts.attach_option("max_degree_source", max_degree_source, + "Add the vertex with maximum degree as a source"); + + clopts.add_positional("source"); + + clopts.attach_option("directed", DIRECTED_SSSP, + "Treat edges as directed."); + + clopts.attach_option("engine", exec_type, + "The engine type synchronous or asynchronous"); + + clopts.attach_option("powerlaw", powerlaw, + "Generate a synthetic powerlaw out-degree graph. "); + std::string saveprefix; + clopts.attach_option("saveprefix", saveprefix, + "If set, will save the resultant pagerank to a " + "sequence of files with prefix saveprefix"); + + if(!clopts.parse(argc, argv)) { + dc.cout() << "Error in parsing command line arguments." << std::endl; + return EXIT_FAILURE; + } + + + // Build the graph ---------------------------------------------------------- + graph_type graph(dc, clopts); + if(powerlaw > 0) { // make a synthetic graph + dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; + graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); + } else if (graph_dir.length() > 0) { // Load the graph from a file + dc.cout() << "Loading graph in format: "<< format << std::endl; + graph.load_format(graph_dir, format); + } else { + dc.cout() << "graph or powerlaw option must be specified" << std::endl; + clopts.print_description(); + return EXIT_FAILURE; + } + // must call finalize before querying the graph + graph.finalize(); + dc.cout() << "#vertices: " << graph.num_vertices() << std::endl + << "#edges: " << graph.num_edges() << std::endl; + + + + if(sources.empty()) { + if (max_degree_source == false) { + dc.cout() + << "No source vertex provided. Adding vertex 0 as source" + << std::endl; + sources.push_back(0); + } + } + + if (max_degree_source) { + max_deg_vertex_reducer v = graph.map_reduce_vertices(find_max_deg_vertex); + dc.cout() + << "No source vertex provided. Using highest degree vertex " << v.vid << " as source." + << std::endl; + sources.push_back(v.vid); + } + + + + // Running The Engine ------------------------------------------------------- + graphlab::omni_engine engine(dc, graph, exec_type, clopts); + + + + // Signal all the vertices in the source set + for(size_t i = 0; i < sources.size(); ++i) { + engine.signal(sources[i], min_distance_type(0)); + } + + engine.start(); + const float runtime = engine.elapsed_seconds(); + dc.cout() << "Finished Running engine in " << runtime + << " seconds." << std::endl; + + + // Save the final graph ----------------------------------------------------- + if (saveprefix != "") { + graph.save(saveprefix, shortest_path_writer(), + false, // do not gzip + true, // save vertices + false); // do not save edges + } + + // Tear-down communication layer and quit ----------------------------------- + graphlab::mpi_tools::finalize(); + return EXIT_SUCCESS; +} // End of main +// We render this entire program in the documentation \ No newline at end of file From 31cb8af845bd56dd6784ef327c72db2c5fb70bcd Mon Sep 17 00:00:00 2001 From: Kailash Joshi Date: Sun, 27 Jul 2014 18:14:28 -0600 Subject: [PATCH 2/4] SSSP now collects alternative paths as well --- apps/sssp_getPath/sssp_getpath.cpp | 603 ++++++++++++++--------------- 1 file changed, 297 insertions(+), 306 deletions(-) diff --git a/apps/sssp_getPath/sssp_getpath.cpp b/apps/sssp_getPath/sssp_getpath.cpp index 162ff1f0ba..b8c072c673 100644 --- a/apps/sssp_getPath/sssp_getpath.cpp +++ b/apps/sssp_getPath/sssp_getpath.cpp @@ -1,26 +1,3 @@ -/* - * Copyright (c) 2009 Carnegie Mellon University. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language - * governing permissions and limitations under the License. - * - * For more about this software visit: - * - * http://www.graphlab.ml.cmu.edu - * - */ - - #include #include #include @@ -32,321 +9,335 @@ #include #include #include - - + /** -* \brief The type used to measure distances in the graph. -*/ + * \brief The type used to measure distances in the graph. + */ typedef float distance_type; - + struct vertex_data { - distance_type dist; - distance_type parent_node; - std::vector paths; - - vertex_data(distance_type dist = std::numeric_limits::max()) : - dist(dist), parent_node(dist) { - } - - void save(graphlab::oarchive& oarc) const { - oarc << paths << dist << parent_node; - } - - void load(graphlab::iarchive& iarc) { - iarc >> paths >> dist >> parent_node; - } -};// end of vertex data + distance_type dist; + distance_type parent_node; +std::vector paths; +vertex_data(distance_type dist = std::numeric_limits::max()) : +dist(dist), parent_node(dist) { +} + +void save(graphlab::oarchive& oarc) const { + oarc << paths << dist << parent_node; +} + +void load(graphlab::iarchive& iarc) { + iarc >> paths >> dist >> parent_node; +} +}; +// end of vertex data /** -* \brief The distance associated with the edge. -*/ + * \brief The distance associated with the edge. + */ struct edge_data { - distance_type dist; - edge_data(distance_type dist = 1) : - dist(dist) { - } - - void save(graphlab::oarchive& oarc) const { - oarc << dist; - } - - void load(graphlab::iarchive& iarc) { - iarc >> dist; - } +distance_type dist; +edge_data(distance_type dist = 1) +: +dist(dist) { +} + +void save(graphlab::oarchive& oarc) const { +oarc << dist; +} + +void load(graphlab::iarchive& iarc) { +iarc >> dist; +} }; - - + /** -* \brief The graph type encodes the distances between vertices and -* edges -*/ + * \brief The graph type encodes the distances between vertices and + * edges + */ typedef graphlab::distributed_graph graph_type; - - + /** -* \brief Get the other vertex in the edge. -*/ + * \brief Get the other vertex in the edge. + */ inline graph_type::vertex_type get_other_vertex(const graph_type::edge_type& edge, - const graph_type::vertex_type& vertex) { - return vertex.id() == edge.source().id()? edge.target() : edge.source(); +const graph_type::vertex_type& vertex) { +return vertex.id() == edge.source().id()? edge.target() : edge.source(); } -/** -* \brief Collect Parent node id. -*/ inline graph_type::vertex_type get_path(const graph_type::edge_type& edge, - const graph_type::vertex_type& vertex) { - return vertex.id() == edge.source().id()? edge.source() : edge.target(); +const graph_type::vertex_type& vertex) { +return vertex.id() == edge.source().id()? edge.source() : edge.target(); } - - + /** -* \brief Use directed or undireced edges. -*/ + * \brief Use directed or undireced edges. + */ bool DIRECTED_SSSP = false; - - + /** -* \brief This class is used as the gather type. -*/ + * \brief This class is used as the gather type. + */ struct min_distance_type : graphlab::IS_POD_TYPE { - distance_type dist; - distance_type parent_node; - min_distance_type(distance_type dist = - std::numeric_limits::max(), - distance_type parent_node = std::numeric_limits::max()) : - dist(dist),parent_node(parent_node) { } - min_distance_type& operator+=(const min_distance_type& other) { - dist = std::min(dist, other.dist); - return *this; - } +distance_type dist; +distance_type parent_node; +distance_type alternate_node; +min_distance_type(distance_type dist = + std::numeric_limits::max(), + distance_type parent_node = std::numeric_limits::max()) : +dist(dist),parent_node(parent_node),alternate_node(dist) {} +min_distance_type& operator+=(const min_distance_type& other) { +if(dist==other.dist) { + alternate_node = other.parent_node; +} +dist = std::min(dist, other.dist); + +return *this; +} }; - - + /** -* \brief The single source shortest path vertex program. -*/ + * \brief The single source shortest path vertex program. + */ class sssp : - public graphlab::ivertex_program, - public graphlab::IS_POD_TYPE { - distance_type min_dist; - distance_type parent_node; - bool changed; +public graphlab::ivertex_program, +public graphlab::IS_POD_TYPE { +distance_type min_dist; +distance_type parent_node; +distance_type alternate_node; +bool changed; public: - - - void init(icontext_type& context, const vertex_type& vertex, - const min_distance_type& msg) { - min_dist = msg.dist; - parent_node=msg.parent_node; - } - - /** - * \brief We use the messaging model to compute the SSSP update - */ - edge_dir_type gather_edges(icontext_type& context, - const vertex_type& vertex) const { - return graphlab::NO_EDGES; - }; // end of gather_edges - - /** - * \brief If the distance is smaller then update - */ - void apply(icontext_type& context, vertex_type& vertex, - const graphlab::empty& empty) { - changed = false; - if(vertex.data().dist > min_dist) { - changed = true; - vertex.data().dist = min_dist; - vertex.data().paths.push_back(parent_node); - } - } - - /** - * \brief Determine if SSSP should run on all edges or just in edges - */ - edge_dir_type scatter_edges(icontext_type& context, - const vertex_type& vertex) const { - if(changed) - return DIRECTED_SSSP? graphlab::OUT_EDGES : graphlab::ALL_EDGES; - else return graphlab::NO_EDGES; - }; // end of scatter_edges - - /** - * \brief The scatter function just signal adjacent pages - */ - void scatter(icontext_type& context, const vertex_type& vertex, - edge_type& edge) const { - const vertex_type other = get_other_vertex(edge, vertex); - const vertex_type path = get_path(edge, vertex); - distance_type newd = vertex.data().dist + edge.data().dist; - if (other.data().dist > newd) { - const min_distance_type msg(newd,path.id()); - context.signal(other, msg); - } - } // end of scatter - + +void init(icontext_type& context, const vertex_type& vertex, + const min_distance_type& msg) { +min_dist = msg.dist; +parent_node=msg.parent_node; +alternate_node = msg.alternate_node; +} + +/** + * \brief We use the messaging model to compute the SSSP update + */ +edge_dir_type gather_edges(icontext_type& context, + const vertex_type& vertex) const { +return graphlab::NO_EDGES; +}; // end of gather_edges + +/** + * \brief If the distance is smaller then update + */ +void apply(icontext_type& context, vertex_type& vertex, + const graphlab::empty& empty) { +changed = false; + +if(alternate_node==std::numeric_limits::max()) { + vertex.data().paths.push_back(parent_node); +} else { + vertex.data().paths.push_back(parent_node); + vertex.data().paths.push_back(alternate_node); + +} +if(vertex.data().dist > min_dist) { + changed = true; + vertex.data().dist = min_dist; + +} +} + +/** + * \brief Determine if SSSP should run on all edges or just in edges + */ +edge_dir_type scatter_edges(icontext_type& context, + const vertex_type& vertex) const { +if(changed) +return DIRECTED_SSSP? graphlab::OUT_EDGES : graphlab::ALL_EDGES; +else return graphlab::NO_EDGES; +}; // end of scatter_edges + +/** + * \brief The scatter function just signal adjacent pages + */ +void scatter(icontext_type& context, const vertex_type& vertex, + edge_type& edge) const { +const vertex_type other = get_other_vertex(edge, vertex); +const vertex_type path = get_path(edge, vertex); +distance_type newd = vertex.data().dist + edge.data().dist; +if (other.data().dist > newd) { + const min_distance_type msg(newd,path.id()); + context.signal(other, msg); +} +} // end of scatter + }; // end of shortest path vertex program - + /** -* \brief We want to save the final graph so we define a write which will be -* used in graph.save("path/prefix", pagerank_writer()) to save the graph. -*/ + * \brief We want to save the final graph so we define a write which will be + * used in graph.save("path/prefix", pagerank_writer()) to save the graph. + */ struct shortest_path_writer { - std::string save_vertex(const graph_type::vertex_type& vtx) { - std::stringstream strm; - - if (vtx.data().dist == 0) { - strm << vtx.id() << "\t" << vtx.data().dist << "\t" <<"'"<< vtx.id()<<"'" - << std::endl; - } else { - strm << vtx.id() << "\t" << vtx.data().dist << "\t"; - for (size_t i = 0; i < vtx.data().paths.size(); ++i) { - strm << "'" << vtx.data().paths[i] << "'"; - } - strm << std::endl; - } - return strm.str(); - } - std::string save_edge(graph_type::edge_type e) { - return ""; - } +std::string save_vertex(const graph_type::vertex_type& vtx) { +std::stringstream strm; + +if (vtx.data().dist == 0) { +strm << vtx.id() << "\t" << vtx.data().dist << "\t" <<"'"<< vtx.id()<<"'" +<< std::endl; +} else { +strm << vtx.id() << "\t" << vtx.data().dist << "\t"; +for (size_t i = 0; i < vtx.data().paths.size(); ++i) { + strm << "'" << vtx.data().paths[i] << "'"; +} +strm << std::endl; +} +return strm.str(); +} +std::string save_edge(graph_type::edge_type e) { +return ""; +} }; - + struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { - size_t degree; - graphlab::vertex_id_type vid; - max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { - if (degree < other.degree) { - (*this) = other; - } - return (*this); - } +size_t degree; +graphlab::vertex_id_type vid; +max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { +if (degree < other.degree) { +(*this) = other; +} +return (*this); +} }; - + max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { - max_deg_vertex_reducer red; - red.degree = vtx.num_in_edges() + vtx.num_out_edges(); - red.vid = vtx.id(); - return red; +max_deg_vertex_reducer red; +red.degree = vtx.num_in_edges() + vtx.num_out_edges(); +red.vid = vtx.id(); +return red; +} +/** + * \brief remove Duplicate node from the list + */ +void filter(graph_type::vertex_type& v) { +sort( v.data().paths.begin(), v.data().paths.end() ); +v.data().paths.erase( unique( v.data().paths.begin(), v.data().paths.end() ), v.data().paths.end() ); } - int main(int argc, char** argv) { - // Initialize control plain using mpi - graphlab::mpi_tools::init(argc, argv); - graphlab::distributed_control dc; - global_logger().set_log_level(LOG_INFO); - - // Parse command line options ----------------------------------------------- - graphlab::command_line_options - clopts("Single Source Shortest Path Algorithm."); - std::string graph_dir; - std::string format = "adj"; - std::string exec_type = "synchronous"; - size_t powerlaw = 0; - std::vector sources; - bool max_degree_source = false; - clopts.attach_option("graph", graph_dir, - "The graph file. If none is provided " - "then a toy graph will be created"); - clopts.add_positional("graph"); - clopts.attach_option("format", format, - "graph format"); - clopts.attach_option("source", sources, - "The source vertices"); - clopts.attach_option("max_degree_source", max_degree_source, - "Add the vertex with maximum degree as a source"); - - clopts.add_positional("source"); - - clopts.attach_option("directed", DIRECTED_SSSP, - "Treat edges as directed."); - - clopts.attach_option("engine", exec_type, - "The engine type synchronous or asynchronous"); - - clopts.attach_option("powerlaw", powerlaw, - "Generate a synthetic powerlaw out-degree graph. "); - std::string saveprefix; - clopts.attach_option("saveprefix", saveprefix, - "If set, will save the resultant pagerank to a " - "sequence of files with prefix saveprefix"); - - if(!clopts.parse(argc, argv)) { - dc.cout() << "Error in parsing command line arguments." << std::endl; - return EXIT_FAILURE; - } - - - // Build the graph ---------------------------------------------------------- - graph_type graph(dc, clopts); - if(powerlaw > 0) { // make a synthetic graph - dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; - graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); - } else if (graph_dir.length() > 0) { // Load the graph from a file - dc.cout() << "Loading graph in format: "<< format << std::endl; - graph.load_format(graph_dir, format); - } else { - dc.cout() << "graph or powerlaw option must be specified" << std::endl; - clopts.print_description(); - return EXIT_FAILURE; - } - // must call finalize before querying the graph - graph.finalize(); - dc.cout() << "#vertices: " << graph.num_vertices() << std::endl - << "#edges: " << graph.num_edges() << std::endl; - - - - if(sources.empty()) { - if (max_degree_source == false) { - dc.cout() - << "No source vertex provided. Adding vertex 0 as source" - << std::endl; - sources.push_back(0); - } - } - - if (max_degree_source) { - max_deg_vertex_reducer v = graph.map_reduce_vertices(find_max_deg_vertex); - dc.cout() - << "No source vertex provided. Using highest degree vertex " << v.vid << " as source." - << std::endl; - sources.push_back(v.vid); - } - - - - // Running The Engine ------------------------------------------------------- - graphlab::omni_engine engine(dc, graph, exec_type, clopts); - - - - // Signal all the vertices in the source set - for(size_t i = 0; i < sources.size(); ++i) { - engine.signal(sources[i], min_distance_type(0)); - } - - engine.start(); - const float runtime = engine.elapsed_seconds(); - dc.cout() << "Finished Running engine in " << runtime - << " seconds." << std::endl; - - - // Save the final graph ----------------------------------------------------- - if (saveprefix != "") { - graph.save(saveprefix, shortest_path_writer(), - false, // do not gzip - true, // save vertices - false); // do not save edges - } - - // Tear-down communication layer and quit ----------------------------------- - graphlab::mpi_tools::finalize(); - return EXIT_SUCCESS; + // Initialize control plain using mpi +graphlab +::mpi_tools::init(argc, argv); +graphlab +::distributed_control dc; +global_logger().set_log_level(LOG_INFO); + + // Parse command line options ----------------------------------------------- +graphlab +::command_line_options +clopts("Single Source Shortest Path Algorithm."); +std +::string graph_dir; +std +::string format = "adj"; +std +::string exec_type = "synchronous"; +size_t powerlaw = 0; +std +::vector sources; +bool max_degree_source = false; +clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " +"then a toy graph will be created"); +clopts.add_positional("graph"); +clopts.attach_option("format", format, "graph format"); +clopts.attach_option("source", sources, "The source vertices"); +clopts.attach_option("max_degree_source", max_degree_source, +"Add the vertex with maximum degree as a source"); + +clopts.add_positional("source"); + +clopts.attach_option("directed", DIRECTED_SSSP, "Treat edges as directed."); + +clopts.attach_option("engine", exec_type, +"The engine type synchronous or asynchronous"); + +clopts.attach_option("powerlaw", powerlaw, +"Generate a synthetic powerlaw out-degree graph. "); +std +::string saveprefix; +clopts.attach_option("saveprefix", saveprefix, +"If set, will save the resultant pagerank to a " +"sequence of files with prefix saveprefix"); + +if (!clopts.parse(argc, argv)) { +dc.cout() << "Error in parsing command line arguments." << std +::endl; +return EXIT_FAILURE; +} + + // Build the graph ---------------------------------------------------------- +graph_type graph( dc, clopts); +if (powerlaw > 0) { // make a synthetic graph +dc.cout() << "Loading synthetic Powerlaw graph." << std +::endl; +graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); +} else if (graph_dir.length() > 0) { // Load the graph from a file +dc.cout() << "Loading graph in format: " << format << std +::endl; +graph.load_format(graph_dir, format); +} else { +dc.cout() << "graph or powerlaw option must be specified" << std +::endl; +clopts.print_description(); +return EXIT_FAILURE; +} + // must call finalize before querying the graph +graph.finalize(); +dc.cout() << "#vertices: " << graph.num_vertices() << std +::endl +<< "#edges: " << graph.num_edges() << std::endl; + +if (sources.empty()) { +if (max_degree_source == false) { +dc.cout() << "No source vertex provided. Adding vertex 0 as source" << std +::endl; +sources.push_back(0); +} +} + +if (max_degree_source) { +max_deg_vertex_reducer v = graph.map_reduce_vertices < max_deg_vertex_reducer + > (find_max_deg_vertex); +dc.cout() << "No source vertex provided. Using highest degree vertex " << v.vid + << " as source." << std +::endl; +sources.push_back(v.vid); +} + + // Running The Engine ------------------------------------------------------- +graphlab +::omni_engine engine(dc, graph, exec_type, clopts); + + // Signal all the vertices in the source set +for (size_t i = 0; i < sources.size(); ++i) { +engine.signal(sources[i], min_distance_type(0)); +} + +engine.start(); +const float runtime = engine.elapsed_seconds(); +dc.cout() << "Finished Running engine in " << runtime << " seconds." << std +::endl; +graph.transform_vertices(filter); + + // Save the final graph ----------------------------------------------------- +if (saveprefix != "") { +graph.save(saveprefix, shortest_path_writer(), false, // do not gzip + true, // save vertices + false); // do not save edges +} + + // Tear-down communication layer and quit ----------------------------------- +graphlab +::mpi_tools::finalize(); +return EXIT_SUCCESS; } // End of main -// We render this entire program in the documentation \ No newline at end of file From c6ddfd83f6313bb1608cf703d29e8f68673c8065 Mon Sep 17 00:00:00 2001 From: Kailash Joshi Date: Tue, 5 Aug 2014 12:00:01 -0600 Subject: [PATCH 3/4] Fixed Bugs and tested --- apps/sssp_getPath/sssp_getpath.cpp | 577 +++++++++++++++-------------- 1 file changed, 300 insertions(+), 277 deletions(-) diff --git a/apps/sssp_getPath/sssp_getpath.cpp b/apps/sssp_getPath/sssp_getpath.cpp index b8c072c673..833280b5cc 100644 --- a/apps/sssp_getPath/sssp_getpath.cpp +++ b/apps/sssp_getPath/sssp_getpath.cpp @@ -1,3 +1,26 @@ +/** + * Copyright (c) 2009 Carnegie Mellon University. + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * For more about this software visit: + * + * http://www.graphlab.ml.cmu.edu + * + */ + + #include #include #include @@ -9,48 +32,44 @@ #include #include #include - +#include +#include /** * \brief The type used to measure distances in the graph. */ typedef float distance_type; +/** + * \brief The type used to collect paths in the graph. + */ +typedef std::set path_type; struct vertex_data { - distance_type dist; - distance_type parent_node; -std::vector paths; + distance_type dist; + distance_type parent_node; + path_type paths; -vertex_data(distance_type dist = std::numeric_limits::max()) : -dist(dist), parent_node(dist) { -} + vertex_data(distance_type dist = std::numeric_limits::max()) : + dist(dist), parent_node(dist) { + } -void save(graphlab::oarchive& oarc) const { - oarc << paths << dist << parent_node; -} + void save(graphlab::oarchive& oarc) const { + oarc << paths << dist << parent_node; + } + + void load(graphlab::iarchive& iarc) { + iarc >> paths >> dist >> parent_node; + } +};// end of vertex data -void load(graphlab::iarchive& iarc) { - iarc >> paths >> dist >> parent_node; -} -}; -// end of vertex data /** * \brief The distance associated with the edge. */ -struct edge_data { -distance_type dist; -edge_data(distance_type dist = 1) -: -dist(dist) { -} - -void save(graphlab::oarchive& oarc) const { -oarc << dist; -} - -void load(graphlab::iarchive& iarc) { -iarc >> dist; -} -}; +struct edge_data: graphlab::IS_POD_TYPE { + distance_type dist; + edge_data(distance_type dist = 1) : + dist(dist) { + } +};// end of edge data /** * \brief The graph type encodes the distances between vertices and @@ -61,16 +80,15 @@ typedef graphlab::distributed_graph graph_type; /** * \brief Get the other vertex in the edge. */ -inline graph_type::vertex_type -get_other_vertex(const graph_type::edge_type& edge, -const graph_type::vertex_type& vertex) { -return vertex.id() == edge.source().id()? edge.target() : edge.source(); +inline graph_type::vertex_type get_other_vertex( + const graph_type::edge_type& edge, + const graph_type::vertex_type& vertex) { + return vertex.id() == edge.source().id() ? edge.target() : edge.source(); } -inline graph_type::vertex_type -get_path(const graph_type::edge_type& edge, -const graph_type::vertex_type& vertex) { -return vertex.id() == edge.source().id()? edge.source() : edge.target(); +inline graph_type::vertex_type get_parent_node(const graph_type::edge_type& edge, + const graph_type::vertex_type& vertex) { + return vertex.id() == edge.source().id() ? edge.source() : edge.target(); } /** @@ -81,263 +99,268 @@ bool DIRECTED_SSSP = false; /** * \brief This class is used as the gather type. */ -struct min_distance_type : graphlab::IS_POD_TYPE { -distance_type dist; -distance_type parent_node; -distance_type alternate_node; -min_distance_type(distance_type dist = - std::numeric_limits::max(), - distance_type parent_node = std::numeric_limits::max()) : -dist(dist),parent_node(parent_node),alternate_node(dist) {} -min_distance_type& operator+=(const min_distance_type& other) { -if(dist==other.dist) { - alternate_node = other.parent_node; -} -dist = std::min(dist, other.dist); - -return *this; -} +struct min_distance_type { + distance_type dist; + distance_type parent_node; + path_type alternate_paths; + bool isAlternatePath; + min_distance_type(distance_type dist = + std::numeric_limits::max(), + distance_type parent_node = + std::numeric_limits::max()) : + dist(dist), parent_node(parent_node), isAlternatePath(false) { + } + min_distance_type& operator+=(const min_distance_type& other) { + if (dist == other.dist) { + alternate_paths.insert(other.parent_node); + isAlternatePath = true; + } else { + isAlternatePath = false; + } + dist = std::min(dist, other.dist); + return *this; + } + void save(graphlab::oarchive& oarc) const { + oarc << alternate_paths << dist << parent_node << isAlternatePath; + } + + void load(graphlab::iarchive& iarc) { + iarc >> alternate_paths >> dist >> parent_node >> isAlternatePath; + } }; /** * \brief The single source shortest path vertex program. */ -class sssp : -public graphlab::ivertex_program, -public graphlab::IS_POD_TYPE { -distance_type min_dist; -distance_type parent_node; -distance_type alternate_node; -bool changed; +class sssp: public graphlab::ivertex_program { + distance_type min_dist; + distance_type parent_node; + path_type alternate_paths; + bool isAlternatePath; + bool changed; public: -void init(icontext_type& context, const vertex_type& vertex, - const min_distance_type& msg) { -min_dist = msg.dist; -parent_node=msg.parent_node; -alternate_node = msg.alternate_node; -} - -/** - * \brief We use the messaging model to compute the SSSP update - */ -edge_dir_type gather_edges(icontext_type& context, - const vertex_type& vertex) const { -return graphlab::NO_EDGES; -}; // end of gather_edges - -/** - * \brief If the distance is smaller then update - */ -void apply(icontext_type& context, vertex_type& vertex, - const graphlab::empty& empty) { -changed = false; - -if(alternate_node==std::numeric_limits::max()) { - vertex.data().paths.push_back(parent_node); -} else { - vertex.data().paths.push_back(parent_node); - vertex.data().paths.push_back(alternate_node); - -} -if(vertex.data().dist > min_dist) { - changed = true; - vertex.data().dist = min_dist; - -} -} - -/** - * \brief Determine if SSSP should run on all edges or just in edges - */ -edge_dir_type scatter_edges(icontext_type& context, - const vertex_type& vertex) const { -if(changed) -return DIRECTED_SSSP? graphlab::OUT_EDGES : graphlab::ALL_EDGES; -else return graphlab::NO_EDGES; -}; // end of scatter_edges - -/** - * \brief The scatter function just signal adjacent pages - */ -void scatter(icontext_type& context, const vertex_type& vertex, - edge_type& edge) const { -const vertex_type other = get_other_vertex(edge, vertex); -const vertex_type path = get_path(edge, vertex); -distance_type newd = vertex.data().dist + edge.data().dist; -if (other.data().dist > newd) { - const min_distance_type msg(newd,path.id()); - context.signal(other, msg); -} -} // end of scatter - -}; // end of shortest path vertex program + void init(icontext_type& context, const vertex_type& vertex, + const min_distance_type& msg) { + min_dist = msg.dist; + parent_node = msg.parent_node; + isAlternatePath = msg.isAlternatePath; + alternate_paths = msg.alternate_paths; + + } + + /** + * \brief We use the messaging model to compute the SSSP update + */ + edge_dir_type gather_edges(icontext_type& context, + const vertex_type& vertex) const { + return graphlab::NO_EDGES; + } + ; // end of gather_edges + + /** + * \brief If the distance is smaller then update + */ + void apply(icontext_type& context, vertex_type& vertex, + const graphlab::empty& empty) { + changed = false; + + if (isAlternatePath == false) { + vertex.data().paths.insert(parent_node); + } else { + vertex.data().paths = alternate_paths; + vertex.data().paths.insert(parent_node); + } + if (vertex.data().dist > min_dist) { + changed = true; + vertex.data().dist = min_dist; + } + } + + /** + * \brief Determine if SSSP should run on all edges or just in edges + */ + edge_dir_type scatter_edges(icontext_type& context, + const vertex_type& vertex) const { + if (changed) + return DIRECTED_SSSP ? graphlab::OUT_EDGES : graphlab::ALL_EDGES; + else + return graphlab::NO_EDGES; + } + ; // end of scatter_edges + + /** + * \brief The scatter function just signal adjacent pages + */ + void scatter(icontext_type& context, const vertex_type& vertex, + edge_type& edge) const { + const vertex_type other = get_other_vertex(edge, vertex); + const vertex_type path = get_parent_node(edge, vertex); + distance_type newd = vertex.data().dist + edge.data().dist; + + if (other.data().dist > newd) { + const min_distance_type msg(newd, path.id()); + context.signal(other, msg); + } + } // end of scatter + + void save(graphlab::oarchive& oarc) const { + oarc << alternate_paths << parent_node << min_dist << isAlternatePath + << changed; + } + + void load(graphlab::iarchive& iarc) { + iarc >> alternate_paths >> parent_node >> min_dist >> isAlternatePath + >> changed; + } + +};// end of shortest path vertex program /** * \brief We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. */ struct shortest_path_writer { -std::string save_vertex(const graph_type::vertex_type& vtx) { -std::stringstream strm; - -if (vtx.data().dist == 0) { -strm << vtx.id() << "\t" << vtx.data().dist << "\t" <<"'"<< vtx.id()<<"'" -<< std::endl; -} else { -strm << vtx.id() << "\t" << vtx.data().dist << "\t"; -for (size_t i = 0; i < vtx.data().paths.size(); ++i) { - strm << "'" << vtx.data().paths[i] << "'"; -} -strm << std::endl; -} -return strm.str(); -} -std::string save_edge(graph_type::edge_type e) { -return ""; -} + std::string save_vertex(const graph_type::vertex_type& vtx) { + std::stringstream strm; + + if (vtx.data().dist == 0) { + strm << vtx.id() << "\t" << vtx.data().dist << "\t" << "'" + << vtx.id() << "'" << std::endl; + } else { + strm << vtx.id() << "\t" << vtx.data().dist << "\t"; + std::set::iterator it; + + for (it = vtx.data().paths.begin(); it != vtx.data().paths.end(); + it++) { + strm << "'" << *it << "'"; + } + strm << std::endl; + } + return strm.str(); + } + std::string save_edge(graph_type::edge_type e) { + return ""; + } }; struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { -size_t degree; -graphlab::vertex_id_type vid; -max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { -if (degree < other.degree) { -(*this) = other; -} -return (*this); -} + size_t degree; + graphlab::vertex_id_type vid; + max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { + if (degree < other.degree) { + (*this) = other; + } + return (*this); + } }; max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { -max_deg_vertex_reducer red; -red.degree = vtx.num_in_edges() + vtx.num_out_edges(); -red.vid = vtx.id(); -return red; -} -/** - * \brief remove Duplicate node from the list - */ -void filter(graph_type::vertex_type& v) { -sort( v.data().paths.begin(), v.data().paths.end() ); -v.data().paths.erase( unique( v.data().paths.begin(), v.data().paths.end() ), v.data().paths.end() ); -} -int main(int argc, char** argv) { - // Initialize control plain using mpi -graphlab -::mpi_tools::init(argc, argv); -graphlab -::distributed_control dc; -global_logger().set_log_level(LOG_INFO); - - // Parse command line options ----------------------------------------------- -graphlab -::command_line_options -clopts("Single Source Shortest Path Algorithm."); -std -::string graph_dir; -std -::string format = "adj"; -std -::string exec_type = "synchronous"; -size_t powerlaw = 0; -std -::vector sources; -bool max_degree_source = false; -clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " -"then a toy graph will be created"); -clopts.add_positional("graph"); -clopts.attach_option("format", format, "graph format"); -clopts.attach_option("source", sources, "The source vertices"); -clopts.attach_option("max_degree_source", max_degree_source, -"Add the vertex with maximum degree as a source"); - -clopts.add_positional("source"); - -clopts.attach_option("directed", DIRECTED_SSSP, "Treat edges as directed."); - -clopts.attach_option("engine", exec_type, -"The engine type synchronous or asynchronous"); - -clopts.attach_option("powerlaw", powerlaw, -"Generate a synthetic powerlaw out-degree graph. "); -std -::string saveprefix; -clopts.attach_option("saveprefix", saveprefix, -"If set, will save the resultant pagerank to a " -"sequence of files with prefix saveprefix"); - -if (!clopts.parse(argc, argv)) { -dc.cout() << "Error in parsing command line arguments." << std -::endl; -return EXIT_FAILURE; -} - - // Build the graph ---------------------------------------------------------- -graph_type graph( dc, clopts); -if (powerlaw > 0) { // make a synthetic graph -dc.cout() << "Loading synthetic Powerlaw graph." << std -::endl; -graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); -} else if (graph_dir.length() > 0) { // Load the graph from a file -dc.cout() << "Loading graph in format: " << format << std -::endl; -graph.load_format(graph_dir, format); -} else { -dc.cout() << "graph or powerlaw option must be specified" << std -::endl; -clopts.print_description(); -return EXIT_FAILURE; -} - // must call finalize before querying the graph -graph.finalize(); -dc.cout() << "#vertices: " << graph.num_vertices() << std -::endl -<< "#edges: " << graph.num_edges() << std::endl; - -if (sources.empty()) { -if (max_degree_source == false) { -dc.cout() << "No source vertex provided. Adding vertex 0 as source" << std -::endl; -sources.push_back(0); -} + max_deg_vertex_reducer red; + red.degree = vtx.num_in_edges() + vtx.num_out_edges(); + red.vid = vtx.id(); + return red; } -if (max_degree_source) { -max_deg_vertex_reducer v = graph.map_reduce_vertices < max_deg_vertex_reducer - > (find_max_deg_vertex); -dc.cout() << "No source vertex provided. Using highest degree vertex " << v.vid - << " as source." << std -::endl; -sources.push_back(v.vid); -} - - // Running The Engine ------------------------------------------------------- -graphlab -::omni_engine engine(dc, graph, exec_type, clopts); - - // Signal all the vertices in the source set -for (size_t i = 0; i < sources.size(); ++i) { -engine.signal(sources[i], min_distance_type(0)); -} - -engine.start(); -const float runtime = engine.elapsed_seconds(); -dc.cout() << "Finished Running engine in " << runtime << " seconds." << std -::endl; -graph.transform_vertices(filter); - - // Save the final graph ----------------------------------------------------- -if (saveprefix != "") { -graph.save(saveprefix, shortest_path_writer(), false, // do not gzip - true, // save vertices - false); // do not save edges -} - - // Tear-down communication layer and quit ----------------------------------- -graphlab -::mpi_tools::finalize(); -return EXIT_SUCCESS; -} // End of main +int main(int argc, char** argv) { + // Initialize control plain using mpi + graphlab::mpi_tools::init(argc, argv); + graphlab::distributed_control dc; + global_logger().set_log_level(LOG_INFO); + + // Parse command line options ----------------------------------------------- + graphlab::command_line_options clopts( + "Single Source Shortest Path Algorithm."); + std::string graph_dir; + std::string format = "adj"; + std::string exec_type = "synchronous"; + size_t powerlaw = 0; + std::vector sources; + bool max_degree_source = false; + clopts.attach_option("graph", graph_dir, + "The graph file. If none is provided " + "then a toy graph will be created"); + clopts.add_positional("graph"); + clopts.attach_option("format", format, "graph format"); + clopts.attach_option("source", sources, "The source vertices"); + clopts.attach_option("max_degree_source", max_degree_source, + "Add the vertex with maximum degree as a source"); + + clopts.add_positional("source"); + + clopts.attach_option("directed", DIRECTED_SSSP, "Treat edges as directed."); + + clopts.attach_option("engine", exec_type, + "The engine type synchronous or asynchronous"); + + clopts.attach_option("powerlaw", powerlaw, + "Generate a synthetic powerlaw out-degree graph. "); + std::string saveprefix; + clopts.attach_option("saveprefix", saveprefix, + "If set, will save the resultant pagerank to a " + "sequence of files with prefix saveprefix"); + + if (!clopts.parse(argc, argv)) { + dc.cout() << "Error in parsing command line arguments." << std::endl; + return EXIT_FAILURE; + } + + // Build the graph ---------------------------------------------------------- + graph_type graph(dc, clopts); + if (powerlaw > 0) { // make a synthetic graph + dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; + graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); + } else if (graph_dir.length() > 0) { // Load the graph from a file + dc.cout() << "Loading graph in format: " << format << std::endl; + graph.load_format(graph_dir, format); + } else { + dc.cout() << "graph or powerlaw option must be specified" << std::endl; + clopts.print_description(); + return EXIT_FAILURE; + } + // must call finalize before querying the graph + graph.finalize(); + dc.cout() << "#vertices: " << graph.num_vertices() << std::endl + << "#edges: " << graph.num_edges() << std::endl; + + if (sources.empty()) { + if (max_degree_source == false) { + dc.cout() << "No source vertex provided. Adding vertex 0 as source" + << std::endl; + sources.push_back(0); + } + } + + if (max_degree_source) { + max_deg_vertex_reducer v = graph.map_reduce_vertices + < max_deg_vertex_reducer > (find_max_deg_vertex); + dc.cout() << "No source vertex provided. Using highest degree vertex " + << v.vid << " as source." << std::endl; + sources.push_back(v.vid); + } + + // Running The Engine ------------------------------------------------------- + graphlab::omni_engine engine(dc, graph, exec_type, clopts); + + // Signal all the vertices in the source set + for (size_t i = 0; i < sources.size(); ++i) { + engine.signal(sources[i], min_distance_type(0)); + } + + engine.start(); + const float runtime = engine.elapsed_seconds(); + dc.cout() << "Finished Running engine in " << runtime << " seconds." + << std::endl; + + // Save the final graph ----------------------------------------------------- + if (saveprefix != "") { + graph.save(saveprefix, shortest_path_writer(), false, // do not gzip + true, // save vertices + false); // do not save edges + } + + // Tear-down communication layer and quit ----------------------------------- + graphlab::mpi_tools::finalize(); + return EXIT_SUCCESS; +} // End of main \ No newline at end of file From 5908307ee60c09a5955bb3ee7b583fde6607b1d9 Mon Sep 17 00:00:00 2001 From: Kailash Joshi Date: Sun, 10 Aug 2014 23:19:25 -0600 Subject: [PATCH 4/4] Shortest Path capturing path and distance --- toolkits/graph_analytics/CMakeLists.txt | 1 + toolkits/graph_analytics/sssp_mult_path.cpp | 401 ++++++++++++++++++++ 2 files changed, 402 insertions(+) create mode 100644 toolkits/graph_analytics/sssp_mult_path.cpp diff --git a/toolkits/graph_analytics/CMakeLists.txt b/toolkits/graph_analytics/CMakeLists.txt index 0af59e692e..7d33a90b0b 100644 --- a/toolkits/graph_analytics/CMakeLists.txt +++ b/toolkits/graph_analytics/CMakeLists.txt @@ -14,6 +14,7 @@ add_graphlab_executable(approximate_diameter approximate_diameter.cpp) add_graphlab_executable(eigen_vector_normalization eigen_vector_normalization.cpp) add_graphlab_executable(graph_laplacian graph_laplacian.cpp) add_graphlab_executable(partitioning partitioning.cpp) +add_graphlab_executable(sssp_mult_path sssp_mult_path.cpp) # add_graphlab_executable(warp_pagerank warp_pagerank.cpp) # add_graphlab_executable(warp_pagerank2 warp_pagerank2.cpp) diff --git a/toolkits/graph_analytics/sssp_mult_path.cpp b/toolkits/graph_analytics/sssp_mult_path.cpp new file mode 100644 index 0000000000..26941b8da8 --- /dev/null +++ b/toolkits/graph_analytics/sssp_mult_path.cpp @@ -0,0 +1,401 @@ +/** + * Copyright (c) 2009 Carnegie Mellon University. + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * For more about this software visit: + * + * http://www.graphlab.ml.cmu.edu + * + */ + +#include +#include +#include +#include +#include + +#include + +/** + * \brief The type used to measure distances in the graph. + */ +typedef float distance_type; + +/** + * \brief The type used to collect paths in the graph. + */ +typedef std::set path_type; + +/** + * \brief The current distance of the vertex. + */ +struct vertex_data { + distance_type dist; + distance_type parent_node; + path_type paths; + + vertex_data(distance_type dist = std::numeric_limits::max()) : + dist(dist), parent_node(dist) { + } + + void save(graphlab::oarchive& oarc) const { + oarc << paths << dist << parent_node; + } + + void load(graphlab::iarchive& iarc) { + iarc >> paths >> dist >> parent_node; + } +}; // end of vertex data + + + +/** + * \brief The distance associated with the edge. + */ +struct edge_data : graphlab::IS_POD_TYPE { + distance_type dist; + edge_data(distance_type dist = 1) : dist(dist) { } +}; // end of edge data + + +/** + * \brief The graph type encodes the distances between vertices and + * edges + */ +typedef graphlab::distributed_graph graph_type; + + +/** + * \brief Get the other vertex in the edge. + */ +inline graph_type::vertex_type +get_other_vertex(const graph_type::edge_type& edge, + const graph_type::vertex_type& vertex) { + return vertex.id() == edge.source().id()? edge.target() : edge.source(); +} + +/** + * \brief Get the parent node of the vertex ID. + */ +inline graph_type::vertex_type get_parent_node(const graph_type::edge_type& edge, + const graph_type::vertex_type& vertex) { + return vertex.id() == edge.source().id() ? edge.source() : edge.target(); +} + +/** + * \brief Use directed or undireced edges. + */ +bool DIRECTED_SSSP = false; + + +/** + * \brief This class is used as the gather type. + */ +struct min_distance_type { + distance_type dist; + distance_type parent_node; + path_type alternate_paths; + bool isAlternatePath; + min_distance_type(distance_type dist = + std::numeric_limits::max(), + distance_type parent_node = + std::numeric_limits::max()) : dist(dist), parent_node(parent_node), isAlternatePath(false) { } + min_distance_type& operator+=(const min_distance_type& other) { + if (dist == other.dist) { + alternate_paths.insert(other.parent_node); + isAlternatePath = true; + } else { + isAlternatePath = false; + } + dist = std::min(dist, other.dist); + return *this; + } + + void save(graphlab::oarchive& oarc) const { + oarc << alternate_paths << dist << parent_node << isAlternatePath; + } + + void load(graphlab::iarchive& iarc) { + iarc >> alternate_paths >> dist >> parent_node >> isAlternatePath; + } +}; + + +/** + * \brief The single source shortest path vertex program. + */ +class sssp : + public graphlab::ivertex_program + { + distance_type min_dist; + bool changed; + distance_type parent_node; + path_type alternate_paths; + bool isAlternatePath; +public: + + + void init(icontext_type& context, const vertex_type& vertex, + const min_distance_type& msg) { + min_dist = msg.dist; + parent_node = msg.parent_node; + isAlternatePath = msg.isAlternatePath; + alternate_paths = msg.alternate_paths; + } + + /** + * \brief We use the messaging model to compute the SSSP update + */ + edge_dir_type gather_edges(icontext_type& context, + const vertex_type& vertex) const { + return graphlab::NO_EDGES; + }; // end of gather_edges + + + // /** + // * \brief Collect the distance to the neighbor + // */ + // min_distance_type gather(icontext_type& context, const vertex_type& vertex, + // edge_type& edge) const { + // return min_distance_type(edge.data() + + // get_other_vertex(edge, vertex).data()); + // } // end of gather function + + + /** + * \brief If the distance is smaller then update + */ + void apply(icontext_type& context, vertex_type& vertex, + const graphlab::empty& empty) { + changed = false; + if(vertex.data().dist > min_dist) { + changed = true; + vertex.data().dist = min_dist; + } + if (isAlternatePath == false) { + vertex.data().paths.insert(parent_node); + } else { + vertex.data().paths = alternate_paths; + vertex.data().paths.insert(parent_node); + } + } + + /** + * \brief Determine if SSSP should run on all edges or just in edges + */ + edge_dir_type scatter_edges(icontext_type& context, + const vertex_type& vertex) const { + if(changed) + return DIRECTED_SSSP? graphlab::OUT_EDGES : graphlab::ALL_EDGES; + else return graphlab::NO_EDGES; + }; // end of scatter_edges + + /** + * \brief The scatter function just signal adjacent pages + */ + void scatter(icontext_type& context, const vertex_type& vertex, + edge_type& edge) const { + const vertex_type other = get_other_vertex(edge, vertex); + const vertex_type path = get_parent_node(edge, vertex); + distance_type newd = vertex.data().dist + edge.data().dist; + if (other.data().dist > newd) { + const min_distance_type msg(newd,path.id()); + context.signal(other, msg); + } + } // end of scatter + + void save(graphlab::oarchive& oarc) const { + oarc << alternate_paths << parent_node << min_dist << isAlternatePath + << changed; + } + + void load(graphlab::iarchive& iarc) { + iarc >> alternate_paths >> parent_node >> min_dist >> isAlternatePath + >> changed; + } + +}; // end of shortest path vertex program + + + + +/** + * \brief We want to save the final graph so we define a write which will be + * used in graph.save("path/prefix", pagerank_writer()) to save the graph. + */ +struct shortest_path_writer { + std::string save_vertex(const graph_type::vertex_type& vtx) { + std::stringstream strm; + if (vtx.data().dist == 0) { + strm << vtx.id() << "\t" << vtx.data().dist << "\t" << "'" + << vtx.id() << "'" << std::endl; + } else { + strm << vtx.id() << "\t" << vtx.data().dist << "\t"; + std::set::iterator it; + + for (it = vtx.data().paths.begin(); it != vtx.data().paths.end(); + it++) { + strm << "'" << *it << "'"; + } + strm << std::endl; + } + return strm.str(); + } + std::string save_edge(graph_type::edge_type e) { return ""; } +}; // end of shortest_path_writer + + + +struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { + size_t degree; + graphlab::vertex_id_type vid; + max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { + if (degree < other.degree) { + (*this) = other; + } + return (*this); + } +}; + +max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { + max_deg_vertex_reducer red; + red.degree = vtx.num_in_edges() + vtx.num_out_edges(); + red.vid = vtx.id(); + return red; +} + +int main(int argc, char** argv) { + // Initialize control plain using mpi + graphlab::mpi_tools::init(argc, argv); + graphlab::distributed_control dc; + global_logger().set_log_level(LOG_INFO); + + // Parse command line options ----------------------------------------------- + graphlab::command_line_options + clopts("Single Source Shortest Path Algorithm."); + std::string graph_dir; + std::string format = "adj"; + std::string exec_type = "synchronous"; + size_t powerlaw = 0; + std::vector sources; + bool max_degree_source = false; + clopts.attach_option("graph", graph_dir, + "The graph file. If none is provided " + "then a toy graph will be created"); + clopts.add_positional("graph"); + clopts.attach_option("format", format, + "graph format"); + clopts.attach_option("source", sources, + "The source vertices"); + clopts.attach_option("max_degree_source", max_degree_source, + "Add the vertex with maximum degree as a source"); + + clopts.add_positional("source"); + + clopts.attach_option("directed", DIRECTED_SSSP, + "Treat edges as directed."); + + clopts.attach_option("engine", exec_type, + "The engine type synchronous or asynchronous"); + + + clopts.attach_option("powerlaw", powerlaw, + "Generate a synthetic powerlaw out-degree graph. "); + std::string saveprefix; + clopts.attach_option("saveprefix", saveprefix, + "If set, will save the resultant pagerank to a " + "sequence of files with prefix saveprefix"); + + if(!clopts.parse(argc, argv)) { + dc.cout() << "Error in parsing command line arguments." << std::endl; + return EXIT_FAILURE; + } + + + // Build the graph ---------------------------------------------------------- + graph_type graph(dc, clopts); + if(powerlaw > 0) { // make a synthetic graph + dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; + graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); + } else if (graph_dir.length() > 0) { // Load the graph from a file + dc.cout() << "Loading graph in format: "<< format << std::endl; + graph.load_format(graph_dir, format); + } else { + dc.cout() << "graph or powerlaw option must be specified" << std::endl; + clopts.print_description(); + return EXIT_FAILURE; + } + // must call finalize before querying the graph + graph.finalize(); + dc.cout() << "#vertices: " << graph.num_vertices() << std::endl + << "#edges: " << graph.num_edges() << std::endl; + + + + if(sources.empty()) { + if (max_degree_source == false) { + dc.cout() + << "No source vertex provided. Adding vertex 0 as source" + << std::endl; + sources.push_back(0); + } + } + + if (max_degree_source) { + max_deg_vertex_reducer v = graph.map_reduce_vertices(find_max_deg_vertex); + dc.cout() + << "No source vertex provided. Using highest degree vertex " << v.vid << " as source." + << std::endl; + sources.push_back(v.vid); + } + + + + // Running The Engine ------------------------------------------------------- + graphlab::omni_engine engine(dc, graph, exec_type, clopts); + + + + // Signal all the vertices in the source set + for(size_t i = 0; i < sources.size(); ++i) { + engine.signal(sources[i], min_distance_type(0)); + } + + engine.start(); + const float runtime = engine.elapsed_seconds(); + dc.cout() << "Finished Running engine in " << runtime + << " seconds." << std::endl; + + + // Save the final graph ----------------------------------------------------- + if (saveprefix != "") { + graph.save(saveprefix, shortest_path_writer(), + false, // do not gzip + true, // save vertices + false); // do not save edges + } + + // Tear-down communication layer and quit ----------------------------------- + graphlab::mpi_tools::finalize(); + return EXIT_SUCCESS; +} // End of main + + +// We render this entire program in the documentation + + +