-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCleaner.py
More file actions
53 lines (45 loc) · 1.44 KB
/
Cleaner.py
File metadata and controls
53 lines (45 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
"""
Created on Wed May 5 15:36:25 2021
@author: Santiago
"""
import json
import os
import networkx as nx
from tqdm import tqdm
from networkx.readwrite import json_graph
from multiprocessing import Process
import glob
# Script so that each graph has the same node set, even though there are isolated nodes.
def Cleaner(arr):
    """Pad every graph in *arr* to a common node set and save each as JSON.

    The union of all node sets across the graphs is computed; any node a
    given graph is missing is added to it as an isolated node (tagged
    ``bipartite=0``).  Each padded graph is then written to
    ``test_graphs/graph<G.name>.json`` in node-link format.

    Parameters
    ----------
    arr : list of networkx.Graph
        Graphs to align.  Each graph's ``name`` attribute is used to build
        its output filename, so names should be unique and non-empty.
    """
    out_dir = "test_graphs/"  # renamed: `dir` shadowed the builtin
    # Union of all node sets; a set handles the duplicates directly instead
    # of concatenating lists and deduplicating afterwards.
    comp_set = set()
    for G in arr:
        comp_set.update(G.nodes)
    # Create the output directory once, up front (EAFP: exist_ok avoids the
    # exists()/makedirs() race the original had inside the loop).
    os.makedirs(out_dir, exist_ok=True)
    # For each graph, add the missing nodes and write the cleaned result.
    for G in arr:
        # Set difference replaces the original O(n) `comp not in list(G)`
        # membership test per candidate node.
        for comp in comp_set - set(G.nodes):
            G.add_node(comp, bipartite=0)
        # Save them in final graph files, keyed by the graph's name.
        dataset = "graph" + G.name
        res = json_graph.node_link_data(G)
        # Write the new cleaned graph.
        with open(out_dir + dataset + ".json", "w") as outfile:
            json.dump(res, outfile)
if __name__ == '__main__':
    # Collect all the uncleaned graphs in a list for Cleaner.
    # (Dropped the pointless recursive=True: it only matters with a "**"
    # pattern, which this glob does not use.)
    arr = []
    for result_path in glob.glob("test_graphs/graph*"):
        with open(result_path, "r", encoding="utf-8") as file:
            data = json.load(file)
        g = json_graph.node_link_graph(data)
        # Set the graph name if not done already: Cleaner builds the output
        # filename from g.name, so unnamed graphs would all collapse onto a
        # single "graph.json" file, overwriting each other.  Derive the name
        # from the source filename by stripping the "graph" prefix.
        if not g.name:
            base = os.path.splitext(os.path.basename(result_path))[0]
            g.name = base[len("graph"):]
        arr.append(g)
    # Clean the graphs and save them.
    Cleaner(arr)