-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompute_soaps.py
More file actions
150 lines (108 loc) · 4.19 KB
/
compute_soaps.py
File metadata and controls
150 lines (108 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from Pool.mpi_pool import MPIPool
import sys,argparse
import numpy as np
from libs.io import Frame_Dataset_h5
from tqdm import tqdm
import h5py
from glob import glob
from time import ctime
import spglib as spg
sys.path.insert(0,'/home/musil/git/glosim2/')
sys.path.insert(0,'/local/git/glosim2/')
from libmatch.soap import get_Soaps
def compute_soap(fn,soap_params,frame_names=None,nprocess=1,string_dtype ='S200'):
    """Compute normalised average SOAP vectors for the frames of one file.

    The frames are read from ``fn`` with ``Frame_Dataset_h5``, passed to
    ``get_Soaps``, and the ``'AVG'`` entry of every result is divided by its
    norm (``np.linalg.norm``). Two ``.npy`` files are written next to ``fn``.

    Parameters
    ----------
    fn : str
        Path of the frame file. The last three characters are stripped to
        build the output file names (i.e. a 3-character suffix such as
        ``.h5`` is assumed).
    soap_params : dict
        Keyword arguments forwarded unchanged to ``get_Soaps``.
    frame_names : sequence of str, optional
        Names of the frames to process; when ``None`` every name exposed by
        the reader is used.
    nprocess : int
        Forwarded to ``get_Soaps``.
    string_dtype : str
        NumPy dtype used to store the ``(fn, frame_name)`` pairs.

    Returns
    -------
    tuple
        ``(fn, fn_soaps, fn_idx2frame)``: the input path followed by the
        paths of the saved SOAP matrix and of the saved index array.
    """
    reader = Frame_Dataset_h5(fn, mode='r', disable_pbar=True)
    if frame_names is None:
        frame_names = reader.names

    loaded = reader.load_frames(frame_names, frame_type='quippy')
    ordered_frames = [loaded[name] for name in frame_names]

    fingerprints = get_Soaps(ordered_frames, nprocess=nprocess, **soap_params)

    # Pair every fingerprint with the name of the frame it was computed from.
    pairs = list(zip(fingerprints, frame_names))
    averages = [fingerprint['AVG'] for fingerprint, _ in pairs]

    soaps = np.asarray(
        [vector / np.linalg.norm(vector) for vector in averages],
        dtype='f8',
    )
    idx2frame = np.asarray(
        [np.array([fn, name], dtype=string_dtype) for _, name in pairs],
        dtype=string_dtype,
    )

    # Output files share the input path minus its last three characters.
    stem = fn[:-3]
    fn_soaps = stem + '-soaps.npy'
    fn_idx2frame = stem + '-idx2frame.npy'

    np.save(fn_soaps, soaps)
    np.save(fn_idx2frame, idx2frame)

    return (fn, fn_soaps, fn_idx2frame)
def compute_soap_wrapper(kwargs):
    """Call ``compute_soap`` with the entries of ``kwargs`` as keyword arguments.

    Lets a single-argument ``map`` pass a whole argument set as one dict.
    Returns whatever ``compute_soap`` returns.
    """
    result = compute_soap(**kwargs)
    return result
if __name__ == '__main__':
    # Driver: count the frames to process in every input file, pre-allocate
    # the output HDF5 file, distribute the per-file SOAP computation over the
    # MPI pool, then copy the per-file .npy results into the HDF5 datasets.
    pool = MPIPool()

    if not pool.is_master():
        # Non-master ranks only serve the tasks mapped by the master.
        pool.wait()
        sys.exit(0)

    try:
        dataPath = '/home/musil/workspace/qmat/structures/'
        fns = glob(dataPath + 'relaxed_structures_step1_*.h5')
        if not fns:
            # Fail early with a clear message instead of an obscure error
            # from np.max([]) / fns[0] further down.
            raise IOError(
                'No input file matches {}relaxed_structures_step1_*.h5'.format(dataPath))

        chunkSize = 50
        fout = dataPath + 'descriptor-chunk{}_new.h5'.format(chunkSize)

        # SOAP hyper-parameters forwarded to get_Soaps.
        centerweight = 1.
        gaussian_width = 0.5
        cutoff = 3.5
        cutoff_transition_width = 0.5
        nmax = 10
        lmax = 15
        nocenters = []
        is_fast_average = True

        print(len(fns))

        Ntot = 0
        frame_readers = {}
        sizes = [0]
        Nstr = []
        frame_names = {}
        for fn in tqdm(fns, desc='Frame count'):
            frame_names[fn] = []
            Nstr.append(len(fn))
            rr = Frame_Dataset_h5(fn, mode='r')
            ffs = rr.names
            with h5py.File(fn, 'r') as f:
                # Keep only the frames flagged with a single equivalent site.
                for frame_name in ffs:
                    if f[frame_name].attrs['Nequivalent_site'] == 1:
                        frame_names[fn].append(frame_name)
            sizes.append(len(frame_names[fn]))
            Ntot += len(frame_names[fn])
            frame_readers[fn] = rr
        print(Ntot)
        if Ntot == 0:
            raise ValueError(
                "No frame with attribute Nequivalent_site == 1 found in the input files")

        # Width of the fixed-size strings stored in 'idx2frame'.
        # NOTE(review): derived from the file-path lengths only; a frame name
        # longer than the longest path would be truncated when stored.
        Nstr = np.max(Nstr) + 1
        print(Nstr)

        # Row range [start, stop) of every input file in the output datasets.
        offsets = np.cumsum(sizes)
        strides = {fn: (offsets[it], offsets[it + 1]) for it, fn in enumerate(fns)}
        print(strides)

        soap_params = {
            'centerweight': centerweight,
            'gaussian_width': gaussian_width, 'cutoff': cutoff,
            'cutoff_transition_width': cutoff_transition_width,
            'nmax': nmax, 'lmax': lmax, 'is_fast_average': is_fast_average,
            'chem_channels': False, 'nocenters': nocenters, 'dispbar': True,
        }

        # Compute one descriptor up front to learn the SOAP vector length.
        # NOTE(review): relies on the first file containing 'frame_0'.
        frame = frame_readers[fns[0]].load_frame('frame_0')
        fings = get_Soaps([frame], nprocess=1, **soap_params)[0]['AVG']
        Nsoap = fings.shape[0]
        print(Nsoap)

        # h5py rejects a chunk shape larger than the dataset shape, so the
        # number of rows per chunk is capped by the number of rows.
        chunkRows = min(chunkSize, Ntot)

        with h5py.File(fout, mode='w', libver='latest') as f:
            f.create_dataset("idx2frame", (Ntot, 2),
                             dtype="S{}".format(Nstr), chunks=(chunkRows, 2))
            f.create_dataset("data", (Ntot, Nsoap), dtype='f8',
                             chunks=(chunkRows, Nsoap))
            f.attrs['created'] = ctime()
            # .items() works on both Python 2 and Python 3.
            for k, v in soap_params.items():
                f.attrs[k] = v

        inputs = [dict(fn=fn, frame_names=frame_names[fn], soap_params=soap_params,
                       nprocess=1, string_dtype="S{}".format(Nstr)) for fn in fns]

        fn_names = pool.map(compute_soap_wrapper, inputs)

        print('End of pool')

        # Gather the per-file results into their row range of the output file.
        with h5py.File(fout, mode='r+', libver='latest') as f:
            data = f['data']
            idx2frame = f['idx2frame']
            for fn, fn_soaps, fn_idx2frame in tqdm(fn_names, desc='save h5'):
                st, nd = strides[fn]
                data[st:nd, :] = np.load(fn_soaps)
                idx2frame[st:nd, :] = np.load(fn_idx2frame)
    finally:
        # Always close the pool, even when the master fails; otherwise the
        # other ranks presumably stay blocked in pool.wait() (confirm against
        # the MPIPool implementation).
        pool.close()