-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhiCGraphProc.py
More file actions
67 lines (55 loc) · 1.92 KB
/
hiCGraphProc.py
File metadata and controls
67 lines (55 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
"""
Miscellaneous functions for Hi-C graph data analysis
Part of ChromaWalker package
"""
import numpy as np
from scipy.stats import gmean
import pandas as pd
#####################
# Epigenetics data handling
def _epigen_extractDensity(track, partitiondata, mode='lin'):
    """
    Extract partition-average density values from epigen data track.

    Parameters:
        track: sliceable sequence of values; each partition is read as
            track[st:en].
        partitiondata: iterable yielding (key, (st, en)) pairs.
            NOTE(review): a dict would have to be passed as its items();
            iterating a dict directly yields only its keys.
        mode: 'lin' for arithmetic mean (np.average), 'log' for geometric
            mean (scipy.stats.gmean).

    Returns:
        dict mapping each key to the mean of track[st:en], or None (after
        printing a message) when mode is not one of the two known values.
    """
    if mode == 'lin':
        func = np.average
    elif mode == 'log':
        func = gmean
    else:
        # Single-argument call form: prints the same text whether print is
        # the Python 2 statement or the Python 3 function.
        print('_epigen_extractDensity: Invalid mode %s!' % mode)
        return
    return {key: func(track[st:en]) for key, (st, en) in partitiondata}
def _epigen_vecToZScore(vec):
    """
    Convert vector of values into Z scores.

    Each value is shifted by the arithmetic mean and divided by the sample
    standard deviation (population standard deviation scaled by
    sqrt(n / (n - 1))), where n is the total number of elements.

    Parameters:
        vec: array-like of numeric values.

    Returns:
        (vec - mean) / sd, element-wise.

    Raises:
        ZeroDivisionError: if vec holds exactly one element (n - 1 == 0).
    """
    mean = np.average(vec)
    # np.average and np.std reduce over every element, so the correction
    # must use the total element count rather than the first-axis length.
    n = np.size(vec)
    # Kept as an explicit factor (not ddof=1) so a single-element input
    # still raises ZeroDivisionError instead of silently producing NaN.
    sd = np.std(vec) * np.sqrt(n / (n - 1.0))
    return (vec - mean) / sd
def _epigen_vecToZScore_weighted(vec, weights, printsummary=False):
    """
    Convert vector of values (weighted) into Z scores.

    The mean and variance are weighted by `weights`; the variance is then
    scaled by n / (n - 1), where n is the number of elements in vec.

    Parameters:
        vec: array of numeric values.
        weights: array of weights, multiplied element-wise with vec.
        printsummary: if True, print min, max, mean, sigma, sigma/mean and
            the min/max Z score on one tab-separated line.

    Returns:
        (vec - mean) / sd, element-wise.

    Raises:
        ZeroDivisionError: if vec holds exactly one element (n - 1 == 0).
    """
    wsum = np.sum(weights)
    mean = np.sum(vec * weights) / wsum
    # Sum squared deviations about the mean instead of sum(w*(x^2 - mean^2)).
    # The two are algebraically equal, but the latter subtracts two huge,
    # nearly equal numbers when the spread is small relative to the mean and
    # can yield a wrong, zero, or even negative variance.
    var = np.sum((vec - mean) ** 2 * weights) / wsum
    # Total element count, matching the reductions above.
    n = np.size(vec)
    sd = np.sqrt(var * (n / (n - 1.0)))
    zscores = (vec - mean) / sd
    if printsummary:
        # Single-argument call form works as both the Python 2 statement
        # and the Python 3 function.
        print('minval maxval meanval sigma sigma/mu minZ maxZ = ' +
              '\t%e\t%e\t%e\t%e\t%e\t%e\t%e' % (
                  np.min(vec), np.max(vec), mean, sd, sd / mean,
                  np.min(zscores), np.max(zscores)))
    return zscores
def _epigen_dumpNodeData(datalist, cols, fname, sep=','):
    """
    Dump node data to a delimited text file.

    datalist is indexed by column position: datalist[i] supplies all the
    values for the column named cols[i]. The columns are assembled into a
    pandas DataFrame (ordered as in cols) and written to fname with fields
    separated by sep and without the index column.
    NOTE(review): an earlier description of this function spoke of
    datalist[rowindex][colindex]; the code consumes one entry per column,
    so confirm callers pass column-major data.
    """
    columns = {}
    for position, name in enumerate(cols):
        columns[name] = datalist[position]
    table = pd.DataFrame(columns, columns=cols)
    table.to_csv(fname, sep=sep, index=False)