forked from phbradley/tcr-dist
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic.py
More file actions
133 lines (107 loc) · 3.98 KB
/
basic.py
File metadata and controls
133 lines (107 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
## LAZY-- some basic imports and shared params
##
##
from glob import glob
from os import popen, system, chdir, remove, getcwd, mkdir
from os.path import exists, isdir, isfile
import math
from math import floor,sqrt
from sys import stderr,argv,exit
import random
import paths
from blargs import Parser
from parse_tsv import *
############################################################################################
############################################################################################
## some shared TCRdist analysis pipeline parameters
pipeline_params= {
## Use the update to probability calculation? (March 2017)
'new_probs':True,
## A length-scale factor that lets us fiddle with the distance calculation
## and still get similar clustering and TCRdiv calculations
## The history is that with the original TCRdist we used nice round values of 25/50 for
## several distance thresholds. When we moved to the updated TCRdist, on average
## distances decreased by a factor of 1.355, so as a temporary hack we introduced
## this parameter so that clustering and diversity calculations can be run with
## default parameters and give similar results.
#'distance_threshold_25': 25.0 / 1.355,
'distance_threshold_25': 25.0 / 1.298, ## now moving to a gap penalty in the CDR3 region of 12
}
## naming scheme for the gene segment types, occasionally useful for iterating
segtypes_uppercase = ['VA','JA','VB','JB']
segtypes_lowercase = ['va','ja','vb','jb']
############################################################################################
############################################################################################
## you could modify this function if you have a different cmdline tool for converting svg to png
## like inkscape or cairosvg
##
def convert_svg_to_png( svgfile, pngfile, verbose=True, allow_missing=False, allow_failure=True ):
if not isfile(svgfile):
errmsg = 'Error: convert_svg_to_png: svgfile does not exist: {}'.format(svgfile)
print errmsg
Log( errmsg )
if allow_missing:
return
else:
exit()
cmd = 'convert {} {}'.format( svgfile, pngfile )
if verbose:
print cmd
system(cmd)
if isfile( pngfile ):
## success
return
## cmdline inkscape
cmd = 'inkscape --export-png {} {}'.format( pngfile, svgfile )
if verbose:
print cmd
system(cmd)
if isfile( pngfile ):
## success
return
## this is probably a long-shot, but in case inkscape is installed on mac
inkscape_exe = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
if isfile( inkscape_exe ):
from os.path import abspath
svgfile_full = abspath( svgfile )
pngfile_full = abspath( pngfile )
cmd = '{} --export-png {} {}'.format( inkscape_exe, pngfile_full, svgfile_full )
if verbose:
print cmd
system(cmd)
if isfile( pngfile ):
## success
return
## another possibility
cmd = 'rsvg-convert {} -o {}'.format( svgfile, pngfile )
if verbose:
print cmd
system(cmd)
if isfile( pngfile ):
## success
return
## this might also occur if the svgfile were empty...
errmsg = 'Error: convert command failed: cmd="{}" -- is the "convert" cmdline tool (Imagemagick) installed?'\
.format( cmd )
print errmsg
Log( errmsg )
if not allow_failure:
exit()
def get_mean_and_sdev( l ):
N = len(l)
assert N>0
mean = float( sum( l ) ) / N
sdev = 0.0
for x in l: sdev += ( x- mean )**2
sdev = sqrt( float(sdev)/N )
return mean, sdev
def get_median(l_in):
l = l_in[:] ##make copy
l.sort()
n = len(l)
if n%2: return l[n/2] ## n is odd
else: return 0.5 * ( l[n/2] + l[n/2 - 1 ] ) ## n is even
def Log(s): ## silly legacy helper function
stderr.write(s)
if s and not s.endswith('\n'):
stderr.write('\n')