Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#Compiled python modules
*.pyc
*.pyxbldc
*.c
*.so

#Setuptools distribution folder
/dist/
Expand Down
10 changes: 5 additions & 5 deletions bin/nucleoatac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
Script to run nucleoatac.
Various functions for calling nucleosomes using ATAC-Seq data.
Expand All @@ -24,14 +24,14 @@ from nucleoatac.cli import nucleoatac_parser, nucleoatac_main


if __name__ == '__main__':
print "Command run: "+ ' '.join(map(str,sys.argv))
print "nucleoatac version " + __version__
print "start run at: " + time.strftime("%Y-%m-%d %H:%M")
print("Command run: "+ ' '.join(map(str,sys.argv)))
print("nucleoatac version " + __version__)
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))
try:
parser = nucleoatac_parser()
args = parser.parse_args()
nucleoatac_main(args)
print "end run at: " + time.strftime("%Y-%m-%d %H:%M")
print("end run at: " + time.strftime("%Y-%m-%d %H:%M"))
except KeyboardInterrupt:
sys.stderr.write("User interrupted nucleoatac.")
sys.exit(0)
Expand Down
10 changes: 5 additions & 5 deletions bin/pyatac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
Script to run pyatac command line utilies.
Various functions designed for working with paired-end ATAC-Seq data.
Expand All @@ -23,14 +23,14 @@ from pyatac.cli import pyatac_parser, pyatac_main


if __name__ == '__main__':
print "Command run: "+ ' '.join(map(str,sys.argv))
print "pyatac version "+__version__
print "start run at: " + time.strftime("%Y-%m-%d %H:%M")
print("Command run: "+ ' '.join(map(str,sys.argv)))
print("pyatac version "+__version__)
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))
try:
parser = pyatac_parser()
args = parser.parse_args()
pyatac_main(args)
print "end run at: " + time.strftime("%Y-%m-%d %H:%M")
print("end run at: " + time.strftime("%Y-%m-%d %H:%M"))
except KeyboardInterrupt:
sys.stderr.write("User interrupted pyatac.")
sys.exit(0)
Expand Down
4 changes: 2 additions & 2 deletions nucleoatac/Magic.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from pkg_resources import resource_filename
from importlib.resources import files

default_vplot = resource_filename('nucleoatac.vplot','standard_vplot.VMat')
default_vplot = files("nucleoatac.vplot").joinpath("standard_vplot.VMat")
4 changes: 2 additions & 2 deletions nucleoatac/NFRCalling.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def findNFRs(self):
if self.chrom in tbx.contigs:
for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()):
nucs.append(int(row[1]))
for j in xrange(1,len(nucs)):
for j in range(1,len(nucs)):
left = nucs[j-1] + 73
right = nucs[j] - 72
if right <= left:
Expand All @@ -106,7 +106,7 @@ def process(self, params):
self.findNFRs()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self,name)

41 changes: 21 additions & 20 deletions nucleoatac/NucleosomeCalling.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
self.bias_mat.start + offset,
self.bias_mat.end - offset),
vmat.mat,mode = 'valid')[0]
self.vals = self.vals * self.nuc_cov/ self.cov.vals
self.vals = self.vals * self.nuc_cov// self.cov.vals



Expand All @@ -79,7 +79,7 @@ def simulateReads(self):
sim_mat = np.reshape(sim_vect, self.vmat.mat.shape)
return sim_mat
def simulateDist(self, numiters = 1000):
self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters))
self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)]
def analStd(self):
flatv = np.ravel(self.vmat.mat)
var = calculateCov(self.probs, flatv, self.reads)
Expand All @@ -91,9 +91,9 @@ def analMean(self):

def norm(x, v, w, mean):
"""compute values of normal pdf with given mean and sd at values in x"""
norm = (1.0/(np.sqrt(2*np.pi*v)) *
np.exp(-(x - mean)**2/(2*v)))
norm = norm * (w/max(norm))
norm = (1.0//(np.sqrt(2*np.pi*v)) *
np.exp(-(x - mean)**2//(2*v)))
norm = norm * (w//max(norm))
return norm

class Nucleosome(Chunk):
Expand Down Expand Up @@ -124,7 +124,7 @@ def getZScore(self, nuctrack):
s = SignalDistribution(self.start, nuctrack.params.vmat, nuctrack.bias_mat,
self.nuc_cov)
std = s.analStd()
self.z = self.norm_signal / std
self.z = self.norm_signal // std
def getOcc(self, nuctrack):
try:
self.occ = nuctrack.occ.get(pos = self.start)
Expand All @@ -139,7 +139,7 @@ def addNorms(x,params):
"""Add several normal distributions together"""
l = len(x)
fit = np.zeros(l)
i = len(params)/3
i = len(params)//3
for j in range(i):
fit += norm(x,params[j*3],params[3*j+1],params[3*j+2])
return fit
Expand All @@ -149,28 +149,29 @@ def err_func(pars,y):
return sum((addNorms(x, pars) - y)**2)
def fitNorm(guess, bound, sig):
"""Fit a normal to the signal with lower and upperbounds to sd"""
print((guess, bound, sig))
a = (sig,)
res = optimize.minimize(err_func,guess,args = a, bounds=bound,method="L-BFGS-B")
return res
index = self.start - nuctrack.start
allnucs = nuctrack.sorted_nuc_keys
x = bisect_left(allnucs,index)
if x == 0:
left = index - nuctrack.params.nonredundant_sep/3
means = (nuctrack.params.nonredundant_sep/3,)
left = index - nuctrack.params.nonredundant_sep//3
means = (nuctrack.params.nonredundant_sep//3,)
elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep:
left = allnucs[x-1]
means = (index - allnucs[x-1],0)
else:
left = index - nuctrack.params.nonredundant_sep/3
means = (nuctrack.params.nonredundant_sep/3,)
left = index - nuctrack.params.nonredundant_sep//3
means = (nuctrack.params.nonredundant_sep//3,)
if x == len(allnucs)-1:
right = index + nuctrack.params.nonredundant_sep/3 + 1
right = index + nuctrack.params.nonredundant_sep//3 + 1
elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep:
right = allnucs[x+1]
means += (allnucs[x+1] - left,)
else:
right = index + nuctrack.params.nonredundant_sep/3 +1
right = index + nuctrack.params.nonredundant_sep//3 +1
sig = nuctrack.smoothed.vals[left:right]
sig[sig<0] = 0
if len(means)==1:
Expand Down Expand Up @@ -237,14 +238,14 @@ def __init__(self, chunk):
def initialize(self, parameters):
self.params = parameters
def getFragmentMat(self):
self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1),
self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac)
self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1),
self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac)
self.mat.makeFragmentMat(self.params.bam)
def makeBiasMat(self):
self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window,
self.end + self.params.window, 0, self.params.upper)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2,
self.end + self.params.window + self.params.upper/2 + 1, log = True)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2,
self.end + self.params.window + self.params.upper//2 + 1, log = True)
if self.params.fasta is not None:
bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm)
self.bias_mat.makeBiasMat(bias_track)
Expand Down Expand Up @@ -298,7 +299,7 @@ def findAllNucs(self):
#find peaks in normalized sigal
cands1 = call_peaks(combined, min_signal = 0,
sep = self.params.redundant_sep,
boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2)
boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2)
for i in cands1:
nuc = Nucleosome(i + self.start, self)
if nuc.nuc_cov > self.params.min_reads:
Expand All @@ -310,7 +311,7 @@ def findAllNucs(self):
self.nuc_collection[i] = nuc
self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
self.nonredundant = reduce_peaks( self.sorted_nuc_keys,
map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys),
[self.nuc_collection[x].z for x in self.sorted_nuc_keys],
self.params.nonredundant_sep)
self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)
def fit(self):
Expand Down Expand Up @@ -340,7 +341,7 @@ def process(self, params):
self.makeInsertionTrack()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self,name)

34 changes: 18 additions & 16 deletions nucleoatac/Occupancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def getFragmentSizes(self, bamfile, chunklist = None):
self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist)
def modelNFR(self, boundaries = (35,115)):
"""Model NFR distribution with gamma distribution"""
print(boundaries)
b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower
print(b)
boundaries = (min(boundaries[0],b), boundaries[1])
x = np.arange(boundaries[0],boundaries[1])
y = self.fragmentsizes.get(boundaries[0],boundaries[1])
Expand All @@ -42,9 +44,9 @@ def gamma_fit(X,o,p):
nz = x_mod >= 0
else:
nz = x_mod > 0
res[nz] = a * x_mod[nz]**(k-1) * np.exp(-x_mod[nz]/theta) / (theta **k * gamma(k))
res[nz] = a * x_mod[nz]**(k-1) * np.exp(-x_mod[nz]//theta) // (theta **k * gamma(k))
return res
res_score = np.ones(boundaries[0]+1)*np.float('inf')
res_score = np.ones(boundaries[0]+1)*np.float64('inf')
res_param = [0 for i in range(boundaries[0]+1)]
pranges = ((0.01,10),(0.01,150),(0.01,1))
for i in range(15,boundaries[0]+1):
Expand All @@ -67,11 +69,11 @@ def gamma_fit(X,o,p):
def plotFits(self,filename=None):
"""plot the Fits"""
fig = plt.figure()
plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(),
plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(),
label = "Observed")
plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit")
plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model")
plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model")
plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit")
plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model")
plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model")
plt.legend()
plt.xlabel("Fragment size")
plt.ylabel("Relative Frequency")
Expand Down Expand Up @@ -109,8 +111,8 @@ def calculateOccupancy(inserts, bias, params):
nuc_probs = nuc_probs / np.sum(nuc_probs)
nfr_probs = params.nfr_probs * bias
nfr_probs = nfr_probs / np.sum(nfr_probs)
x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs), params.alphas)
logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l)))
x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas]
logliks = np.array([np.sum(x[j]*inserts) for j in range(params.l)])
logliks[np.isnan(logliks)] = -float('inf')
occ = params.alphas[np.argmax(logliks)]
#Compute upper and lower bounds for 95% confidence interval
Expand All @@ -129,11 +131,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params):
"""Calculate Occupancy track"""
offset=self.start - mat.start
if offset<params.flank:
raise Exception("For calculateOccupancyMLE, mat does not have sufficient flanking regions"),offset
raise Exception("For calculateOccupancyMLE, mat does not have sufficient flanking regions")(offset)
self.vals=np.ones(self.end - self.start)*float('nan')
self.lower_bound = np.ones(self.end - self.start)*float('nan')
self.upper_bound =np.ones(self.end - self.start)*float('nan')
for i in xrange(params.halfstep,len(self.vals),params.step):
for i in range(params.halfstep,len(self.vals),params.step):
new_inserts = np.sum(mat.get(lower = 0, upper = params.upper,
start = self.start+i-params.flank, end = self.start+i+params.flank+1),
axis = 1)
Expand Down Expand Up @@ -190,7 +192,7 @@ def __init__(self, insert_dist, upper, fasta, pwm, sep = 120, min_occ = 0.1, fla
if step%2 == 0:
step = step - 1
self.step = step
self.halfstep = (self.step-1) / 2
self.halfstep = (self.step-1) // 2

class OccChunk(Chunk):
"""Class for calculating occupancy and occupancy peaks
Expand All @@ -209,15 +211,15 @@ def makeBiasMat(self):
self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.flank,
self.end + self.params.flank, 0, self.params.upper)
if self.params.fasta is not None:
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2,
self.end + self.params.window + self.params.upper/2 + 1, log = True)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2,
self.end + self.params.window + self.params.upper//2 + 1, log = True)
bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm)
self.bias_mat.makeBiasMat(bias_track)
def calculateOcc(self):
"""calculate occupancy for chunk"""
self.occ = OccupancyTrack(self.chrom,self.start,self.end)
self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params)
self.occ.makeSmoothed(window_len = self.params.window, sd = self.params.flank/3.0)
self.occ.makeSmoothed(window_len = self.params.window, sd = self.params.flank//3.0)
def getCov(self):
"""Get read coverage for regions"""
self.cov = CoverageTrack(self.chrom, self.start, self.end)
Expand All @@ -232,7 +234,7 @@ def callPeaks(self):
def getNucDist(self):
"""Get nucleosomal insert distribution"""
nuc_dist = np.zeros(self.params.upper)
for peak in self.peaks.keys():
for peak in list(self.peaks.keys()):
sub = self.mat.get(start = self.peaks[peak].start-self.params.flank, end = self.peaks[peak].start+1+self.params.flank)
sub_sum = np.sum(sub,axis=1)
sub_sum = sub_sum / float(sum(sub_sum))
Expand All @@ -248,7 +250,7 @@ def process(self, params):
self.callPeaks()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self, name)

Expand Down
5 changes: 3 additions & 2 deletions nucleoatac/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#Define version based on setup script
import pkg_resources
__version__ = pkg_resources.require("NucleoATAC")[0].version
from importlib.metadata import version

__version__ = version("NucleoATAC")
4 changes: 2 additions & 2 deletions nucleoatac/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def nucleoatac_main(args):
print('---------Calling NFR positions--------------------------------------------------')
run_nfr(args)
elif call == "run":
occ_args = parser.parse_args(map(str,['occ','--bed',args.bed,'--bam',args.bam,
occ_args = parser.parse_args(list(map(str,['occ','--bed',args.bed,'--bam',args.bam,
'--fasta', args.fasta, '--pwm', args.pwm,
'--out',args.out,'--cores',args.cores]))
'--out',args.out,'--cores',args.cores])))
vprocess_args = parser.parse_args(['vprocess','--sizes',args.out+'.nuc_dist.txt','--out',args.out])
nuc_args_list = ['nuc','--bed',args.bed,'--bam',args.bam,'--out',args.out,'--cores', str(args.cores),
'--occ_track', args.out + '.occ.bedgraph.gz','--vmat', args.out + '.VMat',
Expand Down
10 changes: 5 additions & 5 deletions nucleoatac/diff_occ.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _diffHelper(arg):
occ.occ, [occ.peaks[i] for i in sorted(occ.peaks.keys())])
occ.removeData()
except Exception as e:
print('Caught exception when processing:\n'+ chunk.asBed()+"\n")
print(('Caught exception when processing:\n'+ chunk.asBed()+"\n"))
traceback.print_exc()
print()
raise e
Expand All @@ -43,7 +43,7 @@ def _writeDiff(pos_queue, out):
for pos in poslist:
pos.write(out_handle)
pos_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing occupancy track\n')
traceback.print_exc()
print()
Expand All @@ -57,9 +57,9 @@ def run_diff(args, bases = 500000):
"""
chrs = read_chrom_sizes_from_bam(args.bam)
pwm = PWM.open(args.pwm)
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down))
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down))
chunks.merge()
maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks])))
maxQueueSize = max(2,int(100 * bases // np.mean([chunk.length() for chunk in chunks])))
#get fragmentsizes
fragment_dist1 = FragmentMixDistribution(0, upper = args.upper)
fragment_dist1.fragmentsizes = FragmentSizes(0, args.upper, vals = FragmentSizes.open(args.sizes1).get(0,args.upper))
Expand All @@ -78,7 +78,7 @@ def run_diff(args, bases = 500000):
diff_process.start()
nuc_dist = np.zeros(args.upper)
for j in sets:
tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params)))
tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params))))
for result in tmp:
diff_queue.put(result[1])
pool1.close()
Expand Down
2 changes: 1 addition & 1 deletion nucleoatac/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, *args):
def read(bedfile, source, min_occ = 0):
"""Make a list of chunks from a tab-delimited bedfile"""
if bedfile[-3:] == '.gz':
infile = gzip.open(bedfile,"r")
infile = gzip.open(bedfile,"rt")
else:
infile = open(bedfile,"r")
out = NucList()
Expand Down
Loading