From 22edcd0d026845c94013e12e5c550a17842edb7f Mon Sep 17 00:00:00 2001 From: Alec Ethell Date: Thu, 9 Apr 2026 16:38:28 -0500 Subject: [PATCH] oops didn't see there was already a py3 branch -- mostly 2to3 handled, with some pyproject.toml/setup.py rework and float/byte 2to3 weirdness --- .gitignore | 2 + bin/nucleoatac | 10 +-- bin/pyatac | 10 +-- nucleoatac/Magic.py | 4 +- nucleoatac/NFRCalling.py | 4 +- nucleoatac/NucleosomeCalling.py | 41 +++++----- nucleoatac/Occupancy.py | 34 ++++---- nucleoatac/__init__.py | 5 +- nucleoatac/cli.py | 4 +- nucleoatac/diff_occ.py | 10 +-- nucleoatac/merge.py | 2 +- nucleoatac/multinomial_cov.pyx | 29 ++++--- nucleoatac/run_nfr.py | 8 +- nucleoatac/run_nuc.py | 21 ++--- nucleoatac/run_occ.py | 16 ++-- pyatac/VMat.py | 40 +++++----- pyatac/__init__.py | 6 +- pyatac/bias.py | 13 ++- pyatac/chunk.py | 14 ++-- pyatac/chunkmat2d.py | 20 ++--- pyatac/fragments.pyx | 136 ++++++++++++++++++-------------- pyatac/fragmentsizes.py | 2 +- pyatac/get_counts.py | 4 +- pyatac/get_cov.py | 10 +-- pyatac/get_ins.py | 16 ++-- pyatac/get_nucleotide.py | 6 +- pyatac/get_pwm.py | 10 +-- pyatac/get_sizes.py | 2 +- pyatac/make_bias_track.py | 12 +-- pyatac/make_bias_vplot.py | 12 +-- pyatac/make_vplot.py | 8 +- pyatac/seq.py | 9 +-- pyatac/signal_around_sites.py | 10 +-- pyatac/tracks.py | 20 ++--- pyatac/utils.py | 6 +- pyproject.toml | 38 +++++++++ setup.py | 85 +++++++++++++------- tests.py | 2 +- tests/test_chunkmat2d.py | 2 +- tests/test_occupancy.py | 4 +- 40 files changed, 388 insertions(+), 299 deletions(-) create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index e8503d2..37fd0e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ #Compiled python modules *.pyc *.pyxbldc +*.c +*.so #Setuptools distribution folder /dist/ diff --git a/bin/nucleoatac b/bin/nucleoatac index b0e08df..c440b68 100644 --- a/bin/nucleoatac +++ b/bin/nucleoatac @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Script to run 
nucleoatac. Various functions for calling nucleosomes using ATAC-Seq data. @@ -24,14 +24,14 @@ from nucleoatac.cli import nucleoatac_parser, nucleoatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "nucleoatac version " + __version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("nucleoatac version " + __version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = nucleoatac_parser() args = parser.parse_args() nucleoatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted nucleoatac.") sys.exit(0) diff --git a/bin/pyatac b/bin/pyatac index cc3dbb4..5d776ef 100644 --- a/bin/pyatac +++ b/bin/pyatac @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Script to run pyatac command line utilies. Various functions designed for working with paired-end ATAC-Seq data. 
@@ -23,14 +23,14 @@ from pyatac.cli import pyatac_parser, pyatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "pyatac version "+__version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("pyatac version "+__version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = pyatac_parser() args = parser.parse_args() pyatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted pyatac.") sys.exit(0) diff --git a/nucleoatac/Magic.py b/nucleoatac/Magic.py index 4090299..be089d1 100644 --- a/nucleoatac/Magic.py +++ b/nucleoatac/Magic.py @@ -1,3 +1,3 @@ -from pkg_resources import resource_filename +from importlib.resources import files -default_vplot = resource_filename('nucleoatac.vplot','standard_vplot.VMat') \ No newline at end of file +default_vplot = files("nucleoatac.vplot").joinpath("standard_vplot.VMat") \ No newline at end of file diff --git a/nucleoatac/NFRCalling.py b/nucleoatac/NFRCalling.py index d41fd4c..a0be397 100644 --- a/nucleoatac/NFRCalling.py +++ b/nucleoatac/NFRCalling.py @@ -89,7 +89,7 @@ def findNFRs(self): if self.chrom in tbx.contigs: for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()): nucs.append(int(row[1])) - for j in xrange(1,len(nucs)): + for j in range(1,len(nucs)): left = nucs[j-1] + 73 right = nucs[j] - 72 if right <= left: @@ -106,7 +106,7 @@ def process(self, params): self.findNFRs() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index d146d73..081761c 100644 --- a/nucleoatac/NucleosomeCalling.py +++ 
b/nucleoatac/NucleosomeCalling.py @@ -61,7 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov): self.bias_mat.start + offset, self.bias_mat.end - offset), vmat.mat,mode = 'valid')[0] - self.vals = self.vals * self.nuc_cov/ self.cov.vals + self.vals = self.vals * self.nuc_cov/ self.cov.vals @@ -79,7 +79,7 @@ def simulateReads(self): sim_mat = np.reshape(sim_vect, self.vmat.mat.shape) return sim_mat def simulateDist(self, numiters = 1000): - self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters)) + self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)] def analStd(self): flatv = np.ravel(self.vmat.mat) var = calculateCov(self.probs, flatv, self.reads) @@ -91,9 +91,9 @@ def analMean(self): def norm(x, v, w, mean): """compute values of normal pdf with given mean and sd at values in x""" - norm = (1.0/(np.sqrt(2*np.pi*v)) * - np.exp(-(x - mean)**2/(2*v))) - norm = norm * (w/max(norm)) + norm = (1.0/(np.sqrt(2*np.pi*v)) * + np.exp(-(x - mean)**2/(2*v))) + norm = norm * (w/max(norm)) return norm class Nucleosome(Chunk): @@ -124,7 +124,7 @@ def getZScore(self, nuctrack): s = SignalDistribution(self.start, nuctrack.params.vmat, nuctrack.bias_mat, self.nuc_cov) std = s.analStd() - self.z = self.norm_signal / std + self.z = self.norm_signal / std def getOcc(self, nuctrack): try: self.occ = nuctrack.occ.get(pos = self.start) @@ -139,7 +139,7 @@ def addNorms(x,params): """Add several normal distributions together""" l = len(x) fit = np.zeros(l) - i = len(params)/3 + i = len(params)//3 for j in range(i): fit += norm(x,params[j*3],params[3*j+1],params[3*j+2]) return fit @@ -149,6 +149,7 @@ def err_func(pars,y): return sum((addNorms(x, pars) - y)**2) def fitNorm(guess, bound, sig): """Fit a normal to the signal with lower and upperbounds to sd""" + # bounds constrain the sd of the fitted normal for L-BFGS-B a = (sig,) res = optimize.minimize(err_func,guess,args = a, bounds=bound,method="L-BFGS-B") return res @@ -156,21 +157,21 @@
def fitNorm(guess, bound, sig): allnucs = nuctrack.sorted_nuc_keys x = bisect_left(allnucs,index) if x == 0: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep: left = allnucs[x-1] means = (index - allnucs[x-1],0) else: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) if x == len(allnucs)-1: - right = index + nuctrack.params.nonredundant_sep/3 + 1 + right = index + nuctrack.params.nonredundant_sep//3 + 1 elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep: right = allnucs[x+1] means += (allnucs[x+1] - left,) else: - right = index + nuctrack.params.nonredundant_sep/3 +1 + right = index + nuctrack.params.nonredundant_sep//3 +1 sig = nuctrack.smoothed.vals[left:right] sig[sig<0] = 0 if len(means)==1: @@ -237,14 +238,14 @@ def __init__(self, chunk): def initialize(self, parameters): self.params = parameters def getFragmentMat(self): - self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1), - self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac) + self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1), + self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac) self.mat.makeFragmentMat(self.params.bam) def makeBiasMat(self): self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window, self.end + self.params.window, 0, self.params.upper) - bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2, - self.end + self.params.window + self.params.upper/2 + 1, log = True) + 
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2, + self.end + self.params.window + self.params.upper//2 + 1, log = True) if self.params.fasta is not None: bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm) self.bias_mat.makeBiasMat(bias_track) @@ -298,7 +299,7 @@ def findAllNucs(self): #find peaks in normalized sigal cands1 = call_peaks(combined, min_signal = 0, sep = self.params.redundant_sep, - boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2) + boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2) for i in cands1: nuc = Nucleosome(i + self.start, self) if nuc.nuc_cov > self.params.min_reads: @@ -310,7 +311,7 @@ def findAllNucs(self): self.nuc_collection[i] = nuc self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys())) self.nonredundant = reduce_peaks( self.sorted_nuc_keys, - map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys), + [self.nuc_collection[x].z for x in self.sorted_nuc_keys], self.params.nonredundant_sep) self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant) def fit(self): @@ -340,7 +341,7 @@ def process(self, params): self.makeInsertionTrack() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/Occupancy.py b/nucleoatac/Occupancy.py index 9e9cd59..4839502 100644 --- a/nucleoatac/Occupancy.py +++ b/nucleoatac/Occupancy.py @@ -28,7 +28,9 @@ def getFragmentSizes(self, bamfile, chunklist = None): self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist) def modelNFR(self, boundaries = (35,115)): """Model NFR distribution with gamma distribution""" + # boundaries give the (lower, upper) fragment-size range modeled as NFR b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower + # b is the modal fragment size below the upper boundary; may tighten the lower boundary boundaries =
(min(boundaries[0],b), boundaries[1]) x = np.arange(boundaries[0],boundaries[1]) y = self.fragmentsizes.get(boundaries[0],boundaries[1]) @@ -42,9 +44,9 @@ def gamma_fit(X,o,p): nz = x_mod >= 0 else: nz = x_mod > 0 - res[nz] = a * x_mod[nz]**(k-1) * np.exp(-x_mod[nz]/theta) / (theta **k * gamma(k)) + res[nz] = a * x_mod[nz]**(k-1) * np.exp(-x_mod[nz]/theta) / (theta **k * gamma(k)) return res - res_score = np.ones(boundaries[0]+1)*np.float('inf') + res_score = np.ones(boundaries[0]+1)*np.float64('inf') res_param = [0 for i in range(boundaries[0]+1)] pranges = ((0.01,10),(0.01,150),(0.01,1)) for i in range(15,boundaries[0]+1): @@ -67,11 +69,11 @@ def gamma_fit(X,o,p): def plotFits(self,filename=None): """plot the Fits""" fig = plt.figure() - plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(), + plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(), label = "Observed") - plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit") - plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model") - plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit") + plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model") plt.legend() plt.xlabel("Fragment size") plt.ylabel("Relative Frequency") @@ -109,8 +111,8 @@ def calculateOccupancy(inserts, bias, params): nuc_probs = nuc_probs / np.sum(nuc_probs) nfr_probs = params.nfr_probs * bias nfr_probs = nfr_probs / np.sum(nfr_probs) - x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs), params.alphas) - logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l))) + x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas] + logliks = np.array([np.sum(x[j]*inserts) for j in
range(params.l)]) logliks[np.isnan(logliks)] = -float('inf') occ = params.alphas[np.argmax(logliks)] #Compute upper and lower bounds for 95% confidence interval @@ -129,11 +131,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params): """Calculate Occupancy track""" offset=self.start - mat.start if offset=0 and col=0: self.mat[row, col] += 1 else: @@ -139,16 +139,16 @@ def __init__(self, chrom, start, end, lower, upper): self.mat = np.ones(self.mat.shape) def makeBiasMat(self, bias_track): """Make 2D matrix representing sequence bias preferences""" - offset = self.upper/2 + offset = self.upper//2 bias = bias_track.get(self.start-offset,self.end+offset) if not bias_track.log: nonzero = np.where(bias !=0)[0] bias = np.log(bias + min(bias[nonzero])) pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower,mid+(i-1)//2]=1 + pattern[i-self.lower,mid-(i//2)]=1 for i in range(self.upper-self.lower): self.mat[i]=np.exp(np.convolve(bias,pattern[i,:],mode='valid')) def normByInsertDist(self, insertsizes): diff --git a/pyatac/fragments.pyx b/pyatac/fragments.pyx index a463206..532753b 100644 --- a/pyatac/fragments.pyx +++ b/pyatac/fragments.pyx @@ -1,146 +1,162 @@ -#### Import needed modules ##### +# cython: language_level=3 + import pyatac.seq as seq import numpy as np cimport numpy as np cimport cython from pysam.libcalignmentfile cimport AlignmentFile, AlignedSegment -from pysam.libcfaidx cimport FastaFile - -DTYPE = np.float -ctypedef np.float_t DTYPE_t +DTYPE = np.float64 +ctypedef np.float64_t DTYPE_t -#### Import needed modules ##### -#import pysam @cython.boundscheck(False) -def makeFragmentMat(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): +def makeFragmentMat(str bamfile, str chrom, int start, int end, + int lower, int upper, int 
atac=1): + cdef int nrow = upper - lower cdef int ncol = end - start - cdef np.ndarray[DTYPE_t, ndim=2] mat = np.zeros( (nrow, ncol), dtype = DTYPE) + cdef np.ndarray[DTYPE_t, ndim=2] mat = np.zeros((nrow, ncol), dtype=DTYPE) + cdef AlignmentFile bamHandle = AlignmentFile(bamfile) cdef AlignedSegment read cdef int l_pos, ilen, row, col + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): if read.is_proper_pair and not read.is_reverse: if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion + l_pos = read.reference_start + 4 ilen = abs(read.template_length) - 8 else: - l_pos = read.pos + l_pos = read.reference_start ilen = abs(read.template_length) + row = ilen - lower - col = (ilen-1)/2 + l_pos - start - if col >= 0 and col < ncol and row < nrow and row >= 0: + col = (ilen - 1) // 2 + l_pos - start + + if 0 <= col < ncol and 0 <= row < nrow: mat[row, col] += 1 + bamHandle.close() return mat + @cython.boundscheck(False) -def getInsertions(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): +def getInsertions(str bamfile, str chrom, int start, int end, + int lower, int upper, int atac=1): + cdef int npos = end - start - cdef np.ndarray[DTYPE_t, ndim=1] mat = np.zeros(npos, dtype = DTYPE) + cdef np.ndarray[DTYPE_t, ndim=1] mat = np.zeros(npos, dtype=DTYPE) + cdef AlignmentFile bamHandle = AlignmentFile(bamfile) cdef AlignedSegment read cdef int l_pos, ilen, r_pos + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): if read.is_proper_pair and not read.is_reverse: if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion + l_pos = read.reference_start + 4 ilen = abs(read.template_length) - 8 else: - l_pos = read.pos + l_pos = read.reference_start ilen = abs(read.template_length) + r_pos = l_pos + ilen - 1 - if ilen >= lower and ilen < upper: - if l_pos >= start 
and l_pos < end: + + if lower <= ilen < upper: + if start <= l_pos < end: mat[l_pos - start] += 1 - if r_pos >= start and r_pos < end: + if start <= r_pos < end: mat[r_pos - start] += 1 + bamHandle.close() return mat @cython.boundscheck(False) -def getStrandedInsertions(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): +def getStrandedInsertions(str bamfile, str chrom, int start, int end, + int lower, int upper, int atac=1): + cdef int npos = end - start - cdef np.ndarray[DTYPE_t, ndim=1] matplus = np.zeros(npos, dtype = DTYPE) - cdef np.ndarray[DTYPE_t, ndim=1] matminus = np.zeros(npos, dtype = DTYPE) + cdef np.ndarray[DTYPE_t, ndim=1] matplus = np.zeros(npos, dtype=DTYPE) + cdef np.ndarray[DTYPE_t, ndim=1] matminus = np.zeros(npos, dtype=DTYPE) + cdef AlignmentFile bamHandle = AlignmentFile(bamfile) cdef AlignedSegment read cdef int l_pos, ilen, r_pos + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): if read.is_proper_pair and not read.is_reverse: if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion + l_pos = read.reference_start + 4 ilen = abs(read.template_length) - 8 else: - l_pos = read.pos + l_pos = read.reference_start ilen = abs(read.template_length) + r_pos = l_pos + ilen - 1 - if ilen >= lower and ilen < upper: - if l_pos >= start and l_pos < end: + + if lower <= ilen < upper: + if start <= l_pos < end: matplus[l_pos - start] += 1 - if r_pos >= start and r_pos < end: + if start <= r_pos < end: matminus[r_pos - start] += 1 + bamHandle.close() return (matplus, matminus) - @cython.boundscheck(False) -def getAllFragmentSizes(str bamfile, int lower, int upper, int atac = 1): - cdef np.ndarray[DTYPE_t, ndim =1] sizes = np.zeros(upper - lower, dtype= np.float) - # loop over samfile +def getAllFragmentSizes(str bamfile, int lower, int upper, int atac=1): + + cdef np.ndarray[DTYPE_t, ndim=1] sizes = np.zeros(upper - lower, dtype=DTYPE) 
+ cdef AlignmentFile bamHandle = AlignmentFile(bamfile) cdef AlignedSegment read cdef int ilen + for read in bamHandle: - if read.is_proper_pair and not read.is_reverse: + if read.is_proper_pair and not read.is_reverse: if atac: - - #get insert size - #correct by 8 base pairs to be inserion to insertion ilen = abs(read.template_length) - 8 else: ilen = abs(read.template_length) - if ilen < upper and ilen >= lower: - sizes[ilen - lower]+=1 + + if lower <= ilen < upper: + sizes[ilen - lower] += 1 + bamHandle.close() return sizes @cython.boundscheck(False) -def getFragmentSizesFromChunkList(chunks, str bamfile, int lower, int upper, int atac = 1): - cdef np.ndarray[DTYPE_t, ndim =1] sizes = np.zeros(upper - lower, dtype= np.float) - # loop over samfile +def getFragmentSizesFromChunkList(chunks, str bamfile, + int lower, int upper, int atac=1): + + cdef np.ndarray[DTYPE_t, ndim=1] sizes = np.zeros(upper - lower, dtype=DTYPE) + cdef AlignmentFile bamHandle = AlignmentFile(bamfile) cdef AlignedSegment read cdef int ilen, l_pos, center + for chunk in chunks: - for read in bamHandle.fetch(chunk.chrom, max(0, chunk.start - upper), chunk.end + upper): + for read in bamHandle.fetch(chunk.chrom, + max(0, chunk.start - upper), + chunk.end + upper): + if read.is_proper_pair and not read.is_reverse: if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion + l_pos = read.reference_start + 4 ilen = abs(read.template_length) - 8 else: - l_pos = read.pos + l_pos = read.reference_start ilen = abs(read.template_length) - center = l_pos + (ilen-1)/2 - if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: - sizes[ilen - lower]+=1 + + center = l_pos + (ilen - 1) // 2 + + if (lower <= ilen < upper and + chunk.start <= center < chunk.end): + sizes[ilen - lower] += 1 + bamHandle.close() return sizes - diff --git a/pyatac/fragmentsizes.py b/pyatac/fragmentsizes.py index 4fd6ba0..10af79c 100644 
--- a/pyatac/fragmentsizes.py +++ b/pyatac/fragmentsizes.py @@ -71,7 +71,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'sizes': - fragmentsizes = np.array(map(float,line.rstrip("\n").split("\t"))) + fragmentsizes = np.array(list(map(float,line.rstrip("\n").split("\t")))) try: new = FragmentSizes(lower, upper, vals = fragmentsizes) except NameError: diff --git a/pyatac/get_counts.py b/pyatac/get_counts.py index 978b144..fe1fecc 100644 --- a/pyatac/get_counts.py +++ b/pyatac/get_counts.py @@ -32,12 +32,12 @@ def get_counts(args): if read.is_proper_pair and not read.is_reverse: if args.atac: #get left position - l_pos = read.pos + 4 + l_pos = read.reference_start + 4 #get insert size #correct by 8 base pairs to be inserion to insertion ilen = abs(read.template_length) - 8 else: - l_pos = read.pos + l_pos = read.reference_start ilen = abs(read.template_length) r_pos = l_pos + ilen - 1 if _between(ilen, args.lower, args.upper) and (_between(l_pos, chunk.start, chunk.end) or _between(r_pos, chunk.start, chunk.end)): diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 29a6fe2..4a5ddf9 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -22,7 +22,7 @@ def _covHelper(arg): """Computes coverage track for a particular set of bed regions""" (chunk, args) = arg try: - offset = args.window / 2 + offset = args.window // 2 mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) mat.makeFragmentMat(args.bam) cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end) @@ -42,7 +42,7 @@ def _writeCov(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing coverage track\n') traceback.print_exc() print() @@ -62,12 +62,12 @@ def get_cov(args, bases = 50000, splitsize = 1000): if args.bed is None: chrs = 
read_chrom_sizes_from_bam(args.bam) chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.merge() sets = chunks.split(bases = bases) - maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) + maxQueueSize = max(2,int(2 * bases // np.mean([chunk.length() for chunk in chunks]))) pool1 = mp.Pool(processes = max(1,args.cores-1)) out_handle = open(args.out + '.cov.bedgraph','w') out_handle.close() @@ -75,7 +75,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): write_process = mp.Process(target = _writeCov, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_covHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_ins.py b/pyatac/get_ins.py index 8d28e96..0b17501 100644 --- a/pyatac/get_ins.py +++ b/pyatac/get_ins.py @@ -21,12 +21,12 @@ def _insHelperSmooth(arg): """Computes smoothed insertion track for a particular set of bed regions""" (chunk, args) = arg try: - offset = args.smooth / 2 + offset = args.smooth // 2 ins = InsertionTrack(chunk.chrom, chunk.start - offset, chunk.end + offset) ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) ins.smooth_track(args.smooth, window = "gaussian", mode= 'valid') except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -39,7 +39,7 @@ def _insHelper(arg): ins = InsertionTrack(chunk.chrom, chunk.start, chunk.end) ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + 
print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -51,7 +51,7 @@ def _writeIns(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -71,12 +71,12 @@ def get_ins(args, bases = 50000, splitsize = 1000): if args.bed is None: chrs = read_chrom_sizes_from_bam(args.bam) chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.merge() sets = chunks.split(bases = bases) - maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) + maxQueueSize = max(2,int(2 * bases // np.mean([chunk.length() for chunk in chunks]))) pool1 = mp.Pool(processes = max(1,args.cores-1)) out_handle = open(args.out + '.ins.bedgraph','w') out_handle.close() @@ -85,9 +85,9 @@ def get_ins(args, bases = 50000, splitsize = 1000): write_process.start() for j in sets: if args.smooth: - tmp = pool1.map(_insHelperSmooth, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelperSmooth, list(zip(j,itertools.repeat(args)))) else: - tmp = pool1.map(_insHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index 2419523..112ef8c 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -31,7 +31,7 @@ def _nucleotideHelper(arg): mat += submat n += 1 except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+'\n') + print(('Caught exception when processing:\n'+ chunk.asBed()+'\n')) traceback.print_exc() print() raise e @@ -66,7 +66,7 @@ def get_nucleotide(args): params = 
_NucleotideParameters(args.up, args.down, args.fasta, args.dinucleotide) sets = chunks.split(bases = 10000) pool = Pool(processes = args.cores) - tmp = pool.map(_nucleotideHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_nucleotideHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = np.zeros(params.matsize) @@ -74,7 +74,7 @@ for i in tmp: result += i[0] n += i[1] - result = result / n + result = result / n if args.norm: normfreqs = seq.getNucFreqs(params.fasta, params.nucleotides) result = result / np.reshape(np.repeat(normfreqs,result.shape[1]),result.shape) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index bdfc534..9a36582 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -34,7 +34,7 @@ def _pwmHelper(arg): mat += ins.getStrandedInsertionSequences(params.fasta, params.nucleotides, up = params.up, down = params.down) n += sum(ins.vals) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -62,14 +62,14 @@ def get_pwm(args, bases = 50000, splitsize = 1000): chrs = read_chrom_sizes_from_fasta(args.fasta) if args.bed is None: chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize, offset = args.flank) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank) sets = chunks.split(bases = bases) params = _PWMParameters(bam = args.bam, up = args.flank, down = args.flank, fasta = args.fasta, lower = args.lower, upper = args.upper, atac = args.atac, sym = args.sym) pool = Pool(processes = args.cores) - tmp = pool.map(_pwmHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_pwmHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() n = 0.0 @@ -77,7 +77,7 @@ def get_pwm(args, bases = 50000,
splitsize = 1000): for i in tmp: result += i[0] n += i[1] - result /= n + result /= n if args.bed: normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta, params.nucleotides) else: @@ -88,7 +88,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): left = result[:,0:(args.flank + 1)] right = result[:,args.flank:] rightflipped = np.fliplr(np.flipud(right)) - combined = (left + rightflipped) / 2 + combined = (left + rightflipped) / 2 result = np.hstack((combined, np.fliplr(np.flipud(combined[:,0:args.flank])))) #save pwm = PWM(result, args.flank, args.flank, params.nucleotides) diff --git a/pyatac/get_sizes.py b/pyatac/get_sizes.py index cf66f54..aac0905 100644 --- a/pyatac/get_sizes.py +++ b/pyatac/get_sizes.py @@ -30,7 +30,7 @@ def get_sizes(args): if not args.no_plot: #make figure fig = plt.figure() - plt.plot(range(sizes.lower,sizes.upper),sizes.get(sizes.lower,sizes.upper),label = args.out) + plt.plot(list(range(sizes.lower,sizes.upper)),sizes.get(sizes.lower,sizes.upper),label = args.out) plt.xlabel("Fragment Size") plt.ylabel("Frequency") fig.savefig(args.out+'.fragmentsizes.eps') diff --git a/pyatac/make_bias_track.py b/pyatac/make_bias_track.py index 94e6640..2b6cd01 100644 --- a/pyatac/make_bias_track.py +++ b/pyatac/make_bias_track.py @@ -23,7 +23,7 @@ def _biasHelper(arg): bias = InsertionBiasTrack(chunk.chrom, chunk.start, chunk.end) bias.computeBias(params.fasta, params.chrs, params.pwm) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -36,7 +36,7 @@ def _writeBias(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -63,13 +63,13 @@ def make_bias_track(args, bases = 500000, splitsize = 1000):
params = _BiasParams(args.fasta, args.pwm) if args.bed is None: chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed) - chunks.checkChroms(params.chrs.keys()) + chunks.checkChroms(list(params.chrs.keys())) chunks.merge() sets = chunks.split(bases = bases) - maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) + maxQueueSize = max(2,int(2 * bases // np.mean([chunk.length() for chunk in chunks]))) pool = mp.Pool(processes = max(1,args.cores-1)) out_handle = open(args.out + '.Scores.bedgraph','w') out_handle.close() @@ -77,7 +77,7 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): write_process = mp.Process(target = _writeBias, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params))) + tmp = pool.map(_biasHelper, list(zip(j,itertools.repeat(params)))) for track in tmp: write_queue.put(track) pool.close() diff --git a/pyatac/make_bias_vplot.py b/pyatac/make_bias_vplot.py index ce80274..5847030 100644 --- a/pyatac/make_bias_vplot.py +++ b/pyatac/make_bias_vplot.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Script to make V-plot @@ -31,8 +31,8 @@ def _biasVplotHelper(arg): for chunk in chunks: try: chunk.center() - biastrack = InsertionBiasTrack(chunk.chrom, chunk.start - params.flank - 1 - (params.upper/2), - chunk.end + params.flank + params.upper/2+1) + biastrack = InsertionBiasTrack(chunk.chrom, chunk.start - params.flank - 1 - (params.upper//2), + chunk.end + params.flank + params.upper//2+1) if params.bg is not None: biastrack.read_track(params.bg, empty = 0) else: @@ -44,11 +44,11 @@ def _biasVplotHelper(arg): add = biasmat.get(start = chunk.start - params.flank, end = chunk.end + params.flank, flip = (chunk.strand == "-")) if params.scale: - mat += add/np.sum(add) + mat += 
add/np.sum(add) else: mat += add except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() raise e @@ -80,7 +80,7 @@ def make_bias_vplot(args): sizes = args.sizes, scale = args.scale, pwm = args.pwm, fasta = args.fasta) pool = Pool(processes = args.cores) - tmp = pool.map(_biasVplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_biasVplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index edb57a2..e6e1bc2 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -13,7 +13,7 @@ import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.chunk import ChunkList from pyatac.chunkmat2d import FragmentMat2D -import VMat as V +from . import VMat as V from multiprocessing import Pool import itertools import traceback @@ -32,10 +32,10 @@ def _vplotHelper(arg): submat.makeFragmentMat(params.bam) add = submat.get(start = chunk.start- params.flank, end = chunk.end + params.flank, flip = (chunk.strand =="-")) if params.scale: - add = add/np.sum(add) + add = add/np.sum(add) result += add except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -67,7 +67,7 @@ def make_vplot(args): params = _VplotParams(flank = args.flank, lower = args.lower, upper = args.upper, bam = args.bam, atac = args.atac, scale = args.scale) pool = Pool(processes = args.cores) - tmp = pool.map(_vplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_vplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) diff --git a/pyatac/seq.py b/pyatac/seq.py index df825ed..9d1cd36 100644 --- a/pyatac/seq.py +++ b/pyatac/seq.py @@ -4,7 
+4,6 @@ @author: Alicia Schep, Greenleaf Lab, Stanford University """ -import string import numpy as np import pysam @@ -22,7 +21,7 @@ return sequence.upper() -DNA_Translation = string.maketrans('ACGT', 'TGCA') +DNA_Translation = str.maketrans('ACGT', 'TGCA') def complement(sequence): """Get complement of DNA sequenceuence""" @@ -41,7 +40,7 @@ raise Exception("Usage Error! Nucleotides must all be of same length! No mixing single nucleotides with dinucleotides, etc") mat = np.zeros((len(nucleotides),len(sequence)-l+1)) for i in range(len(nucleotides)): - mat[i] = np.array(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)])) + mat[i] = np.array(list(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)]))) return(mat) def getNucFreqs(fasta, nucleotides): @@ -55,7 +54,7 @@ out += [sequence.count(i) for i in nucleotides] n += len(sequence) f.close() - return out/n + return out/n def getNucFreqsFromChunkList(chunks, fasta, nucleotides): @@ -69,5 +68,5 @@ out += [sequence.count(i) for i in nucleotides] n += len(sequence) handle.close() - return out/n + return out/n diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py index 67a03bf..7eead19 100644 --- a/pyatac/signal_around_sites.py +++ b/pyatac/signal_around_sites.py @@ -54,7 +54,7 @@ def _signalHelper(arg): if params.scale: tmp = sig tmp[np.isnan(tmp)]=0 - sig = sig / (np.sum(abs(sig))+ (np.sum(abs(sig))==0)) + sig = sig / (np.sum(abs(sig))+ (np.sum(abs(sig))==0)) if params.all: mat[counter] = sig counter += 1 @@ -62,7 +62,7 @@ sig[np.isnan(sig)]=0 agg += sig except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() 
bg.close() @@ -96,7 +96,7 @@ def get_signal(args): args.scale, args.positive, args.all) sets = chunks.split(items = min(args.cores*20,len(chunks))) pool = Pool(processes = args.cores) - tmp = pool.map(_signalHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_signalHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() if args.all: @@ -108,9 +108,9 @@ result = sum(tmp) if not args.no_agg: if args.norm: - result = result / len(chunks) + result = result / len(chunks) fig = plt.figure() - plt.plot(range(-args.up,args.down+1),result) + plt.plot(list(range(-args.up,args.down+1)),result) plt.xlabel("Position relative to Site") plt.ylabel("Signal Intensity") fig.savefig(args.out+'.agg.track.eps') diff --git a/pyatac/tracks.py b/pyatac/tracks.py index d1ac322..0a17ac2 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -10,7 +10,7 @@ from pyatac.chunk import Chunk from pyatac.utils import smooth import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) -from fragments import getInsertions, getStrandedInsertions +from pyatac.fragments import getInsertions, getStrandedInsertions from pyatac.seq import get_sequence, seq_to_mat, complement class Track(Chunk): @@ -47,7 +47,7 @@ def write_track(self, handle, start = None, end = None, vals = None, write_zero if vals is None: vals=self.vals if len(vals)!=self.end-self.start: - print len(vals),self.end-self.start + print(len(vals),self.end-self.start) raise Exception("Error! Inconsistency between length of \ values and start/end values") prev_value = None @@ -88,14 +88,14 @@ def read_track(self, bedgraph, start = None, end = None, empty = np.nan, flank = def log(self, pseudo = 1): """Log values. Add psuedo count so values don't equal 0 before logging""" if self.log: - print "Logging a track that is already log..." 
+ print("Logging a track that is already log...") adjusted = self.vals + pseudo self.vals = np.log(adjusted) self.log = True def exp(self): """Take exponent of values""" if not self.log: - print "taking exponent of a non-logged track..." + print("taking exponent of a non-logged track...") self.vals = np.exp(self.vals) self.log = False def smooth_track(self, window_len, window='flat', sd = None, @@ -105,8 +105,8 @@ def smooth_track(self, window_len, window='flat', sd = None, self.vals = smooth(self.vals, window_len, window = window, sd = sd, mode = mode, norm = norm) if mode == 'valid': - self.start = self.start + window_len/2 - self.end = self.end - window_len/2 + self.start = self.start + window_len//2 + self.end = self.end - window_len//2 def get(self, start = None, end = None, pos = None): """Obtain value of track at particular interval or position""" if pos: @@ -135,7 +135,7 @@ def plot(self, name = None, start = None, end = None, filename=None): name = self.name values = self.get(start,end) fig = plt.figure() - plt.plot(range(start,end),values) + plt.plot(list(range(start,end)),values) plt.xlabel(self.chrom) plt.ylabel(name) if filename: @@ -143,7 +143,7 @@ def plot(self, name = None, start = None, end = None, filename=None): plt.close(fig) #Also save text output! 
filename2 = ".".join(filename.split(".")[:-1]+['txt']) - out = np.vstack((range(start,end),values)) + out = np.vstack((list(range(start,end)),values)) np.savetxt(filename2,out,delimiter="\t") else: fig.show() @@ -208,7 +208,7 @@ def __init__(self, chrom, start, end): Track.__init__(self, chrom, start, end, "coverage") def calculateCoverage(self, mat, lower, upper, window_len): """Compute coverage of fragment centers using flat window""" - offset=self.start-mat.start-(window_len/2) + offset=self.start-mat.start-(window_len//2) if offset<0: raise Exception("Insufficient flanking region on \ mat to calculate coverage with desired window") @@ -222,7 +222,7 @@ def calculateCoverage(self, mat, lower, upper, window_len): mode='valid',norm=False) def calculateCoverageSmooth(self,mat,lower,upper,window_len,sd): """Compute coverage of fragment centers using gaussia window""" - offset=self.start-mat.start-(window_len/2) + offset=self.start-mat.start-(window_len//2) if offset<0: raise Exception("Insufficient flanking region on \ mat to calculate coverage with desired window") diff --git a/pyatac/utils.py b/pyatac/utils.py index d972f4d..2be9f28 100644 --- a/pyatac/utils.py +++ b/pyatac/utils.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ General tools for dealing with ATAC-Seq data using Python. 
@@ -37,7 +37,7 @@ def smooth(sig, window_len, window='flat', sd = None, mode = 'valid', if window=='gaussian' and sd is None: sd = (window_len-1)/6.0 if window=="gaussian": - w = signal.gaussian(window_len,sd) + w = signal.windows.gaussian(window_len,sd) if window=="flat": w = np.ones(window_len) sig_nonan = copy(sig) @@ -90,7 +90,7 @@ def call_peaks(sigvals, min_signal = 0, sep = 120, boundary = None, order =1): replace = min(sigvals[~np.isnan(sigvals)]) sigvals[np.isnan(sigvals)]=replace if boundary is None: - boundary = sep/2 + boundary = sep//2 random = np.random.RandomState(seed = 25) l = len(sigvals) peaks = signal.argrelmax(sigvals *(1+ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..680a3d8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[build-system] +requires = [ + "setuptools>=68", + "wheel", + "cython>=3.0", + "numpy>=1.26,<2.1" +] +build-backend = "setuptools.build_meta" + +[project] +name = "NucleoATAC" +version = "1.0.0" +description = "Nucleosome calling from ATAC-seq" +authors = [ + { name = "Alicia Schep", email = "aschep@stanford.edu" } +] +license = { text = "MIT" } +requires-python = ">=3.11" + +dependencies = [ + "numpy>=1.26", + "scipy>=1.11", + "pysam>=0.22", + "matplotlib>=3.8" +] + +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] + +[project.scripts] +nucleoatac = "nucleoatac.cli:main" +pyatac = "pyatac.cli:main" diff --git a/setup.py b/setup.py index e654055..2537fcb 100644 --- a/setup.py +++ b/setup.py @@ -1,36 +1,63 @@ -from setuptools import setup -from setuptools.command.test import test as TestCommand -import sys -import os +from setuptools import setup, Extension, find_packages +from Cython.Build import cythonize +import numpy as np -if float(sys.version[:3])<2.7 
or float(sys.version[:3])>=2.8: - sys.stderr.write("CRITICAL: Python version must be 2.7!\n") - sys.exit(1) +extensions = [ + Extension( + "pyatac.fragments", + ["pyatac/fragments.pyx"], + include_dirs=[np.get_include()], + ), + Extension( + "nucleoatac.multinomial_cov", + ["nucleoatac/multinomial_cov.pyx"], + include_dirs=[np.get_include()], + ), +] +setup( + name="NucleoATAC", + version="1.0.0", + description="Nucleosome calling from ATAC-seq", + author="Alicia Schep", + author_email="aschep@stanford.edu", + url="https://github.com/GreenleafLab/NucleoATAC", + license="MIT", -class NoTestCommand(TestCommand): - def run(self): - print("NucleoATAC does not support running tests with " - "'python setup.py test'. Please run 'python tests.py'") + packages=find_packages(), + python_requires=">=3.11", + + install_requires=[ + "numpy>=1.26", + "scipy>=1.11", + "pysam>=0.22", + "matplotlib>=3.8" + ], + + ext_modules=cythonize( + extensions, + compiler_directives={"language_level": "3"}, + ), + + include_dirs=[np.get_include()], + + entry_points={ + "console_scripts": [ + "nucleoatac=nucleoatac.cli:main", + "pyatac=pyatac.cli:main", + ], + }, -setup(name='NucleoATAC', - version='0.3.4', - description='python package for calling nucleosomes with ATAC-Seq', - classifiers=[ - 'Development Status :: 3 - Alpha', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2.7', - 'Topic :: Scientific/Engineering :: Bio-Informatics'], - keywords='ATAC-Seq sequencing bioinformatics', - url='https://github.com/GreenleafLab/NucleoATAC', - author='Alicia Schep', - author_email='aschep@stanford.edu', - license='MIT', - packages=['pyatac','pyatac.pwm','nucleoatac','nucleoatac.vplot'], - install_requires=['cython >= 0.22','numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'], - scripts=['bin/pyatac','bin/nucleoatac'], include_package_data=True, zip_safe=False, - tests_require=['nose'], - cmdclass = {'test': NoTestCommand}) + + classifiers=[ + 
"Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Bio-Informatics", + ], +) diff --git a/tests.py b/tests.py index b020255..f0a3f6f 100644 --- a/tests.py +++ b/tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import matplotlib import nose diff --git a/tests/test_chunkmat2d.py b/tests/test_chunkmat2d.py index 757ff9a..3fac1af 100644 --- a/tests/test_chunkmat2d.py +++ b/tests/test_chunkmat2d.py @@ -41,7 +41,7 @@ def test_biasmat2(self): self.assertTrue(abs(correct - self.biasmat.mat[51,220]) < 0.01*correct) def test_normByInsertDist(self): """test that normalization by insert distribution works as expected""" - isizes = FragmentSizes(lower=100,upper=200, vals = np.array(range(100,200))) + isizes = FragmentSizes(lower=100,upper=200, vals = np.array(list(range(100,200)))) self.biasmat.normByInsertDist(isizes) a1 = self.biastrack.get(pos = self.biasmat.start -50) a2 = self.biastrack.get(pos = self.biasmat.start + 50) diff --git a/tests/test_occupancy.py b/tests/test_occupancy.py index f335f87..60de099 100644 --- a/tests/test_occupancy.py +++ b/tests/test_occupancy.py @@ -31,7 +31,7 @@ def test_occupancy_calc3(self): a = np.random.multinomial(10,self.fragment_dist.nfr_fit.get()) b = np.random.multinomial(30,self.fragment_dist.nuc_fit.get()) results[i],lower[i],upper[i] = calculateOccupancy(a+b,np.array([1,1,1]),self.params) - print np.mean(results) + print(np.mean(results)) self.assertTrue(abs(np.mean(results)-0.75)<0.1) self.assertTrue(sum(upper < 0.75) < 85) self.assertTrue(sum(lower > 0.75) < 85) @@ -51,7 +51,7 @@ def test_occupancy_calc4(self): a = np.random.multinomial(10,nfrprob) b = np.random.multinomial(30,nucprob) results[i],lower[i],upper[i] = calculateOccupancy(a+b,bias,self.params) - print np.mean(results) + print(np.mean(results)) 
self.assertTrue(abs(np.mean(results)-0.75)<0.1) self.assertTrue(sum(upper < 0.75) < 85) self.assertTrue(sum(lower > 0.75) < 85)