-
Notifications
You must be signed in to change notification settings - Fork 18
Turning in my MP1 #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,7 @@ | |
| """ | ||
| YOUR HEADER COMMENT HERE | ||
|
|
||
| @author: YOUR NAME HERE | ||
| @author: Harris Davidson | ||
|
|
||
| """ | ||
|
|
||
|
|
@@ -30,10 +30,17 @@ def get_complement(nucleotide): | |
| >>> get_complement('C') | ||
| 'G' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| if nucleotide == 'A': return 'T' | ||
| elif nucleotide == 'T': return 'A' | ||
| elif nucleotide == 'C': return 'G' | ||
| elif nucleotide == 'G': return 'C' | ||
|
|
||
|
|
||
| # print(get_complement('A')) | ||
| # print(get_complement('T')) | ||
| # print(get_complement('C')) | ||
| #print(get_complement('G')) | ||
|
|
||
| def get_reverse_complement(dna): | ||
| """ Computes the reverse complementary sequence of DNA for the specified DNA | ||
| sequence | ||
|
|
@@ -45,9 +52,12 @@ def get_reverse_complement(dna): | |
| >>> get_reverse_complement("CCGCGTTCA") | ||
| 'TGAACGCGG' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| compliment = "" | ||
| for f in dna: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Try using variable names that are more descriptive than a single random letter; it helps with code readability. |
||
| compliment = compliment + get_complement(f) | ||
| return compliment[::-1] | ||
|
|
||
| #print(get_reverse_complement("ATGCCCGCTTT")) | ||
|
|
||
| def rest_of_ORF(dna): | ||
| """ Takes a DNA sequence that is assumed to begin with a start | ||
|
|
@@ -62,9 +72,41 @@ def rest_of_ORF(dna): | |
| >>> rest_of_ORF("ATGAGATAGG") | ||
| 'ATGAGA' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| i = 0 | ||
| while i < len(dna): | ||
| codon = dna[i:i+3] | ||
| if (codon == 'TAG') or (codon == 'TAA') or (codon == 'TGA'): | ||
| return dna[0:i] | ||
| i = i+3 | ||
| return dna | ||
|
|
||
| #print(rest_of_ORF("ATGAGATAGG")) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again, delete commented-out portions of code when submitting. |
||
| # pos = [] #indices of stop codons | ||
| # b = 0 | ||
| # while b>-1: | ||
| # b = dna.find("TAG",b+1) | ||
| # pos = pos + [b] | ||
| # b=0 | ||
| # while b>-1: | ||
| # b = dna.find("TAA",b+1) | ||
| # pos = pos + [b] | ||
| # b=0 | ||
| # while b>-1: | ||
| # b = dna.find("TGA",b+1) | ||
| # pos = pos + [b] | ||
| # | ||
| # stops = [] | ||
| # | ||
| # for x in pos: | ||
| # if 0==x%3: | ||
| # stops.append(x) | ||
| # stops.sort | ||
| # if len(stops) == 0: | ||
| # return dna | ||
| # else: | ||
| # return dna[0:stops[-1]] | ||
|
|
||
| #print(rest_of_ORF('ATGAGATAGG')) | ||
|
|
||
| def find_all_ORFs_oneframe(dna): | ||
| """ Finds all non-nested open reading frames in the given DNA | ||
|
|
@@ -79,9 +121,23 @@ def find_all_ORFs_oneframe(dna): | |
| >>> find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC") | ||
| ['ATGCATGAATGTAGA', 'ATGTGCCC'] | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| all_ORFs = [] | ||
| #print(all_ORFs) | ||
|
|
||
| #for i in range(0,len(dna),3): | ||
| i = 0 | ||
| while i < len(dna)-3: | ||
| if dna[i:i+3] == 'ATG': | ||
| #print(rest_of_ORF(dna[i:])) | ||
| all_ORFs.append(rest_of_ORF(dna[i:])) | ||
| #print(all_ORFs[-1]) | ||
| #print(len(all_ORFs[-1])) | ||
| i = i + len(all_ORFs[-1]) | ||
| #print(i) | ||
| else: | ||
| i = i + 3 | ||
| return all_ORFs | ||
| #print(find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC")) | ||
|
|
||
| def find_all_ORFs(dna): | ||
| """ Finds all non-nested open reading frames in the given DNA sequence in | ||
|
|
@@ -96,9 +152,8 @@ def find_all_ORFs(dna): | |
| >>> find_all_ORFs("ATGCATGAATGTAG") | ||
| ['ATGCATGAATGTAG', 'ATGAATGTAG', 'ATG'] | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| return find_all_ORFs_oneframe(dna)+find_all_ORFs_oneframe(dna[1:])+find_all_ORFs_oneframe(dna[2:]) | ||
| #print(find_all_ORFs('ATGCATGAATGTAG')) | ||
|
|
||
| def find_all_ORFs_both_strands(dna): | ||
| """ Finds all non-nested open reading frames in the given DNA sequence on both | ||
|
|
@@ -109,18 +164,22 @@ def find_all_ORFs_both_strands(dna): | |
| >>> find_all_ORFs_both_strands("ATGCGAATGTAGCATCAAA") | ||
| ['ATGCGAATG', 'ATGCTACATTCGCAT'] | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| return find_all_ORFs(dna) + find_all_ORFs(get_reverse_complement(dna)) | ||
| #print(find_all_ORFs_both_strands("ATGCGAATGTAGCATCAAA")) | ||
|
|
||
| def longest_ORF(dna): | ||
| """ Finds the longest ORF on both strands of the specified DNA and returns it | ||
| as a string | ||
| >>> longest_ORF("ATGCGAATGTAGCATCAAA") | ||
| 'ATGCTACATTCGCAT' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| all_ORFs = find_all_ORFs_both_strands(dna) | ||
| length = [] | ||
| for f in all_ORFs: | ||
| length.append(len(f)) | ||
| longest = length.index(max(length)) | ||
| return all_ORFs[longest] | ||
| #print(all_ORFs[longest]) | ||
|
|
||
|
|
||
| def longest_ORF_noncoding(dna, num_trials): | ||
|
|
@@ -130,9 +189,15 @@ def longest_ORF_noncoding(dna, num_trials): | |
| dna: a DNA sequence | ||
| num_trials: the number of random shuffles | ||
| returns: the maximum length longest ORF """ | ||
| # TODO: implement this | ||
| pass | ||
| max_length = 0 | ||
| length = [] | ||
| for i in range(1,num_trials): | ||
| sdna = shuffle_string(dna) | ||
| if(max_length < len(longest_ORF(sdna))): | ||
| max_length = len(longest_ORF(sdna)) | ||
| return max_length | ||
|
|
||
| #print(longest_ORF_noncoding('ATGCGAATGTAGCATCAAA',3)) | ||
|
|
||
| def coding_strand_to_AA(dna): | ||
| """ Computes the Protein encoded by a sequence of DNA. This function | ||
|
|
@@ -148,8 +213,17 @@ def coding_strand_to_AA(dna): | |
| >>> coding_strand_to_AA("ATGCCCGCTTT") | ||
| 'MPA' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| acid_sequence = '' | ||
| for i in range(0,len(dna)-2,3): | ||
| #print(type(i)) | ||
| #codon = dna[i:i+3] | ||
|
|
||
| #print(codon) | ||
| acid_sequence = acid_sequence + aa_table[dna[i:i+3]] | ||
| #print(acid_sequence) | ||
| return acid_sequence | ||
|
|
||
| #print(coding_strand_to_AA("ATGCCCGCTTT")) | ||
|
|
||
|
|
||
| def gene_finder(dna): | ||
|
|
@@ -158,9 +232,36 @@ def gene_finder(dna): | |
| dna: a DNA sequence | ||
| returns: a list of all amino acid sequences coded by the sequence dna. | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| if __name__ == "__main__": | ||
| import doctest | ||
| doctest.testmod() | ||
| minlength = longest_ORF_noncoding(dna,1500) | ||
| #minlength = 6000 | ||
| print(minlength) | ||
| all_ORFs = find_all_ORFs_both_strands(dna) | ||
| #print(all_ORFs) | ||
| all_coding_ORFs=[] | ||
| for i in range(0,len(all_ORFs)-1): | ||
| if len(all_ORFs[i]) > minlength: | ||
| #print(all_ORFs[i]) | ||
| all_coding_ORFs.append(all_ORFs[i]) | ||
| #else: | ||
| #print('too short') | ||
| #print(all_coding_ORFs) | ||
| amino_sequences=[] | ||
| for i in range(0,len(all_coding_ORFs)): | ||
| #print(coding_strand_to_AA(all_coding_ORFs[i])) | ||
| #print(all_coding_ORFs[i]) | ||
| amino_sequences.append(coding_strand_to_AA(all_coding_ORFs[i])) | ||
|
|
||
| return amino_sequences | ||
|
|
||
| #print(gene_finder("ATGCCCGCTTT")) | ||
|
|
||
| from load import load_seq | ||
| dna = load_seq("./data/X73525.fa") | ||
|
|
||
| print(gene_finder(dna)) | ||
| #gene_finder(dna) | ||
| # | ||
| # if __name__ == "__main__": | ||
| # import doctest | ||
| # doctest.testmod(verbose=True) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remember to delete your intermediate print statements when submitting your code.