Spaces:
Sleeping
Sleeping
File size: 4,605 Bytes
2c8f0e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
class DNAseq:
    """DNA sequence with base-composition and position lookup helpers.

    The sequence is stored lower-cased, so every query is case-insensitive.
    Positions accepted by the lookup methods are 1-based and inclusive.
    Out-of-range requests return a human-readable message string instead of
    raising, which is the convention the original methods established.
    """

    def __init__(self, sequence):
        """Store ``sequence`` lower-cased."""
        self.sequence = sequence.lower()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _percentage(self, count):
        """Return ``count`` as a percentage of the sequence length.

        An empty sequence yields ``0.0`` rather than a ZeroDivisionError.
        """
        total_bases = self.get_total_bases()
        if total_bases == 0:
            return 0.0
        return (count / total_bases) * 100

    def _out_of_range_message(self):
        """Return the message used when a position is outside the sequence."""
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))

    def _is_valid_range(self, start_position, end_position):
        """Return True when both 1-based positions lie inside the sequence."""
        length = len(self.sequence)
        return 1 <= start_position <= length and 1 <= end_position <= length

    # ------------------------------------------------------------------
    # Composition
    # ------------------------------------------------------------------
    def get_total_bases(self) -> int:
        """Return the total length of the sequence."""
        return len(self.sequence)

    def get_base_count(self, base) -> int:
        """Return how many times ``base`` (case-insensitive) occurs."""
        return self.sequence.count(base.lower())

    def get_base_counts(self) -> dict:
        """Return the count of each of a, t, g and c as a dictionary."""
        return {base: self.get_base_count(base) for base in ('a', 't', 'g', 'c')}

    def get_base_percentage(self, base) -> float:
        """Return the content of ``base`` as a percentage of the sequence."""
        return self._percentage(self.get_base_count(base))

    def get_base_percentages(self) -> dict:
        """Return the percentage of each of a, t, g and c as a dictionary."""
        return {base: self.get_base_percentage(base) for base in ('a', 't', 'g', 'c')}

    def get_gc_content(self) -> float:
        """Return the guanine + cytosine (GC) content as a percentage."""
        return self._percentage(self.sequence.count('g') + self.sequence.count('c'))

    def get_at_content(self) -> float:
        """Return the adenine + thymine (AT) content as a percentage."""
        return self._percentage(self.sequence.count('a') + self.sequence.count('t'))

    def get_purine_content(self) -> float:
        """Return the adenine + guanine (purine) content as a percentage."""
        return self._percentage(self.sequence.count('a') + self.sequence.count('g'))

    def get_pyrimidine_content(self) -> float:
        """Return the cytosine + thymine (pyrimidine) content as a percentage."""
        return self._percentage(self.sequence.count('c') + self.sequence.count('t'))

    # ------------------------------------------------------------------
    # Position lookups (1-based)
    # ------------------------------------------------------------------
    def get_base_at_position(self, position):
        """Return the base at 1-based ``position``.

        Returns the out-of-range message string when ``position`` is not in
        ``1..len(sequence)``.
        """
        if 1 <= position <= len(self.sequence):
            return self.sequence[position - 1]
        return self._out_of_range_message()

    def get_base_at_positions(self, position_list):
        """Return ``{position: base}`` for every 1-based position in the list.

        If any position is outside ``1..len(sequence)`` a message string is
        returned instead of a dictionary.
        """
        if not self.check_positions(position_list):
            # Message text kept verbatim (including the missing space) because
            # callers may display or match it.
            return "Position is out of range.Positions should be 1 - {}".format(len(self.sequence))
        # Every position has already been validated as 1..len, so no extra
        # filter is applied here; the previous ``0 <= i < len`` filter wrongly
        # dropped the last position of the sequence.
        return {pos: self.sequence[pos - 1] for pos in position_list}

    def check_positions(self, position_list) -> bool:
        """Return True when every position is within ``1..len(sequence)``.

        An empty list is considered valid.
        """
        length = len(self.sequence)
        return all(1 <= pos <= length for pos in position_list)

    def get_subsequence(self, start_position, end_position):
        """Return the bases from ``start_position`` to ``end_position``.

        Both positions are 1-based and inclusive and must each lie within
        ``1..len(sequence)``; otherwise the out-of-range message string is
        returned. A start greater than the end yields an empty string.
        """
        if self._is_valid_range(start_position, end_position):
            return self.sequence[start_position - 1:end_position]
        return self._out_of_range_message()

    def subsequence_total_bases(self, start_position, end_position):
        """Return the number of bases in the requested subsequence.

        Returns the out-of-range message string when the positions are
        invalid, rather than the length of that message.
        """
        if not self._is_valid_range(start_position, end_position):
            return self._out_of_range_message()
        return len(self.sequence[start_position - 1:end_position])
|