class DNAseq:
    """A DNA sequence with base-composition and position-lookup helpers.

    The sequence is stored lower-cased, so every lookup is case-insensitive
    and every base returned by this class is lower-case. All positions in
    the public API are 1-based and inclusive.
    """

    def __init__(self, sequence):
        """Store *sequence* in lower case."""
        self.sequence = sequence.lower()

    def _percentage(self, count):
        """Return *count* as a percentage of the total sequence length.

        An empty sequence yields 0.0 rather than raising ZeroDivisionError.
        """
        total_bases = self.get_total_bases()
        if total_bases == 0:
            return 0.0
        return (count / total_bases) * 100

    def get_total_bases(self) -> int:
        """Return the total length of the sequence."""
        return len(self.sequence)

    def get_base_count(self, base) -> int:
        """Return how often *base* occurs in the sequence (case-insensitive).

        *base* is typically one of A, T, G or C.
        """
        return self.sequence.count(base.lower())

    def get_base_counts(self) -> dict:
        """Return the count of each base as a dict keyed by 'a', 't', 'g', 'c'."""
        return {base: self.get_base_count(base) for base in ('a', 't', 'g', 'c')}

    def get_base_percentage(self, base) -> float:
        """Return the share of *base* in the sequence as a percentage.

        *base* is typically one of A, T, G or C. Returns 0.0 for an empty
        sequence.
        """
        return self._percentage(self.get_base_count(base))

    def get_base_percentages(self) -> dict:
        """Return the percentage of each base as a dict keyed by 'a', 't', 'g', 'c'."""
        return {
            base: self.get_base_percentage(base) for base in ('a', 't', 'g', 'c')
        }

    def get_gc_content(self) -> float:
        """Return the guanine + cytosine (GC) content as a percentage."""
        return self._percentage(
            self.sequence.count('g') + self.sequence.count('c')
        )

    def get_at_content(self) -> float:
        """Return the adenine + thymine (AT) content as a percentage."""
        return self._percentage(
            self.sequence.count('a') + self.sequence.count('t')
        )

    def get_purine_content(self) -> float:
        """Return the adenine + guanine (purine) content as a percentage."""
        return self._percentage(
            self.sequence.count('a') + self.sequence.count('g')
        )

    def get_pyrimidine_content(self) -> float:
        """Return the cytosine + thymine (pyrimidine) content as a percentage."""
        return self._percentage(
            self.sequence.count('c') + self.sequence.count('t')
        )

    def get_base_at_position(self, position) -> str:
        """Return the base at the 1-based *position*.

        If *position* is outside 1..len(sequence), an explanatory message
        string is returned instead of a base.
        """
        index = position - 1  # convert the 1-based position to a 0-based index
        if 0 <= index < len(self.sequence):
            return self.sequence[index]
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))

    def get_base_at_positions(self, position_list):
        """Return a dict mapping each 1-based position in *position_list* to its base.

        If any position is outside 1..len(sequence), an explanatory message
        string is returned instead of a dict. An empty list yields an empty
        dict.
        """
        # Materialise once so a one-shot iterable survives both the
        # validation pass and the lookup pass.
        positions = list(position_list)
        if self.check_positions(positions):
            # Every position is already validated as 1..len, so no further
            # filtering is needed (filtering here used to drop the last base).
            return {pos: self.sequence[pos - 1] for pos in positions}
        return "Position is out of range.Positions should be 1 - {}".format(len(self.sequence))

    def check_positions(self, position_list) -> bool:
        """Return True if every position in *position_list* lies within 1..len(sequence).

        An empty *position_list* is considered valid.
        """
        length = len(self.sequence)
        return all(1 <= pos <= length for pos in position_list)

    def get_subsequence(self, start_position, end_position):
        """Return the bases from *start_position* to *end_position*, both 1-based and inclusive.

        If either position is outside 1..len(sequence), an explanatory
        message string is returned instead. When both positions are in range
        but *start_position* is greater than *end_position*, the result is an
        empty string.
        """
        length = len(self.sequence)
        # Both ends must be real 1-based positions; in particular a negative
        # end must not be interpreted as a Python negative slice index.
        if 1 <= start_position <= length and 1 <= end_position <= length:
            return self.sequence[start_position - 1:end_position]
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))

    def subsequence_total_bases(self, start_position, end_position):
        """Return the number of bases in the subsequence between the given positions.

        Positions are 1-based and inclusive. If either position is out of
        range, the out-of-range message string is returned unchanged instead
        of a count, so the length of the message is never reported as a base
        count.
        """
        length = len(self.sequence)
        if not (1 <= start_position <= length and 1 <= end_position <= length):
            return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))
        return len(self.sequence[start_position - 1:end_position])