File size: 4,605 Bytes
2c8f0e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
class DNAseq:
    """Basic composition statistics and 1-based lookups for a DNA sequence.

    The sequence is stored lower-cased, so every base returned by this class
    is lower case and base arguments are matched case-insensitively.

    Positions are 1-based: the first base is position 1 and the last base is
    position ``len(sequence)``. Positional methods do not raise on an
    out-of-range position; they return an explanatory message string instead.
    """

    def __init__(self, sequence):
        """Store *sequence* (a string of bases) in lower case."""
        self.sequence = sequence.lower()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _percentage_of_total(self, count):
        """Return *count* as a percentage of the sequence length.

        An empty sequence yields 0.0 instead of raising ZeroDivisionError.
        """
        total_bases = self.get_total_bases()
        if total_bases == 0:
            return 0.0
        return (count / total_bases) * 100

    def _combined_content(self, bases):
        """Return the summed percentage of every base in *bases*."""
        combined_count = sum(self.sequence.count(base) for base in bases)
        return self._percentage_of_total(combined_count)

    def _out_of_range_message(self):
        """Return the standard message reporting the valid position range."""
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))

    # ------------------------------------------------------------------
    # Counts and percentages
    # ------------------------------------------------------------------

    def get_total_bases(self):
        """Return the total length of the sequence."""
        return len(self.sequence)

    def get_base_count(self, base):
        """Return how many times *base* occurs (case-insensitive)."""
        return self.sequence.count(base.lower())

    def get_base_counts(self):
        """Return a dict mapping 'a', 't', 'g' and 'c' to their counts."""
        return {base: self.get_base_count(base) for base in ('a', 't', 'g', 'c')}

    def get_base_percentage(self, base):
        """Return the share of *base* in the sequence as a percentage.

        Returns 0.0 for an empty sequence.
        """
        return self._percentage_of_total(self.get_base_count(base))

    def get_base_percentages(self):
        """Return a dict mapping 'a', 't', 'g' and 'c' to their percentages."""
        return {base: self.get_base_percentage(base) for base in ('a', 't', 'g', 'c')}

    def get_gc_content(self):
        """Return the guanine + cytosine (GC) content as a percentage."""
        return self._combined_content('gc')

    def get_at_content(self):
        """Return the adenine + thymine (AT) content as a percentage."""
        return self._combined_content('at')

    def get_purine_content(self):
        """Return the adenine + guanine (purine) content as a percentage."""
        return self._combined_content('ag')

    def get_pyrimidine_content(self):
        """Return the cytosine + thymine (pyrimidine) content as a percentage."""
        return self._combined_content('ct')

    # ------------------------------------------------------------------
    # Positional lookups (1-based)
    # ------------------------------------------------------------------

    def get_base_at_position(self, position):
        """Return the base at 1-based *position*.

        Returns an out-of-range message string when *position* is not within
        ``1 .. len(sequence)``.
        """
        index = position - 1
        if 0 <= index < len(self.sequence):
            return self.sequence[index]
        return self._out_of_range_message()

    def get_base_at_positions(self, position_list):
        """Return a dict mapping each 1-based position to its base.

        If any position is outside ``1 .. len(sequence)`` an out-of-range
        message string is returned instead of a dict.
        """
        # Materialize once so a one-shot iterable survives both the
        # validation pass and the lookup pass.
        positions = list(position_list)
        if not self.check_positions(positions):
            # Message text (including the missing space after the period) is
            # kept exactly as before for callers that compare against it.
            return "Position is out of range.Positions should be 1 - {}".format(len(self.sequence))
        # Every position was validated above, so no extra filter is needed;
        # the previous `i < len` filter wrongly dropped the last position.
        return {pos: self.sequence[pos - 1] for pos in positions}

    def check_positions(self, position_list):
        """Return True if every position lies within ``1 .. len(sequence)``.

        An empty *position_list* is considered valid.
        """
        sequence_length = len(self.sequence)
        return all(0 < pos <= sequence_length for pos in position_list)

    def get_subsequence(self, start_position, end_position):
        """Return the bases from *start_position* to *end_position* inclusive.

        Both positions are 1-based and must lie within ``1 .. len(sequence)``;
        otherwise an out-of-range message string is returned. When both are in
        range but start is greater than end, the result is an empty string.
        """
        # Validate both ends: an unchecked negative end would be treated as a
        # Python negative index and silently wrap around the sequence.
        if self.check_positions([start_position, end_position]):
            return self.sequence[start_position - 1:end_position]
        return self._out_of_range_message()

    def subsequence_total_bases(self, start_position, end_position):
        """Return the number of bases in the given subsequence.

        Returns the out-of-range message string (not its length) when either
        position is outside ``1 .. len(sequence)``.
        """
        if not self.check_positions([start_position, end_position]):
            return self._out_of_range_message()
        return len(self.get_subsequence(start_position, end_position))