Kevin Louis committed on
Commit
4b032bf
1 Parent(s): a388195

Upload DNAseq.py

Browse files
Files changed (1) hide show
  1. DNAseq.py +14 -82
DNAseq.py CHANGED
@@ -1,119 +1,51 @@
1
- class DNAseq:
2
- def __init__(self, sequence):
3
- self.sequence = sequence.lower()
4
 
5
- def get_total_bases(self):
6
- return len(self.sequence)
7
- # Total length of sequence
8
-
9
- def get_base_count(self, base):
10
- base = base.lower()
11
- return self.sequence.count(base)
12
- # Total number of specified base. Can be A, T, G or C
13
 
 
14
  def get_base_counts(self):
15
  base_counts = {
16
- 'a': self.get_base_count('a'),
17
- 't': self.get_base_count('t'),
18
- 'g': self.get_base_count('g'),
19
- 'c': self.get_base_count('c'),
20
  }
21
  return base_counts
22
  # Total number of each base within the sequence returned as a dictionary
23
 
24
- def get_base_percentage(self, base):
25
- total_bases = self.get_total_bases()
26
- base_count = self.get_base_count(base)
27
- base_percentage = (base_count / total_bases) * 100
28
- return base_percentage
29
- # Base content by percentage. Can be A, T, G or C
30
-
31
  def get_base_percentages(self):
32
  base_percentages = {
33
- 'a': self.get_base_percentage('a'),
34
- 't': self.get_base_percentage('t'),
35
- 'g': self.get_base_percentage('g'),
36
- 'c': self.get_base_percentage('c')
37
  }
38
  return base_percentages
39
  # Base content percentage for each base returned as a dictionary
40
 
41
  def get_gc_content(self):
42
- total_bases = self.get_total_bases()
43
  gc_count = self.sequence.count('g') + self.sequence.count('c')
44
  gc_content = (gc_count / total_bases) * 100
45
  return gc_content
46
  # Guanine Cytosine (gc) content by percentage
47
 
48
  def get_at_content(self):
49
- total_bases = self.get_total_bases()
50
  at_count = self.sequence.count('a') + self.sequence.count('t')
51
  at_content = (at_count / total_bases) * 100
52
  return at_content
53
  # Adenine Thymine (at) content by percentage
54
 
55
  def get_purine_content(self):
56
- total_bases = self.get_total_bases()
57
  ag_count = self.sequence.count('a') + self.sequence.count('g')
58
  ag_content = (ag_count / total_bases) * 100
59
  return ag_content
60
  # Adenine Guanine (purine) content by percentage
61
 
62
  def get_pyrimidine_content(self):
63
- total_bases = self.get_total_bases()
64
  ct_count = self.sequence.count('c') + self.sequence.count('t')
65
  ct_content = (ct_count / total_bases) * 100
66
  return ct_content
67
  # Cytosine Thymine (pyrimidine) content by percentage
68
-
69
- def get_base_at_position(self, position):
70
- pos = position - 1
71
-
72
- if 0 <= pos < len(self.sequence):
73
- base_at_pos = self.sequence[pos]
74
- return base_at_pos
75
- else:
76
- return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))
77
- # Returns the base at a specified position in the sequence
78
-
79
- def get_base_at_positions(self, position_list):
80
- if self.check_positions(position_list):
81
- pos_dict = {i: self.sequence[i - 1] for i in position_list if 0 <= i < len(self.sequence)}
82
- return pos_dict
83
- else:
84
- return "Position is out of range.Positions should be 1 - {}".format(len(self.sequence))
85
- # Returns base for each position in list
86
-
87
- def check_positions(self, position_list):
88
- # Check if the positions are within the range of the sequence length
89
- # Value = 0 -> position out of sequence range
90
- # Value = 1 -> position within sequence range
91
-
92
- checked = {}
93
- for pos in position_list:
94
- if pos <= 0 or pos > len(self.sequence):
95
- checked[pos] = 0
96
- else:
97
- checked[pos] = 1
98
-
99
- # Check if all values are equal to 1 / All positions in the list are within the range of the sequence length
100
- all_values_equal_to_1 = all(value == 1 for value in checked.values())
101
-
102
- if all_values_equal_to_1:
103
- valid = True
104
- else:
105
- valid = False
106
-
107
- return valid
108
-
109
- def get_subsequence(self, start_position, end_position):
110
- # Ensure the start and end positions are within the bounds of the sequence
111
- if start_position > 0 and end_position <= len(self.sequence):
112
- return self.sequence[start_position - 1:end_position]
113
- else:
114
- return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))
115
- # Returns the subsequence based on given positions
116
-
117
- def subsequence_total_bases(self, start_position, end_position):
118
- return len(self.get_subsequence(start_position, end_position))
119
-
 
1
+ from sequence import Sequence
 
 
2
 
 
 
 
 
 
 
 
 
3
 
4
+ class DNAseq(Sequence):
5
  def get_base_counts(self):
6
  base_counts = {
7
+ 'a': self.get_unit_count('a'),
8
+ 't': self.get_unit_count('t'),
9
+ 'g': self.get_unit_count('g'),
10
+ 'c': self.get_unit_count('c'),
11
  }
12
  return base_counts
13
  # Total number of each base within the sequence returned as a dictionary
14
 
 
 
 
 
 
 
 
15
  def get_base_percentages(self):
16
  base_percentages = {
17
+ 'a': self.get_unit_percentage('a'),
18
+ 't': self.get_unit_percentage('t'),
19
+ 'g': self.get_unit_percentage('g'),
20
+ 'c': self.get_unit_percentage('c'),
21
  }
22
  return base_percentages
23
  # Base content percentage for each base returned as a dictionary
24
 
25
  def get_gc_content(self):
26
+ total_bases = self.get_seq_length()
27
  gc_count = self.sequence.count('g') + self.sequence.count('c')
28
  gc_content = (gc_count / total_bases) * 100
29
  return gc_content
30
  # Guanine Cytosine (gc) content by percentage
31
 
32
  def get_at_content(self):
33
+ total_bases = self.get_seq_length()
34
  at_count = self.sequence.count('a') + self.sequence.count('t')
35
  at_content = (at_count / total_bases) * 100
36
  return at_content
37
  # Adenine Thymine (at) content by percentage
38
 
39
  def get_purine_content(self):
40
+ total_bases = self.get_seq_length()
41
  ag_count = self.sequence.count('a') + self.sequence.count('g')
42
  ag_content = (ag_count / total_bases) * 100
43
  return ag_content
44
  # Adenine Guanine (purine) content by percentage
45
 
46
  def get_pyrimidine_content(self):
47
+ total_bases = self.get_seq_length()
48
  ct_count = self.sequence.count('c') + self.sequence.count('t')
49
  ct_content = (ct_count / total_bases) * 100
50
  return ct_content
51
  # Cytosine Thymine (pyrimidine) content by percentage