Kevin Louis committed on
Commit
2c8f0e3
1 Parent(s): 14066c2

app main files

Browse files
CTS_user_log.csv ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ user_query,ref_question,query_score,code_executed,ref_question_2,query_score_2,ref_question_3,query_score_3,similarity_metric,model_used_for_embeddings,lower_threshold,upper_threshold,date,time,response
2
+ How many bases are there in the sequence,How many bases are there in the sequence,1.06480165e-20,a2,How many bases does the sequence have,0.032355085,How many bases does the sequence have,0.032355085,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:21:36,930
3
+ there are cats and two dogs,How many bases are there,1.6690745,a2,What is the number of bases,1.6727788,What is the number of bases,1.6727788,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:25:06,Your Query Wasn't Understood. Can You Rephrase The Query
4
+ What is the base at position 78,What is the base at position/site 5 and 50,0.48746616,b1,What is the base at position/site 5,0.51927286,What is the base at position/site 5,0.51927286,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:31:04,c
5
+ What is the base at position 0,What is the base at position/site 5,0.5530416,b1,What are the bases from/between position 10 to/and 20?,0.6031008,What are the bases from/between position 10 to/and 20?,0.6031008,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:31:09,Position is out of range. Positions should be 1 - 930
6
+ What is the base at position 1000,What is the base at position/site 5 and 50,0.48386246,b1,"What is the base at position/site 5, 50, 515, 1568, 34578",0.49479663,"What is the base at position/site 5, 50, 515, 1568, 34578",0.49479663,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:31:38,Position is out of range. Positions should be 1 - 930
7
+ What are the bases from position 5 to 90,What are the bases from/between position 10 to/and 20?,0.24776845,b3,how many bases are there from positions 5 to/and 9,0.32962877,how many bases are there from positions 5 to/and 9,0.32962877,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:42:59,"ttgaggagaccattgacaccgtcattagcaatgcactacaactgtcacaacctaaa
8
+ ccgcagaaacaactcacggcccaatcgac"
9
+ How many are there from position 5 to 90,how many bases are there from positions 5 to/and 9,0.81035805,b2,how many bases are there between position 5 to/and 9,0.81407386,how many bases are there between position 5 to/and 9,0.81407386,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:43:26,86
10
+ what is the base at site 7 8 and 21,What is the base at position/site 34578,0.5133599,b1,What is the base at position/site 15,0.51463896,What is the base at position/site 15,0.51463896,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,10:48:34,g
11
+ What is the base at site 789,What is the base at position/site 1568,0.6266778,b1,What is the base at position/site 34578,0.6326653,What is the base at position/site 34578,0.6326653,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:09:15,a
12
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:10:19,930
13
+ How many bases are there,How many bases are there,7.908044e-21,a2,What is the number of bases,0.18966407,What is the number of bases,0.18966407,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:38:25,20
14
+ What is the base at site 5,What is the base at position/site 5,0.2645588,b1,What is the base at position/site 515,0.3784331,What is the base at position/site 515,0.3784331,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:46:46,t
15
+ What is the base at site 6,What is the base at position/site 5,0.49194798,b1,What is the base at position/site 515,0.53231597,What is the base at position/site 515,0.53231597,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:48:47,t
16
+ How many bases are in the sequence,How many bases are there in the sequence,0.0076738065,a2,How many bases does the sequence have,0.03213788,How many bases does the sequence have,0.03213788,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:49:22,15
17
+ Is there a pig on the plane,What is the amino acid count in the sequence,1.7117482,a1,What is the number of amino acids in the sequence,1.7167848,What is the number of amino acids in the sequence,1.7167848,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:49:54,Your Query Wasn't Understood. Can You Rephrase The Query
18
+ How many thymine,How many thymine t are there?,0.26310813,a6,What is the number of thymine t?,0.34814063,What is the number of thymine t?,0.34814063,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:50:27,5
19
+ how many guanines are there,How many guanines g are there?,0.30252665,a3,What is the number of guanines g?,0.41288453,What is the number of guanines g?,0.41288453,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,11:56:27,6
20
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:19:13,60
21
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:26:05,60
22
+ How many guanines bases are there in the sequence,How many guanines g does the sequence have?,0.3019901,a3,How many guanines g are there in the sequence?,0.3040974,How many guanines g are there in the sequence?,0.3040974,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:26:11,11
23
+ What is the base at position 10,What is the base at position/site 5,0.4366287,b1,What is the base at position/site 5 and 50,0.4367645,What is the base at position/site 5 and 50,0.4367645,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:26:20,g
24
+ What are the bases from position 2 to 10,What are the bases from/between position 10 to/and 20?,0.1379996,b3,how many bases are there from positions 5 to/and 9,0.28074524,how many bases are there from positions 5 to/and 9,0.28074524,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:26:29,gcattgagg
25
+ How many bases are there from position 2 to 10,how many bases are there between position 5 to/and 9,0.1829868,b2,how many bases are there from positions 5 to/and 9,0.18728873,how many bases are there from positions 5 to/and 9,0.18728873,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:26:40,9
26
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:27:21,60
27
+ What is the length,What is the length of the DNA ,0.86588484,a2,What is the length of the sequence,0.8859824,What is the length of the sequence,0.8859824,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:32:32,0
28
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:35:44,0
29
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:35:54,0
30
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:41:09,60
31
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:46:29,0
32
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:51:46,0
33
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:51:57,0
34
+ ,What is the quantiy of amino acids in the sequence,1.7896336,a1,What is the base at position/site 15,1.8019854,What is the base at position/site 15,1.8019854,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:52:06,Your Query Wasn't Understood. Can You Rephrase The Query
35
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:53:57,0
36
+ ,What is the quantiy of amino acids in the sequence,1.7896336,a1,What is the base at position/site 15,1.8019854,What is the base at position/site 15,1.8019854,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,12:54:09,Your Query Wasn't Understood. Can You Rephrase The Query
37
+ What is the length of the sequence,What is the length of the sequence,1.974797e-20,a1,How long is the sequence,0.1940434,How long is the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,13:43:27,60
38
+ What are the bases from position 2 to 10,What are the bases from/between position 10 to/and 20?,0.1379996,b3,how many bases are there from positions 5 to/and 9,0.28074524,how many bases are there from positions 5 to/and 9,0.28074524,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,13:43:53,gcattgagg
39
+ What is the base at position 10,What is the base at position/site 5,0.4366287,b1,What is the base at position/site 5 and 50,0.4367645,What is the base at position/site 5 and 50,0.4367645,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,14:11:51,g
40
+ How many bases are there from position 2 to 10,how many bases are there between position 5 to/and 9,0.1829868,b2,how many bases are there from positions 5 to/and 9,0.18728873,how many bases are there from positions 5 to/and 9,0.18728873,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,14:12:54,9
41
+ How long is the sequence ,How long is the sequence,2.4214691e-20,a1,What is the length of the sequence,0.1940434,What is the length of the sequence,0.1940434,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,14:13:48,60
42
+ Is there an S strand in the DNA sequence ,What is the length of the DNA sequence,0.8219202,a2,How long is the DNA sequence,0.84429514,How long is the DNA sequence,0.84429514,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,14:14:44,60
43
+ Do you like syrup waffles? ,How many adenine a are there in the sequence?,1.8768792,a4,How many adenine a does the sequence contain?,1.8772323,How many adenine a does the sequence contain?,1.8772323,k nearest neighbours,all-mpnet-base-v2,1.1,1.4,2023-09-19,14:16:35,Your Query Wasn't Understood. Can You Rephrase The Query
DNAseq.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class DNAseq:
    """Simple queries over a DNA sequence stored as a lowercase string.

    Position-based methods take 1-based positions, i.e. valid positions run
    from 1 to ``len(sequence)`` inclusive. When a position is outside that
    range these methods return an explanatory message string instead of
    raising.
    """

    def __init__(self, sequence):
        # Normalise once so that counting and lookups are case-insensitive.
        self.sequence = sequence.lower()

    def _out_of_range_message(self):
        """Return the message used by every position-based method on bad input."""
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))

    def _percentage(self, count):
        """Return ``count`` as a percentage of the sequence length.

        An empty sequence yields 0.0 rather than a ZeroDivisionError.
        """
        total_bases = self.get_total_bases()
        if total_bases == 0:
            return 0.0
        return (count / total_bases) * 100

    def get_total_bases(self):
        """Return the total length of the sequence."""
        return len(self.sequence)

    def get_base_count(self, base):
        """Return how many times ``base`` occurs; the match is case-insensitive."""
        return self.sequence.count(base.lower())

    def get_base_counts(self):
        """Return the count of each of a, t, g and c as a dictionary."""
        return {base: self.get_base_count(base) for base in ('a', 't', 'g', 'c')}

    def get_base_percentage(self, base):
        """Return the share of ``base`` in the sequence as a percentage."""
        return self._percentage(self.get_base_count(base))

    def get_base_percentages(self):
        """Return the percentage of each of a, t, g and c as a dictionary."""
        return {base: self.get_base_percentage(base) for base in ('a', 't', 'g', 'c')}

    def get_gc_content(self):
        """Return the guanine + cytosine (gc) content as a percentage."""
        return self._percentage(self.sequence.count('g') + self.sequence.count('c'))

    def get_at_content(self):
        """Return the adenine + thymine (at) content as a percentage."""
        return self._percentage(self.sequence.count('a') + self.sequence.count('t'))

    def get_purine_content(self):
        """Return the adenine + guanine (purine) content as a percentage."""
        return self._percentage(self.sequence.count('a') + self.sequence.count('g'))

    def get_pyrimidine_content(self):
        """Return the cytosine + thymine (pyrimidine) content as a percentage."""
        return self._percentage(self.sequence.count('c') + self.sequence.count('t'))

    def get_base_at_position(self, position):
        """Return the base at the 1-based ``position``.

        Returns the out-of-range message when ``position`` is not within
        1..len(sequence).
        """
        if 1 <= position <= len(self.sequence):
            return self.sequence[position - 1]
        return self._out_of_range_message()

    def get_base_at_positions(self, position_list):
        """Return ``{position: base}`` for every 1-based position in the list.

        If any position is out of range the out-of-range message is returned
        instead of a dictionary.
        """
        if not self.check_positions(position_list):
            return self._out_of_range_message()
        # check_positions has already validated every entry, so no further
        # filtering is applied here; in particular the last position of the
        # sequence (== len(sequence)) is included.
        return {pos: self.sequence[pos - 1] for pos in position_list}

    def check_positions(self, position_list):
        """Return True when every position lies within 1..len(sequence).

        An empty list is considered valid.
        """
        upper_bound = len(self.sequence)
        return all(0 < pos <= upper_bound for pos in position_list)

    def get_subsequence(self, start_position, end_position):
        """Return the bases from ``start_position`` to ``end_position`` inclusive.

        Both positions are 1-based. Returns the out-of-range message when the
        start is below 1 or the end is beyond the sequence length.
        """
        if start_position > 0 and end_position <= len(self.sequence):
            return self.sequence[start_position - 1:end_position]
        return self._out_of_range_message()

    def subsequence_total_bases(self, start_position, end_position):
        """Return the number of bases between the two 1-based positions, inclusive.

        Returns the out-of-range message (not its length) when the positions
        fall outside the sequence.
        """
        # The bounds are checked here explicitly because get_subsequence
        # reports errors as a string, whose length must not be mistaken for
        # a base count.
        if start_position > 0 and end_position <= len(self.sequence):
            return len(self.sequence[start_position - 1:end_position])
        return self._out_of_range_message()
119
+
code_function_mapping.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ code,function,description
2
+ a1,DNAseq(dna).get_total_bases(),calculates the total number of amino acids in the sequence
3
+ a2,DNAseq(dna).get_total_bases(),calculates the total number of bases in the sequence
4
+ a3,DNAseq(dna).get_base_count('g'),calculates the total number of guanine bases in the sequence
5
+ a4,DNAseq(dna).get_base_count('a'),calculates the total number of adenine bases in the sequence
6
+ a5,DNAseq(dna).get_base_count('c'),calculates the total number of cytosine bases in the sequence
7
+ a6,DNAseq(dna).get_base_count('t'),calculates the total number of thymine bases in the sequence
8
+ b1,DNAseq(dna).get_base_at_position(list_at_index_0(ParameterExtractor(query).extract_integers())),Returns the base at a specified position
9
+ b2,"DNAseq(dna).subsequence_total_bases(list_at_index_0(ParameterExtractor(query).extract_integers()), list_at_index_1(ParameterExtractor(query).extract_integers()))",Calculates the total number of bases in a subsequence defined by specified start and end positions
10
+ b3,"DNAseq(dna).get_subsequence(list_at_index_0(ParameterExtractor(query).extract_integers()), list_at_index_1(ParameterExtractor(query).extract_integers()))",Returns the bases in a subsequence defined by specified start and end positions
helper.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import datetime
3
+
4
def list_at_index(extracted_list, index):
    """Return the element of ``extracted_list`` found at ``index``."""
    return extracted_list[index]
8
+
9
+
10
def list_at_index_0(extracted_list):
    """Return the first element (index 0) of ``extracted_list``."""
    return extracted_list[0]
14
+
15
+
16
def list_at_index_1(extracted_list):
    """Return the second element (index 1) of ``extracted_list``."""
    return extracted_list[1]
20
+
21
def logger(log_filename, log_data, response):
    """Append one row to the CSV log file.

    The written row consists of the entries of ``log_data`` followed by the
    current local date (``YYYY-MM-DD``), the current local time
    (``HH:MM:SS``) and ``response``.

    Args:
        log_filename: Path of the CSV file to append to; it is created if it
            does not exist.
        log_data: Iterable of column values for the row. It is not modified,
            so the caller can safely reuse it.
        response: Value written as the last column of the row.
    """
    current_datetime = datetime.datetime.now()
    # Build a fresh row instead of appending to the caller's list; mutating
    # the argument would make the row grow by three columns on every reuse.
    row = [
        *log_data,
        current_datetime.strftime("%Y-%m-%d"),
        current_datetime.strftime("%H:%M:%S"),
        response,
    ]

    # newline='' lets the csv module control line endings; an explicit
    # encoding keeps non-ASCII text independent of the platform default.
    with open(log_filename, mode='a', newline='', encoding='utf-8') as log_file:
        log_writer = csv.writer(log_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        log_writer.writerow(row)
34
+
parameter_extractor.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ParameterExtractor:
    """Pulls numeric parameters out of a free-text query."""

    def __init__(self, query):
        # The query is stored lowercased.
        self.query = query.lower()

    def extract_integers(self):
        """Return every run of decimal digits in the query as a sorted list of ints.

        The result is in ascending numeric order, not in order of appearance.
        A query without digits yields an empty list.
        """
        # Blank out everything that is not a decimal digit so that each
        # maximal digit run becomes one whitespace-separated token.
        # isdecimal() is used rather than isdigit() because isdigit() also
        # accepts characters such as superscript digits, which int() rejects
        # with a ValueError.
        digits_only = "".join(char if char.isdecimal() else " " for char in self.query)
        return sorted(int(token) for token in digits_only.split())
ref_query_db_index ADDED
Binary file (273 kB). View file
 
reference_query_db.json ADDED
The diff for this file is too large to render. See raw diff