Artteiv committed on
Commit
f7e0e10
1 Parent(s): 973ee97
.gitignore CHANGED
@@ -1,9 +1,7 @@
1
  arxivdb/
2
  models/
3
  __pycache__/
4
- chat/__pycache__
5
- chat/arxiv_bot/__pycache__
6
- chatbot_django/__pycache__
7
  apikey.txt
8
  db.sqlite3
9
  hotfix.ipynb
 
1
  arxivdb/
2
  models/
3
  __pycache__/
4
+ *.pyc
 
 
5
  apikey.txt
6
  db.sqlite3
7
  hotfix.ipynb
chat/__pycache__/consumers.cpython-310.pyc CHANGED
Binary files a/chat/__pycache__/consumers.cpython-310.pyc and b/chat/__pycache__/consumers.cpython-310.pyc differ
 
chat/__pycache__/model_manage.cpython-310.pyc CHANGED
Binary files a/chat/__pycache__/model_manage.cpython-310.pyc and b/chat/__pycache__/model_manage.cpython-310.pyc differ
 
chat/consumers.py CHANGED
@@ -13,9 +13,9 @@ class ChatConsumer(WebsocketConsumer):
13
  pass
14
  def receive(self, text_data):
15
  text_data_json = json.loads(text_data)
16
- message = text_data_json["message"]
17
  print(message)
18
- record, messagee = md.full_chain_single_question(message, self.db_instance)
19
  print("First answer: ",record)
20
  self.send(text_data=json.dumps({"message": messagee}))
21
 
 
13
  pass
14
  def receive(self, text_data):
15
  text_data_json = json.loads(text_data)
16
+ message = text_data_json["messages"]
17
  print(message)
18
+ record, messagee = md.full_chain_history_question(message, self.db_instance)
19
  print("First answer: ",record)
20
  self.send(text_data=json.dumps({"message": messagee}))
21
 
chat/model_manage.py CHANGED
@@ -194,6 +194,7 @@ def full_chain_history_question(chat_history: list, db_instance):
194
  else:
195
  QA_Prompt = make_answer_prompt(temp_chat[-1]["parts"][0], contexts)
196
  temp_chat[-1]["parts"] = QA_Prompt
 
197
  answer = model.generate_content(temp_chat).text
198
  return temp_answer, answer
199
  except Exception as e:
 
194
  else:
195
  QA_Prompt = make_answer_prompt(temp_chat[-1]["parts"][0], contexts)
196
  temp_chat[-1]["parts"] = QA_Prompt
197
+ print(temp_chat)
198
  answer = model.generate_content(temp_chat).text
199
  return temp_answer, answer
200
  except Exception as e:
chat/templates/index.html CHANGED
@@ -206,7 +206,7 @@
206
  this.chatSocket.onmessage = (event) => {
207
  var mes = JSON.parse(event.data)
208
  this.messages.push({
209
- role: "Chat bot",
210
  content: mes.message
211
  });
212
  console.log('Message received: ', event.data);
 
206
  this.chatSocket.onmessage = (event) => {
207
  var mes = JSON.parse(event.data)
208
  this.messages.push({
209
+ role: "model",
210
  content: mes.message
211
  });
212
  console.log('Message received: ', event.data);
consumers.py DELETED
@@ -1,21 +0,0 @@
1
- import json
2
- from . import model_manage as md
3
- from chat.arxiv_bot.arxiv_bot_utils import ArxivSQL
4
- from channels.generic.websocket import WebsocketConsumer
5
-
6
-
7
- class ChatConsumer(WebsocketConsumer):
8
- def connect(self):
9
- self.accept()
10
- self.db_instance = ArxivSQL()
11
-
12
- def disconnect(self, close_code):
13
- pass
14
- def receive(self, text_data):
15
- text_data_json = json.loads(text_data)
16
- message = text_data_json["messages"]
17
- print(message)
18
- record, messagee = md.full_chain_history_question(message, self.db_instance)
19
- print("First answer: ",record)
20
- self.send(text_data=json.dumps({"message": messagee}))
21
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model_manage.py DELETED
@@ -1,201 +0,0 @@
1
- # my_app/model_manager.py
2
- import google.generativeai as genai
3
- import chat.arxiv_bot.arxiv_bot_utils as utils
4
- import json
5
-
6
- model = None
7
-
8
- def create_model():
9
- with open("apikey.txt","r") as apikey:
10
- key = apikey.readline()
11
- genai.configure(api_key=key)
12
- for m in genai.list_models():
13
- if 'generateContent' in m.supported_generation_methods:
14
- print(m.name)
15
- print("He was there")
16
- config = genai.GenerationConfig(max_output_tokens=2048,
17
- temperature=0.7)
18
- safety_settings = [
19
- {
20
- "category": "HARM_CATEGORY_DANGEROUS",
21
- "threshold": "BLOCK_NONE",
22
- },
23
- {
24
- "category": "HARM_CATEGORY_HARASSMENT",
25
- "threshold": "BLOCK_NONE",
26
- },
27
- {
28
- "category": "HARM_CATEGORY_HATE_SPEECH",
29
- "threshold": "BLOCK_NONE",
30
- },
31
- {
32
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
33
- "threshold": "BLOCK_NONE",
34
- },
35
- {
36
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
37
- "threshold": "BLOCK_NONE",
38
- },
39
- ]
40
- global model
41
- model = genai.GenerativeModel("gemini-pro",
42
- generation_config=config,
43
- safety_settings=safety_settings)
44
- return model
45
-
46
- def get_model():
47
- global model
48
- if model is None:
49
- # Khởi tạo model ở đây
50
- model = create_model() # Giả sử create_model là hàm tạo model của bạn
51
- return model
52
-
53
- def extract_keyword_prompt(query):
54
- """A prompt that return a JSON block as arguments for querying database"""
55
-
56
- prompt = (
57
- """[INST] SYSTEM: You are an assistant that choose only one action below based on guest question.
58
- 1. If the guest question is asking for a single specific document or article with explicit title, you need to respond the information in JSON format with 2 keys "title", "author" if found any above. The authors are separated with the word 'and'.
59
- 2. If the guest question is asking for relevant informations about a topic, you need to respond the information in JSON format with 2 keys "keywords", "description", include a list of keywords represent the main academic topic, \
60
- and a description about the main topic. You may paraphrase the keywords to add more. \
61
- 3. If the guest is not asking for any informations or documents, you need to respond with a polite answer in JSON format with 1 key "answer".
62
- QUESTION: '{query}'
63
- [/INST]
64
- ANSWER:
65
- """
66
- ).format(query=query)
67
-
68
- return prompt
69
-
70
- def make_answer_prompt(input, contexts):
71
- """A prompt that return the final answer, based on the queried context"""
72
-
73
- prompt = (
74
- """[INST] You are a library assistant that help to search articles and documents based on user's question.
75
- From guest's question, you have found some records and documents that may help. Now you need to answer the guest with the information found.
76
- If no information found in the database, you may generate some other recommendation related to user's question using your own knowledge. Each article or paper must have a link to the pdf download page.
77
- You should answer in a conversational form politely.
78
- QUESTION: '{input}'
79
- INFORMATION: '{contexts}'
80
- [/INST]
81
- ANSWER:
82
- """
83
- ).format(input=input, contexts=contexts)
84
-
85
- return prompt
86
-
87
- def response(args, db_instance):
88
- """Create response context, based on input arguments"""
89
- keys = list(dict.keys(args))
90
- if "answer" in keys:
91
- return args['answer'], None # trả lời trực tiếp
92
-
93
- if "keywords" in keys:
94
- # perform query
95
- query_texts = args["description"]
96
- keywords = args["keywords"]
97
- results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
98
- # print(results)
99
- ids = results['metadatas'][0]
100
- if len(ids) == 0:
101
- # go crawl some
102
- new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
103
- print("Got new records: ",len(new_records))
104
- if type(new_records) == str:
105
- return "Error occured, information not found", new_records
106
- utils.db.add(new_records)
107
- db_instance.add(new_records)
108
- results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
109
- ids = results['metadatas'][0]
110
- print("Re-queried on chromadb, results: ",ids)
111
- paper_id = [id['paper_id'] for id in ids]
112
- paper_info = db_instance.query_id(paper_id)
113
- print(paper_info)
114
- records = [] # get title (2), author (3), link (6)
115
- result_string = ""
116
- if paper_info:
117
- for i in range(len(paper_info)):
118
- result_string += "Title: {}, Author: {}, Link: {}".format(paper_info[i][2],paper_info[i][3],paper_info[i][6])
119
- records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])
120
- return result_string, records
121
- else:
122
- return "Information not found", "Information not found"
123
- # invoke llm and return result
124
-
125
- if "title" in keys:
126
- title = args['title']
127
- authors = utils.authors_str_to_list(args['author'])
128
- paper_info = db_instance.query(title = title,author = authors)
129
- # if query not found then go crawl brh
130
- # print(paper_info)
131
-
132
- if len(paper_info) == 0:
133
- new_records = utils.crawl_exact_paper(title=title,author=authors)
134
- print("Got new records: ",len(new_records))
135
- if type(new_records) == str:
136
- # print(new_records)
137
- return "Error occured, information not found", "Information not found"
138
- utils.db.add(new_records)
139
- db_instance.add(new_records)
140
- paper_info = db_instance.query(title = title,author = authors)
141
- print("Re-queried on chromadb, results: ",paper_info)
142
- # -------------------------------------
143
- records = [] # get title (2), author (3), link (6)
144
- result_string = ""
145
- for i in range(len(paper_info)):
146
- result_string += "Title: {}, Author: {}, Link: {}".format(paper_info[i][2],paper_info[i][3],paper_info[i][6])
147
- records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])
148
- # process results:
149
- if len(result_string) == 0:
150
- return "Information not found", "Information not found"
151
- return result_string, records
152
- # invoke llm and return result
153
- def full_chain_single_question(input_prompt, db_instance):
154
- try:
155
- first_prompt = extract_keyword_prompt(input_prompt)
156
- temp_answer = model.generate_content(first_prompt).text
157
-
158
- args = json.loads(utils.trimming(temp_answer))
159
- contexts, results = response(args, db_instance)
160
- if not results:
161
- # print(contexts)
162
- return "Random question, direct return", contexts
163
- else:
164
- output_prompt = make_answer_prompt(input_prompt,contexts)
165
- answer = model.generate_content(output_prompt).text
166
- return temp_answer, answer
167
- except Exception as e:
168
- # print(e)
169
- return temp_answer, "Error occured: " + str(e)
170
-
171
-
172
- def format_chat_history_from_web(chat_history: list):
173
- temp_chat = []
174
- for message in chat_history:
175
- temp_chat.append(
176
- {
177
- "role": message["role"],
178
- "parts": [message["content"]]
179
- }
180
- )
181
- return temp_chat
182
-
183
- def full_chain_history_question(chat_history: list, db_instance):
184
- try:
185
- temp_chat = format_chat_history_from_web(chat_history)
186
- first_prompt = extract_keyword_prompt(temp_chat[-1]["parts"][0])
187
- temp_answer = model.generate_content(first_prompt).text
188
-
189
- args = json.loads(utils.trimming(temp_answer))
190
- contexts, results = response(args, db_instance)
191
- if not results:
192
- # print(contexts)
193
- return "Random question, direct return", contexts
194
- else:
195
- QA_Prompt = make_answer_prompt(temp_chat[-1]["parts"][0], contexts)
196
- temp_chat[-1]["parts"] = QA_Prompt
197
- answer = model.generate_content(temp_chat).text
198
- return temp_answer, answer
199
- except Exception as e:
200
- # print(e)
201
- return temp_answer, "Error occured: " + str(e)