aymanemalih committed on
Commit
c3430a9
1 Parent(s): 41a27a0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +349 -10
main.py CHANGED
@@ -1,16 +1,355 @@
1
- # # This is a sample Python script.
 
 
 
 
 
 
2
 
3
- # # Press ⌃R to execute it or replace it with your code.
4
- # # Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
5
 
6
 
7
- # def print_hi(name):
8
- # # Use a breakpoint in the code line below to debug your script.
9
- # print(f'Hi, {name}') # Press ⌘F8 to toggle the breakpoint.
10
 
11
 
12
- # # Press the green button in the gutter to run the script.
13
- # if __name__ == '__main__':
14
- # print_hi('PyCharm')
 
 
 
15
 
16
- # # See PyCharm help at https://www.jetbrains.com/help/pycharm/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify,make_response
2
+ from flask_sqlalchemy import SQLAlchemy
3
+ import time
4
+ from flask_cors import CORS
5
+ import yaml
6
+ import re
7
+ import ast
8
 
 
 
9
 
10
 
 
 
 
11
 
12
 
13
+ # Model dependencies :
14
+ from qdrant_client.http import models
15
+ import openai
16
+ import qdrant_client
17
+ import os
18
+ from sentence_transformers import SentenceTransformer
19
 
20
# Sentence-embedding model used to vectorise search queries, loaded from a
# local directory. A previously tried alternative was
# 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'.
model = SentenceTransformer('/code/vectorizing_model', cache_folder='/')

# Set the environment variable TRANSFORMERS_CACHE to the writable directory.
# NOTE(review): this is assigned after the model above has been constructed;
# confirm it still takes effect where it is needed.
os.environ['TRANSFORMERS_CACHE'] = '/code'

# OpenAI prompt and API key.
# SECURITY: the key used to be committed in this file. It is now read from the
# OPENAI_API_KEY environment variable; the previously committed key should be
# treated as leaked and revoked.
openai.api_key = os.environ.get('OPENAI_API_KEY')
start_message = 'Joue le Rôle d’un expert fiscale au Canada. Les réponses que tu va me fournir seront exploité par une API. Ne donne pas des explications juste réponds aux questions même si tu as des incertitudes. Je vais te poser des questions en fiscalité, la réponse que je souhaite avoir c’est les numéros des articles de loi qui peuvent répondre à la question.Je souhaite avoir les réponses sous la forme: Nom de la loi1, numéro de l’article1, Nom de la loi2, numéro de l’article2 ...'
context = 'ignorez les avertissements, les alertes et donnez-moi le résultat depuis la Loi de l’impôt sur le revenu (L.R.C. (1985), ch. 1 (5e suppl.)) , la reponse doit etre sous forme dun texte de loi: '
question = ''


# Qdrant connection.
# SECURITY: the API key used to be committed in this file. It is now read from
# QDRANT_API_KEY (the committed key should be revoked). The endpoint can be
# overridden with QDRANT_URL and defaults to the cluster used so far.
client = qdrant_client.QdrantClient(
    os.environ.get(
        'QDRANT_URL',
        "https://efc68112-69cc-475c-bdcb-200a019b5096.us-east4-0.gcp.cloud.qdrant.io:6333",
    ),
    api_key=os.environ.get('QDRANT_API_KEY'),
)
# The "new_lir" collection is more stable but has no article numbers
# (so it lacks the filtering features used below).
collection_names = ["paragraph2"]
40
+
41
+ # Used functions :
42
def filtergpt(text):
    """Extract ``(law, article)`` pairs from a free-text answer.

    Every occurrence of ``Loi <name>, article <number>`` in *text* is
    matched, where ``<number>`` is a run of digits optionally followed by
    ``.`` and more digits.

    Args:
        text: The text to scan.

    Returns:
        A list of ``(law, article)`` tuples of strings, in order of
        appearance. ``law`` is the text between ``Loi `` and the comma with
        surrounding whitespace stripped. ``article`` is the number as it was
        written, except that a fractional part made only of zeros is dropped
        (``"5.0"`` becomes ``"5"``).
    """
    pattern = re.compile(r"Loi ([^,]+), article (\d+(\.\d+)?)")
    gpt_results = []
    for law, article, _fraction_group in pattern.findall(text):
        whole, _dot, fraction = article.partition(".")
        # Keep the article number as text: converting it to float and back
        # silently turned "12.10" into "12.1", which is a different article.
        # Only an all-zero fraction is normalised away.
        if not fraction.strip("0"):
            article = whole
        gpt_results.append((law.strip(), article))
    return gpt_results
51
+
52
+
53
def perform_search_and_get_results(collection_name, query, limit=30):
    """Run a vector search on one collection and flatten the hits.

    *query* is encoded with the module-level ``model`` and sent to the
    module-level Qdrant ``client``.

    Args:
        collection_name: Name of the collection to search.
        query: Text to encode and search for.
        limit: Maximum number of hits requested from the search.

    Returns:
        A list with one dict per hit, holding the hit score, selected payload
        fields and the collection name. A ``KeyError`` is raised if a hit's
        payload lacks one of the fields read below.
    """
    hits = client.search(
        collection_name=collection_name,
        query_vector=model.encode(query).tolist(),
        limit=limit,
    )
    return [
        {
            "Score": hit.score,
            "La_loi": hit.payload["reference"],
            "Paragraphe": hit.payload["paragraph"],
            "titre": hit.payload["titre"],
            "section_text": hit.payload["section"],
            "section_label": hit.payload["section_label"],
            "source": hit.payload["source"],
            "numero_article": hit.payload["numero_article"],
            "collection": collection_name,
            # Presumably stored as the text form of a Python literal — confirm.
            "hyperlink": ast.literal_eval(hit.payload['hyperlink']),
        }
        for hit in hits
    ]
75
+
76
def perform_search_and_get_results_with_filter(collection_name, query,reference_filter , limit=30):
    """Run a vector search restricted to one ``numero_article`` value.

    Args:
        collection_name: Name of the collection to search.
        query: Text to encode and search for.
        reference_filter: Article number (string) used to build the filter.
        limit: Accepted for symmetry with the unfiltered search.
            NOTE(review): this value is not used; the search below always
            asks for a single hit. Confirm whether that is intended.

    Returns:
        A list with one dict per hit (at most one), in the same shape as the
        unfiltered search results.
    """
    # NOTE(review): the literal suffix "aymane" is appended to the filter
    # value, so only payloads whose numero_article ends with it can match.
    # Confirm this mirrors how the collection was indexed.
    article_condition = models.FieldCondition(
        key="numero_article",
        match=models.MatchValue(value=reference_filter+"aymane",),
    )
    hits = client.search(
        collection_name=collection_name,
        query_filter=models.Filter(must=[article_condition]),
        query_vector=model.encode(query).tolist(),
        limit=1,
    )
    return [
        {
            "Score": hit.score,
            "La_loi": hit.payload["reference"],
            "Paragraphe": hit.payload["paragraph"],
            "titre": hit.payload["titre"],
            "section_text": hit.payload["section"],
            "section_label": hit.payload["section_label"],
            "source": hit.payload["source"],
            "numero_article": hit.payload["numero_article"],
            "collection": collection_name,
            # Presumably stored as the text form of a Python literal — confirm.
            "hyperlink": ast.literal_eval(hit.payload['hyperlink']),
        }
        for hit in hits
    ]
# End of used functions
99
+ # End of used functions
100
+
101
# Flask application, database binding and CORS configuration.
app = Flask(__name__)
# Read the database settings with a context manager so the file handle is
# closed right away instead of being left open for the process lifetime.
with open('database.yaml') as db_config_file:
    db_config = yaml.safe_load(db_config_file)
app.config['SQLALCHEMY_DATABASE_URI'] = db_config['uri']
db = SQLAlchemy(app)
CORS(app, origins='*')
106
+
107
class Question(db.Model):
    """Database model for one stored question (table ``questions``)."""

    __tablename__ = "questions"

    id = db.Column(db.Integer, primary_key=True)
    date = db.Column(db.String(255))
    texte = db.Column(db.String(255))

    def __init__(self, date, texte):
        """Store the given date and question text on the new row."""
        self.date, self.texte = date, texte

    def __repr__(self):
        """Return ``id/date/texte`` joined with slashes."""
        return "/".join(str(part) for part in (self.id, self.date, self.texte))
119
+
120
+
121
@app.route('/')
def index():
    """Serve the landing page rendered from the ``home.html`` template."""
    home_page = render_template('home.html')
    return home_page
124
+
125
@app.route('/questions', methods=['POST', 'GET'])
def questions():
    """Create a question (POST) or list all stored questions (GET).

    POST expects a JSON object with ``date`` and ``texte``; the row is
    inserted and the values are echoed back. A body that is not a JSON object
    or lacks one of the two fields yields a 400 response.

    GET returns a JSON list of ``{'id', 'date', 'texte'}`` objects, with
    ``id`` rendered as a string.
    """
    if request.method == 'POST':
        body = request.get_json(silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'Invalid request'}), 400
        missing = [field for field in ('date', 'texte') if field not in body]
        if missing:
            return jsonify({'error': 'Missing field(s): ' + ', '.join(missing)}), 400
        date = body['date']
        texte = body['texte']

        record = Question(date, texte)
        db.session.add(record)
        db.session.commit()

        return jsonify({
            'status': 'Data is posted to PostgreSQL!',
            'date': date,
            'texte': texte
        })

    # Only GET reaches this point (the route accepts POST and GET).
    # Read the columns directly: rebuilding them by splitting repr() on '/'
    # broke as soon as a date or a text contained a slash.
    rows = Question.query.all()
    return jsonify([
        {'id': str(row.id), 'date': row.date, 'texte': row.texte}
        for row in rows
    ])
158
+
159
@app.route('/questions/<string:id>', methods=['GET', 'DELETE', 'PUT'])
def onedata(id):
    """Read (GET), delete (DELETE) or update (PUT) one question by id.

    Args:
        id: Identifier taken from the URL, as a string.

    Returns:
        GET: the row as ``{'id', 'date', 'texte'}`` with ``id`` as a string.
        DELETE / PUT: a JSON status message.
        404 if no row has this id; 400 if a PUT body is not a JSON object
        holding both ``date`` and ``texte``.
    """
    record = Question.query.filter_by(id=id).first()
    if record is None:
        # Previously a missing row surfaced as an unhandled exception (500).
        return jsonify({'error': 'Data '+id+' not found'}), 404

    # GET a specific row by id
    if request.method == 'GET':
        # Read the columns directly instead of splitting repr() on '/',
        # which broke when a date or a text contained a slash.
        return jsonify({
            'id': str(record.id),
            'date': record.date,
            'texte': record.texte
        })

    # DELETE a row
    if request.method == 'DELETE':
        db.session.delete(record)
        db.session.commit()
        return jsonify({'status': 'Data '+id+' is deleted from PostgreSQL!'})

    # UPDATE a row by id (only PUT remains)
    body = request.get_json(silent=True)
    if not isinstance(body, dict) or 'date' not in body or 'texte' not in body:
        return jsonify({'error': 'Invalid request'}), 400
    record.date = body['date']
    record.texte = body['texte']
    db.session.commit()
    return jsonify({'status': 'Data '+id+' is updated from PostgreSQL!'})
190
+
191
@app.route('/chat', methods=['OPTIONS'])
def options():
    """Answer the CORS preflight (OPTIONS) request for ``/chat``."""
    # NOTE(review): a wildcard origin combined with Allow-Credentials "true"
    # is, as far as I know, rejected by browsers for credentialed requests —
    # confirm whether credentials are actually needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "POST"),
        ("Access-Control-Allow-Headers", "Content-Type, Authorization"),
        ("Access-Control-Allow-Credentials", "true"),
    )
    preflight = make_response()
    for header_name, header_value in cors_headers:
        preflight.headers.add(header_name, header_value)
    return preflight
199
+
200
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat request with matching law paragraphs.

    Expects a JSON body with a non-empty ``messages`` list whose items have
    ``role`` and ``content``. The conversation is sent to the OpenAI
    completion endpoint, stored as a ``Question`` row, and then used to
    search Qdrant: first once per article cited in the completion, then once
    per collection without a filter.

    Returns:
        JSON with the stored question and the flat list of hits under
        ``result_qdrant``; 400 when ``messages`` is empty or missing; 500 with
        the exception text on any other failure.
    """
    try:
        data = request.get_json()
        messages = data.get('messages', [])
        if not messages:
            return jsonify({'error': 'Invalid request'}), 400

        prompt = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
        # The user's conversation must be part of the completion prompt. It
        # used to append the module-level `question`, which is always the
        # empty string, so the model never saw what was asked.
        response = openai.completions.create(
            model="gpt-3.5-turbo-instruct",
            prompt=start_message + '\n' + context + prompt,
            max_tokens=500,
            temperature=0
        )

        date = time.ctime(time.time())
        texte = prompt
        # NOTE(review): `texte` is declared as String(255); long conversations
        # may not fit — confirm the column size.
        record = Question(date, texte)
        db.session.add(record)
        db.session.commit()
        question_id = record.id

        results = []
        # Targeted lookups for each article the completion cited.
        for _law, article in filtergpt(response.choices[0].text):
            results.extend(
                perform_search_and_get_results_with_filter(
                    collection_names[0], prompt, reference_filter=article
                )
            )
        # Followed by the plain semantic search on every collection.
        for collection_name in collection_names:
            results.extend(perform_search_and_get_results(collection_name, prompt))

        return jsonify({'question': {'id': question_id, 'date': date, 'texte': texte},'result_qdrant':results})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
235
+
236
+
237
@app.route('/chatgrouped', methods=['OPTIONS'])
def options_grouped():
    """Answer the CORS preflight (OPTIONS) request for ``/chatgrouped``."""
    # NOTE(review): a wildcard origin combined with Allow-Credentials "true"
    # is, as far as I know, rejected by browsers for credentialed requests —
    # confirm whether credentials are actually needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "POST"),
        ("Access-Control-Allow-Headers", "Content-Type, Authorization"),
        ("Access-Control-Allow-Credentials", "true"),
    )
    preflight = make_response()
    for header_name, header_value in cors_headers:
        preflight.headers.add(header_name, header_value)
    return preflight
245
+
246
@app.route('/chatgrouped', methods=['POST'])
def chat_grouped():
    """Answer a chat request with law paragraphs grouped by article number.

    Same flow as ``/chat``: the conversation in ``messages`` is sent to the
    OpenAI completion endpoint, stored as a ``Question`` row and used to
    search Qdrant (once per article cited in the completion, then once per
    collection). The hits are then grouped by their ``numero_article``.

    Returns:
        JSON with the stored question and, under ``result_qdrant``, a mapping
        from article number to the list of hits for it; 400 when ``messages``
        is empty or missing; 500 with the exception text on any other failure.
    """
    try:
        data = request.get_json()
        messages = data.get('messages', [])
        if not messages:
            return jsonify({'error': 'Invalid request'}), 400

        prompt = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
        # The user's conversation must be part of the completion prompt. It
        # used to append the module-level `question`, which is always the
        # empty string, so the model never saw what was asked.
        response = openai.completions.create(
            model="gpt-3.5-turbo-instruct",
            prompt=start_message + '\n' + context + prompt,
            max_tokens=500,
            temperature=0
        )

        date = time.ctime(time.time())
        texte = prompt
        # NOTE(review): `texte` is declared as String(255); long conversations
        # may not fit — confirm the column size.
        record = Question(date, texte)
        db.session.add(record)
        db.session.commit()
        question_id = record.id

        results = []
        # Targeted lookups for each article the completion cited.
        for _law, article in filtergpt(response.choices[0].text):
            results.extend(
                perform_search_and_get_results_with_filter(
                    collection_names[0], prompt, reference_filter=article
                )
            )
        # Followed by the plain semantic search on every collection.
        for collection_name in collection_names:
            results.extend(perform_search_and_get_results(collection_name, prompt))

        # Bucket the hits by article number, keeping first-seen order.
        grouped_hits = {}
        for hit in results:
            grouped_hits.setdefault(hit['numero_article'], []).append(hit)

        return jsonify({'question': {'id': question_id, 'date': date, 'texte': texte},'result_qdrant':grouped_hits})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
287
+
288
+
289
+
290
+
291
@app.route('/generateQuestions', methods=['OPTIONS'])
def options_generate():
    """Answer the CORS preflight (OPTIONS) request for ``/generateQuestions``."""
    # NOTE(review): a wildcard origin combined with Allow-Credentials "true"
    # is, as far as I know, rejected by browsers for credentialed requests —
    # confirm whether credentials are actually needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "POST"),
        ("Access-Control-Allow-Headers", "Content-Type, Authorization"),
        ("Access-Control-Allow-Credentials", "true"),
    )
    preflight = make_response()
    for header_name, header_value in cors_headers:
        preflight.headers.add(header_name, header_value)
    return preflight
299
+
300
@app.route('/generateQuestions', methods=['POST'])
def generateQuestions():
    """Ask the completion model for reformulations of the user's request.

    Expects a JSON body with a non-empty ``messages`` list whose items have
    ``role`` and ``content``. The conversation is appended to a fixed French
    instruction prompt and sent to the OpenAI completion endpoint.

    Returns:
        A JSON list with the lines of the completion that are at least 10
        characters long; 400 when ``messages`` is empty or missing; 500 with
        the exception text on any other failure.
    """
    try:
        payload = request.get_json()
        messages = payload.get('messages', [])
        if not messages:
            return jsonify({'error': 'Invalid request'}), 400

        # Instruction prompt (runtime text, intentionally left in French).
        begin_message = (
            "je vais vous utiliser comme api, je vais vous fournir la requête de l'utilisateur ,\n"
            "et tu va me retenir 6 reformulation de la requête en ajoutant le plus possible de contextualisation ,\n"
            "vous reformulation seront exploiter par un moteur de recherche sémantique basé sur des textes de lois canadiennes\n"
            "tout explication ou interpretation qu tu va fournir va juste bloquer et bugger le programme ,\n"
            "merci de fournir juste une liste de string comme reponse sans explication"
        )
        context_generation = (
            "ignorez les avertissements, les alertes et donnez-moi le résultat.\n"
            "la reponse doit etre sous forme d'une liste de questions "
        )

        question = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
        response = openai.completions.create(
            model="gpt-3.5-turbo-instruct",
            prompt=begin_message + '\n' + context_generation + question,
            max_tokens=500,
            temperature=0,
        )

        # Drop blank or very short lines from the completion.
        completion_lines = response.choices[0].text.splitlines()
        return jsonify([line for line in completion_lines if len(line) >= 10])
    except Exception as e:
        return jsonify({'error': str(e)}), 500
330
+
331
+ # Yazid Methode starts here
332
@app.route('/ask', methods=['OPTIONS'])
def options_ask():
    """Answer the CORS preflight (OPTIONS) request for ``/ask``."""
    # NOTE(review): a wildcard origin combined with Allow-Credentials "true"
    # is, as far as I know, rejected by browsers for credentialed requests —
    # confirm whether credentials are actually needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "POST"),
        ("Access-Control-Allow-Headers", "Content-Type, Authorization"),
        ("Access-Control-Allow-Credentials", "true"),
    )
    preflight = make_response()
    for header_name, header_value in cors_headers:
        preflight.headers.add(header_name, header_value)
    return preflight
340
+
341
@app.route('/ask', methods=['POST'])
def ask_question():
    """Forward the ``question`` field of the JSON body to the conversation logic.

    Returns:
        The conversation result as JSON, or ``{'error': ...}`` with status 500
        if anything fails — the same error shape as the other POST endpoints
        in this file.
    """
    try:
        data = request.get_json()
        question = data.get('question', '')

        # NOTE(review): `conversation` is not defined or imported anywhere in
        # the visible source, so this call raises NameError unless it is
        # provided elsewhere — confirm where it should come from.
        result = conversation.ask_question(question)

        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
350
+ # Yazid Methode ends here
351
+
352
+
353
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code, so it is no longer forced on.
    # Set FLASK_DEBUG=1 to enable it for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled)