aymanemalih commited on
Commit
bac8f2c
1 Parent(s): 0f8a902

Upload 6 files

Browse files
Files changed (6) hide show
  1. LIR.csv +0 -0
  2. conversation.py +66 -0
  3. gpt.py +100 -0
  4. lookup.py +17 -0
  5. main.py +10 -346
  6. show_csv.py +19 -0
LIR.csv ADDED
The diff for this file is too large to render. See raw diff
 
conversation.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import openai
3
+ import sys
4
+ import os
5
+
6
+ import csv
7
+ import json
8
+
9
+ import lookup
10
+ import gpt
11
+
12
+ openai.api_key = "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"
13
+ if not openai.api_key:
14
+ openai.api_key = input("Please enter your OpenAI API key: ")
15
+ print()
16
+
17
+ program_name = sys.argv.pop(0)
18
+
19
+ # CSV processing
20
+ csv_file_path = "LIR.csv" # Update with the correct path
21
+ with open(csv_file_path, newline='', encoding='utf-8') as csvfile:
22
+ reader = csv.DictReader(csvfile)
23
+ rows = list(reader)
24
+
25
+ # Configuration Parameters
26
+ chunk_size = 4000
27
+ overlap = 1000
28
+ limit = 20 # Change to 3 to get the top 3 answers
29
+ gpt.model = "gpt-3.5-turbo"
30
+
31
+ # Chunking CSV text
32
+ chunks = [row['texte'][i:i + chunk_size] for row in rows for i in range(0, len(row['texte']), chunk_size)]
33
+
34
+ print("Chunking CSV...\n")
35
+
36
+ def ask_question(question):
37
+ keywords = gpt.get_keywords(question)
38
+ matches = lookup.find_matches(chunks, keywords)
39
+ top_matches = list(matches.keys())[:limit]
40
+
41
+ responses = []
42
+
43
+ for i, chunk_id in enumerate(top_matches):
44
+ chunk = chunks[chunk_id]
45
+ response = gpt.answer_question(chunk, question)
46
+
47
+ if response.get("answer_found"):
48
+ matched_row = rows[chunk_id]
49
+ # Extract specific properties from the matched row
50
+ answer = response.get("response")
51
+
52
+ # Loop through the columns and add them to the JSON object
53
+ json_object = {"GPT_Response": answer}
54
+
55
+ for column_name, column_value in matched_row.items():
56
+ json_object[column_name] = column_value.encode("utf-8").decode("utf-8")
57
+
58
+
59
+ responses.append(json_object)
60
+
61
+ responses.append({"keywords:": keywords})
62
+
63
+ if not any(response.get("answer_found") for chunk_id in top_matches):
64
+ responses.append({"GPT_Response": "I'm sorry, but I can't find that information"})
65
+
66
+ return responses
gpt.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import json
3
+
4
+ model = "gpt-3.5-turbo"
5
+
6
+
7
+ def get_keywords(question):
8
+ prompt = f""" Je souhaite trouver la réponse à la question suivante dans une colonne d'un fichier csv. Veuillez me fournir 10 mots-clés et synonymes que je peux utiliser pour trouver les informations du csv. Un seul mot par mot-clé. Utilisez uniquement des lettres minuscules.
9
+ {question}"""
10
+
11
+
12
+ response = openai.ChatCompletion.create(
13
+ model=model,
14
+ messages=[
15
+ {
16
+ "role": "system",
17
+ "content": "Vous fournirez toujours 10 mots-clés incluant des synonymes pertinents et explicit des mots de la question d’origine. Les synonymes doivent être des termes juridiques couramment utilisés dans les articles de loi canadienne", },
18
+ {
19
+ "role": "user",
20
+ "content": prompt,
21
+ },
22
+ ],
23
+ functions=[
24
+ {
25
+ "name": "list_keywords",
26
+ "description": "Utilisez cette fonction pour donner à l'utilisateur une liste de mots-clés",
27
+ "parameters": {
28
+ "type": "object",
29
+ "properties": {
30
+ "list": {
31
+ "type": "array",
32
+ "items": {"type": "string", "description": "A keyword"},
33
+ "description": "A list of keywords",
34
+ }
35
+ },
36
+ },
37
+ "required": ["list"],
38
+ }
39
+ ],
40
+ function_call={"name": "list_keywords", "arguments": ["list"]},
41
+ )
42
+
43
+ arguments = response["choices"][0]["message"]["function_call"]["arguments"].lower()
44
+ keywords = json.loads(arguments)["list"]
45
+
46
+ return " ".join(keywords).split(" ")
47
+
48
+
49
+ def answer_question(chunk, question):
50
+ prompt = f"""```
51
+ {chunk}
52
+ ```
53
+
54
+ Sur la base des informations ci-dessus, quelle est la réponse à cette question?
55
+
56
+ ```
57
+ {question}
58
+ ```"""
59
+
60
+ response = openai.ChatCompletion.create(
61
+ model=model,
62
+ messages=[
63
+ {
64
+ "role": "system",
65
+ "content": "Définissez toujours answer_found sur false si la réponse à la question n'a pas été trouvée dans les informations fournies.",
66
+ },
67
+ {
68
+ "role": "user",
69
+ "content": prompt,
70
+ },
71
+ ],
72
+ functions=[
73
+ {
74
+ "name": "give_response",
75
+ "description": "Utilisez cette fonction pour donner la réponse et si la réponse à la question a été trouvée ou non dans le texte.",
76
+ "parameters": {
77
+ "type": "object",
78
+ "properties": {
79
+ "answer_found": {
80
+ "type": "boolean",
81
+ "description": "Définissez ceci sur true uniquement si le texte fourni inclut une réponse à la question",
82
+ },
83
+ "response": {
84
+ "type": "string",
85
+ "description": "La réponse complète à la question, si l'information était pertinente",
86
+ },
87
+ },
88
+ },
89
+ "required": ["answer_found"],
90
+ }
91
+ ],
92
+ )
93
+
94
+ try:
95
+ function_call = response["choices"][0]["message"]["function_call"]
96
+ arguments = function_call["arguments"].lower()
97
+ result = json.loads(arguments)
98
+ return result
99
+ except KeyError:
100
+ return {"answer_found": False, "response": ""}
lookup.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+
3
+ def find_matches(chunks, keywords, padding=500):
4
+ # Preprocess chunks
5
+ preprocessed_chunks = [' '.join(chunk.split()) for chunk in chunks]
6
+
7
+ # TF-IDF Vectorization
8
+ vectorizer = TfidfVectorizer(lowercase=True, stop_words='english')
9
+ tfidf_matrix = vectorizer.fit_transform(preprocessed_chunks)
10
+
11
+ # Compute cosine similarity between keywords and chunks
12
+ keyword_vector = vectorizer.transform([' '.join(keywords)])
13
+ cosine_similarities = tfidf_matrix.dot(keyword_vector.T).toarray().flatten()
14
+
15
+ # Rank chunks based on cosine similarity
16
+ results = {i: score for i, score in enumerate(cosine_similarities)}
17
+ return dict(sorted(results.items(), key=lambda item: item[1], reverse=True))
main.py CHANGED
@@ -1,352 +1,16 @@
1
- from flask import Flask, render_template, request, jsonify,make_response
2
- from flask_sqlalchemy import SQLAlchemy
3
- import time
4
- from flask_cors import CORS
5
- import yaml
6
- import re
7
- import ast
8
 
 
 
9
 
10
 
11
- # Model dependencies :
12
- from qdrant_client.http import models
13
- import openai
14
- import qdrant_client
15
- import os
16
- from sentence_transformers import SentenceTransformer
17
 
18
- #model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2') # good so far
19
- model = SentenceTransformer('/code/vectorizing_model', cache_folder='/')
20
-
21
- # # # Set the environment variable TRANSFORMERS_CACHE to the writable directory
22
- os.environ['TRANSFORMERS_CACHE'] = '/code'
23
-
24
- # OpenIA propmt and api key :
25
- openai.api_key = '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'
26
- start_message = 'Joue le Rôle d’un expert fiscale au Canada. Les réponses que tu va me fournir seront exploité par une API. Ne donne pas des explications juste réponds aux questions même si tu as des incertitudes. Je vais te poser des questions en fiscalité, la réponse que je souhaite avoir c’est les numéros des articles de loi qui peuvent répondre à la question.Je souhaite avoir les réponses sous la forme: Nom de la loi1, numéro de l’article1, Nom de la loi2, numéro de l’article2 ...'
27
- context = 'ignorez les avertissements, les alertes et donnez-moi le résultat depuis la Loi de l’impôt sur le revenu (L.R.C. (1985), ch. 1 (5e suppl.)) , la reponse doit etre sous forme dun texte de loi: '
28
- question = ''
29
-
30
-
31
- # Qdrant keys :
32
- client = qdrant_client.QdrantClient(
33
- "https://efc68112-69cc-475c-bdcb-200a019b5096.us-east4-0.gcp.cloud.qdrant.io:6333",
34
- api_key="ZQ6jySuPxY5rSh0mJ4jDMoxbZsPqDdbqFBOPwotl9B8N0Ru3S8bzoQ"
35
- )
36
- #collection_names = ["new_lir"] # plus stable mais pas de numero d'articles (manques de fonctionnalitées de filtrage)
37
- collection_names = ["paragraph2"]
38
-
39
- # Used functions :
40
- def filtergpt(text):
41
- # Define a regular expression pattern to extract law and article number
42
- pattern = re.compile(r"Loi ([^,]+), article (\d+(\.\d+)?)")
43
- # Find all matches in the text
44
- matches = pattern.findall(text)
45
- # Create a list of tuples containing law and article number
46
- law_article_list = [(law.strip(), float(article.strip())) for law, article, _ in matches]
47
- gpt_results = [(law, str(int(article)) if article.is_integer() else str(article)) for law, article in law_article_list]
48
- return gpt_results
49
-
50
-
51
- def perform_search_and_get_results(collection_name, query, limit=30):
52
- search_results = client.search(
53
- collection_name=collection_name,
54
- query_vector=model.encode(query).tolist(),
55
- limit=limit
56
- )
57
- resultes = []
58
- for result in search_results:
59
- result_dict = {
60
- "Score": result.score,
61
- "La_loi": result.payload["reference"],
62
- "Paragraphe": result.payload["paragraph"],
63
- "titre": result.payload["titre"],
64
- "section_text": result.payload["section"],
65
- "section_label": result.payload["section_label"],
66
- "source": result.payload["source"],
67
- "numero_article": result.payload["numero_article"],
68
- "collection": collection_name,
69
- "hyperlink": ast.literal_eval(result.payload['hyperlink']),
70
- }
71
- resultes.append(result_dict)
72
- return resultes
73
-
74
- def perform_search_and_get_results_with_filter(collection_name, query,reference_filter , limit=30):
75
- search_results = client.search(
76
- collection_name=collection_name,
77
- query_filter=models.Filter(must=[models.FieldCondition(key="numero_article",match=models.MatchValue(value=reference_filter+"aymane",),)]),
78
- query_vector=model.encode(query).tolist(),
79
- limit=1
80
- )
81
- resultes = []
82
- for result in search_results:
83
- result_dict = {
84
- "Score": result.score,
85
- "La_loi": result.payload["reference"],
86
- "Paragraphe": result.payload["paragraph"],
87
- "titre": result.payload["titre"],
88
- "section_text": result.payload["section"],
89
- "section_label": result.payload["section_label"],
90
- "source": result.payload["source"],
91
- "numero_article": result.payload["numero_article"],
92
- "collection": collection_name,
93
- "hyperlink": ast.literal_eval(result.payload['hyperlink']),
94
- }
95
- resultes.append(result_dict)
96
- return resultes
97
- # End of used functions
98
-
99
- app = Flask(__name__)
100
- db_config = yaml.safe_load(open('database.yaml'))
101
- app.config['SQLALCHEMY_DATABASE_URI'] = db_config['uri']
102
- db = SQLAlchemy(app)
103
- CORS(app, origins='*')
104
-
105
- class Question(db.Model):
106
- __tablename__ = "questions"
107
- id = db.Column(db.Integer, primary_key=True)
108
- date = db.Column(db.String(255))
109
- texte = db.Column(db.String(255))
110
-
111
- def __init__(self, date, texte):
112
- self.date = date
113
- self.texte = texte
114
-
115
- def __repr__(self):
116
- return '%s/%s/%s' % (self.id, self.date, self.texte)
117
-
118
-
119
- @app.route('/')
120
- def index():
121
- return render_template('home.html')
122
-
123
- @app.route('/questions', methods=['POST', 'GET'])
124
- def questions():
125
- # POST a data to database
126
- if request.method == 'POST':
127
- body = request.json
128
- date = body['date']
129
- texte = body['texte']
130
-
131
- data = Question(date, texte)
132
- db.session.add(data)
133
- db.session.commit()
134
-
135
- return jsonify({
136
- 'status': 'Data is posted to PostgreSQL!',
137
- 'date': date,
138
- 'texte': texte
139
- })
140
-
141
- # GET all data from database & sort by id
142
- if request.method == 'GET':
143
- # data = User.query.all()
144
- data = Question.query.all()
145
- print(data)
146
- dataJson = []
147
- for i in range(len(data)):
148
- # print(str(data[i]).split('/'))
149
- dataDict = {
150
- 'id': str(data[i]).split('/')[0],
151
- 'date': str(data[i]).split('/')[1],
152
- 'texte': str(data[i]).split('/')[2]
153
- }
154
- dataJson.append(dataDict)
155
- return jsonify(dataJson)
156
-
157
- @app.route('/questions/<string:id>', methods=['GET', 'DELETE', 'PUT'])
158
- def onedata(id):
159
-
160
- # GET a specific data by id
161
- if request.method == 'GET':
162
- data = Question.query.get(id)
163
- print(data)
164
- dataDict = {
165
- 'id': str(data).split('/')[0],
166
- 'date': str(data).split('/')[1],
167
- 'texte': str(data).split('/')[2]
168
- }
169
- return jsonify(dataDict)
170
-
171
- # DELETE a data
172
- if request.method == 'DELETE':
173
- delData = Question.query.filter_by(id=id).first()
174
- db.session.delete(delData)
175
- db.session.commit()
176
- return jsonify({'status': 'Data '+id+' is deleted from PostgreSQL!'})
177
-
178
- # UPDATE a data by id
179
- if request.method == 'PUT':
180
- body = request.json
181
- newDate = body['date']
182
- newTexte = body['texte']
183
- editData = Question.query.filter_by(id=id).first()
184
- editData.date = newDate
185
- editData.texte = newTexte
186
- db.session.commit()
187
- return jsonify({'status': 'Data '+id+' is updated from PostgreSQL!'})
188
-
189
- @app.route('/chat', methods=['OPTIONS'])
190
- def options():
191
- response = make_response()
192
- response.headers.add("Access-Control-Allow-Origin", "*")
193
- response.headers.add("Access-Control-Allow-Methods", "POST")
194
- response.headers.add("Access-Control-Allow-Headers", "Content-Type, Authorization")
195
- response.headers.add("Access-Control-Allow-Credentials", "true")
196
- return response
197
-
198
- @app.route('/chat', methods=['POST'])
199
- def chat():
200
- try:
201
- data = request.get_json()
202
- messages = data.get('messages', [])
203
-
204
- if messages:
205
- results = []
206
- # Update the model name to "text-davinci-003" (Ada)
207
- prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
208
- response = openai.completions.create(
209
- model="gpt-3.5-turbo-instruct",
210
- prompt=start_message +'\n'+ context + question ,
211
- max_tokens=500,
212
- temperature=0
213
- )
214
- date = time.ctime(time.time())
215
- texte = prompt
216
- data = Question(date, texte)
217
- db.session.add(data)
218
- db.session.commit()
219
- question_id = data.id
220
- resulta = response.choices[0].text
221
- chat_references = filtergpt(resulta)
222
- for law, article in chat_references:
223
- search_results = perform_search_and_get_results_with_filter(collection_names[0], prompt, reference_filter=article)
224
- results.extend(search_results)
225
- for collection_name in collection_names:
226
- search_results = perform_search_and_get_results(collection_name, prompt)
227
- results.extend(search_results)
228
- return jsonify({'question': {'id': question_id, 'date': date, 'texte': texte},'result_qdrant':results})
229
- else:
230
- return jsonify({'error': 'Invalid request'}), 400
231
- except Exception as e:
232
- return jsonify({'error': str(e)}), 500
233
-
234
-
235
- @app.route('/chatgrouped', methods=['OPTIONS'])
236
- def options_grouped():
237
- response = make_response()
238
- response.headers.add("Access-Control-Allow-Origin", "*")
239
- response.headers.add("Access-Control-Allow-Methods", "POST")
240
- response.headers.add("Access-Control-Allow-Headers", "Content-Type, Authorization")
241
- response.headers.add("Access-Control-Allow-Credentials", "true")
242
- return response
243
-
244
- @app.route('/chatgrouped', methods=['POST'])
245
- def chat_grouped():
246
- try:
247
- data = request.get_json()
248
- messages = data.get('messages', [])
249
-
250
- if messages:
251
- results = []
252
- # Update the model name to "text-davinci-003" (Ada)
253
- prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
254
- response = openai.completions.create(
255
- model="gpt-3.5-turbo-instruct",
256
- prompt=start_message +'\n'+ context + question ,
257
- max_tokens=500,
258
- temperature=0
259
- )
260
- date = time.ctime(time.time())
261
- texte = prompt
262
- data = Question(date, texte)
263
- db.session.add(data)
264
- db.session.commit()
265
- question_id = data.id
266
- resulta = response.choices[0].text
267
- chat_references = filtergpt(resulta)
268
- for law, article in chat_references:
269
- search_results = perform_search_and_get_results_with_filter(collection_names[0], prompt, reference_filter=article)
270
- results.extend(search_results)
271
- for collection_name in collection_names:
272
- search_results = perform_search_and_get_results(collection_name, prompt)
273
- results.extend(search_results)
274
- grouped_hits = {}
275
- for i, hit in enumerate(results, 1):
276
- second_number = hit['numero_article']
277
- if second_number not in grouped_hits:
278
- grouped_hits[second_number] = []
279
- grouped_hits[second_number].append(hit)
280
- return jsonify({'question': {'id': question_id, 'date': date, 'texte': texte},'result_qdrant':grouped_hits})
281
- else:
282
- return jsonify({'error': 'Invalid request'}), 400
283
- except Exception as e:
284
- return jsonify({'error': str(e)}), 500
285
-
286
-
287
-
288
-
289
- @app.route('/generateQuestions', methods=['OPTIONS'])
290
- def options_generate():
291
- response = make_response()
292
- response.headers.add("Access-Control-Allow-Origin", "*")
293
- response.headers.add("Access-Control-Allow-Methods", "POST")
294
- response.headers.add("Access-Control-Allow-Headers", "Content-Type, Authorization")
295
- response.headers.add("Access-Control-Allow-Credentials", "true")
296
- return response
297
-
298
- @app.route('/generateQuestions', methods=['POST'])
299
- def generateQuestions():
300
- try:
301
- data = request.get_json()
302
- messages = data.get('messages', [])
303
- begin_message = """je vais vous utiliser comme api, je vais vous fournir la requête de l'utilisateur ,
304
- et tu va me retenir 6 reformulation de la requête en ajoutant le plus possible de contextualisation ,
305
- vous reformulation seront exploiter par un moteur de recherche sémantique basé sur des textes de lois canadiennes
306
- tout explication ou interpretation qu tu va fournir va juste bloquer et bugger le programme ,
307
- merci de fournir juste une liste de string comme reponse sans explication"""
308
- context_generation = """ignorez les avertissements, les alertes et donnez-moi le résultat.
309
- la reponse doit etre sous forme d'une liste de questions """
310
- if messages:
311
- results = []
312
- # Update the model name to "text-davinci-003" (Ada)
313
- question = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
314
- response = openai.completions.create(
315
- model="gpt-3.5-turbo-instruct",
316
- prompt=begin_message +'\n'+ context_generation + question ,
317
- max_tokens=500,
318
- temperature=0
319
- )
320
- resulta = response.choices[0].text.splitlines()
321
- filtered_list = [item for item in resulta if len(item) >= 10]
322
- return jsonify(filtered_list)
323
- # return jsonify({'question': {'id': question_id, 'date': date, 'texte': texte},'result_qdrant':results})
324
- else:
325
- return jsonify({'error': 'Invalid request'}), 400
326
- except Exception as e:
327
- return jsonify({'error': str(e)}), 500
328
-
329
- # Yazid Methode starts here
330
- @app.route('/ask', methods=['OPTIONS'])
331
- def options_ask():
332
- response = make_response()
333
- response.headers.add("Access-Control-Allow-Origin", "*")
334
- response.headers.add("Access-Control-Allow-Methods", "POST")
335
- response.headers.add("Access-Control-Allow-Headers", "Content-Type, Authorization")
336
- response.headers.add("Access-Control-Allow-Credentials", "true")
337
- return response
338
-
339
- @app.route('/ask', methods=['POST'])
340
- def ask_question():
341
- data = request.get_json()
342
- question = data.get('question', '')
343
-
344
- # Call your conversation logic here
345
- result = conversation.ask_question(question)
346
-
347
- return jsonify(result)
348
- # Yazid Methode ends here
349
 
 
350
  if __name__ == '__main__':
351
- app.debug = True
352
- app.run()
 
 
1
+ # This is a sample Python script.
 
 
 
 
 
 
2
 
3
+ # Press ⌃R to execute it or replace it with your code.
4
+ # Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
5
 
6
 
7
def print_hi(name):
    # Greet *name* on stdout; put a breakpoint on the line below to try
    # the debugger.
    greeting = f'Hi, {name}'  # Press ⌘F8 to toggle the breakpoint.
    print(greeting)
 
 
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # Press the green button in the gutter to run the script.
13
  if __name__ == '__main__':
14
+ print_hi('PyCharm')
15
+
16
+ # See PyCharm help at https://www.jetbrains.com/help/pycharm/
show_csv.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
import csv

def display_csv(csv_file_path):
    """Print the first 10 values of the 'texte' column of the CSV at *csv_file_path*.

    Assumes the CSV has a header row containing a 'texte' column --
    raises KeyError otherwise.
    """
    # CSV processing
    with open(csv_file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        rows = list(reader)

    # Display via pandas for readable formatting.
    df = pd.DataFrame(rows)
    print("Entire CSV File:")
    print(df['texte'].head(10))

if __name__ == "__main__":
    # Replace 'LIR.csv' with the correct path to your CSV file
    csv_file_path = "LIR.csv"

    # Use the variable (the original ignored it and hard-coded "LIR.csv").
    display_csv(csv_file_path)