MarcdeFalco committed on
Commit
f7b57e5
·
1 Parent(s): d78dcaa

Update FAISS DB with better chunking; add tabs and visualization

Browse files
app.py CHANGED
@@ -3,6 +3,8 @@ from huggingface_hub import login, InferenceClient
3
  import os
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
6
 
7
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
8
 
@@ -16,6 +18,18 @@ db_code = FAISS.load_local("faiss_code_education",
16
  embeddings,
17
  allow_dangerous_deserialization=True)
18
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  system_prompt = """Tu es un assistant juridique spécialisé dans le Code de l'éducation français.
20
  Ta mission est d'aider les utilisateurs à comprendre la législation en répondant à leurs questions.
21
 
@@ -45,6 +59,9 @@ def query_rag(query, model, system_prompt):
45
  messages = [ { "role" : "system", "content" : system_prompt } ]
46
  messages.append( { "role" : "user", "content" : user } )
47
 
 
 
 
48
  chat_completion = client.chat_completion(
49
  messages=messages,
50
  model=model,
@@ -53,17 +70,27 @@ def query_rag(query, model, system_prompt):
53
  return chat_completion.choices[0].message.content, article_dict
54
 
55
  def create_context_response(response, article_dict):
56
- response += '\n\n**Références**\n\n'
57
  for i, article in enumerate(article_dict):
58
  art = article_dict[article]
59
- response += '* **' + art['chemin'] + '** : '+ art['texte'].replace('\n', '\n ')+'\n'
60
-
61
- return response
62
 
63
  def chat_interface(query, model, system_prompt):
64
  response, article_dict = query_rag(query, model, system_prompt)
65
- response_with_context = create_context_response(response, article_dict)
66
- return response_with_context
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  with gr.Blocks(title="Assistant Juridique pour le Code de l'éducation (Beta)") as demo:
69
  gr.Markdown(
@@ -87,18 +114,30 @@ with gr.Blocks(title="Assistant Juridique pour le Code de l'éducation (Beta)")
87
  "meta-llama/Meta-Llama-3-8B-Instruct",
88
  "HuggingFaceH4/zephyr-7b-beta",
89
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
90
- "mistralai/Mixtral-8x22B-v0.1"
 
91
  ],
92
  value="meta-llama/Meta-Llama-3-70B-Instruct")
93
 
94
  submit_button = gr.Button("Envoyer")
95
 
96
- response_box = gr.Markdown()
97
-
98
- system_box = gr.Textbox(label="Invite systeme", value=system_prompt)
 
 
 
 
 
 
 
 
 
 
99
 
100
  submit_button.click(chat_interface,
101
  inputs=[query_box, model, system_box],
102
- outputs=[response_box])
 
103
 
104
  demo.launch()
 
3
  import os
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ import umap
7
+ import pandas as pd
8
 
9
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
10
 
 
18
  embeddings,
19
  allow_dangerous_deserialization=True)
20
 
21
+ reducer = umap.UMAP()
22
+ index = db_code.index
23
+ ntotal = min(index.ntotal, 4998)
24
+ embeds = index.reconstruct_n(0, ntotal)
25
+ umap_embeds = reducer.fit_transform(embeds)
26
+
27
+ articles_df = pd.DataFrame({
28
+ "x" : umap_embeds[:,0],
29
+ "y" : umap_embeds[:,1],
30
+ "type" : [ "Source" ] * len(umap_embeds),
31
+ })
32
+
33
  system_prompt = """Tu es un assistant juridique spécialisé dans le Code de l'éducation français.
34
  Ta mission est d'aider les utilisateurs à comprendre la législation en répondant à leurs questions.
35
 
 
59
  messages = [ { "role" : "system", "content" : system_prompt } ]
60
  messages.append( { "role" : "user", "content" : user } )
61
 
62
+ if "factice" in model:
63
+ return user, article_dict
64
+
65
  chat_completion = client.chat_completion(
66
  messages=messages,
67
  model=model,
 
70
  return chat_completion.choices[0].message.content, article_dict
71
 
72
  def create_context_response(response, article_dict):
73
+ context = '\n'
74
  for i, article in enumerate(article_dict):
75
  art = article_dict[article]
76
+ context += '* **' + art['chemin'] + '** : '+ art['texte'].replace('\n', '\n ')+'\n'
77
+ return context
 
78
 
79
  def chat_interface(query, model, system_prompt):
80
  response, article_dict = query_rag(query, model, system_prompt)
81
+ context = create_context_response(response, article_dict)
82
+ return response, context
83
+
84
+ def update_plot(query):
85
+ query_embed = embeddings.embed_documents([query])[0]
86
+ query_umap_embed = reducer.transform([query_embed])
87
+
88
+ data = {
89
+ "x": umap_embeds[:, 0].tolist() + [query_umap_embed[0, 0]],
90
+ "y": umap_embeds[:, 1].tolist() + [query_umap_embed[0, 1]],
91
+ "type": ["Source"] * len(umap_embeds) + ["Requête"]
92
+ }
93
+ return pd.DataFrame(data)
94
 
95
  with gr.Blocks(title="Assistant Juridique pour le Code de l'éducation (Beta)") as demo:
96
  gr.Markdown(
 
114
  "meta-llama/Meta-Llama-3-8B-Instruct",
115
  "HuggingFaceH4/zephyr-7b-beta",
116
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
117
+ "mistralai/Mixtral-8x22B-v0.1",
118
+ "factice: question+contexte"
119
  ],
120
  value="meta-llama/Meta-Llama-3-70B-Instruct")
121
 
122
  submit_button = gr.Button("Envoyer")
123
 
124
+ with gr.Tab(label="Réponse"):
125
+ response_box = gr.Markdown()
126
+ with gr.Tab(label="Sources"):
127
+ sources_box = gr.Markdown()
128
+ with gr.Tab(label="Visualisation"):
129
+ scatter_plot = gr.ScatterPlot(articles_df,
130
+ x = "x", y = "y",
131
+ color="type",
132
+ label="Visualisation des embeddings",
133
+ height=500)
134
+ with gr.Tab(label="Paramètres"):
135
+ system_box = gr.Textbox(label="Invite systeme", value=system_prompt,
136
+ lines=20)
137
 
138
  submit_button.click(chat_interface,
139
  inputs=[query_box, model, system_box],
140
+ outputs=[response_box, sources_box])
141
+ submit_button.click(update_plot, inputs=[query_box], outputs=[scatter_plot])
142
 
143
  demo.launch()
faiss_code_education/index.faiss CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c2df7172919daa30d4dcb6b540cc96d5f5737da11588a0a53f09feb7391d6a2
3
- size 27717677
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7eeb956ab6ac2e4a131002847ea78318d7af3574dc73ac8cccc76f12424d13
3
+ size 21831725
faiss_code_education/index.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8101642c4967dbc8bc0977b2a3b53cde856a00cd4381f490d6232954fba077d
3
- size 13271108
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e796a35535640aa94b8521f629c08c40b9b04892c8dcd40a15459abf0833fe8e
3
+ size 6466363