Spaces:
Sleeping
Sleeping
Commit
·
f7b57e5
1
Parent(s):
d78dcaa
Update faiss DB with better chunking, added tabs and visualization
Browse files
- app.py +50 -11
- faiss_code_education/index.faiss +2 -2
- faiss_code_education/index.pkl +2 -2
app.py
CHANGED
@@ -3,6 +3,8 @@ from huggingface_hub import login, InferenceClient
|
|
3 |
import os
|
4 |
from langchain_community.vectorstores import FAISS
|
5 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
|
|
6 |
|
7 |
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
8 |
|
@@ -16,6 +18,18 @@ db_code = FAISS.load_local("faiss_code_education",
|
|
16 |
embeddings,
|
17 |
allow_dangerous_deserialization=True)
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
system_prompt = """Tu es un assistant juridique spécialisé dans le Code de l'éducation français.
|
20 |
Ta mission est d'aider les utilisateurs à comprendre la législation en répondant à leurs questions.
|
21 |
|
@@ -45,6 +59,9 @@ def query_rag(query, model, system_prompt):
|
|
45 |
messages = [ { "role" : "system", "content" : system_prompt } ]
|
46 |
messages.append( { "role" : "user", "content" : user } )
|
47 |
|
|
|
|
|
|
|
48 |
chat_completion = client.chat_completion(
|
49 |
messages=messages,
|
50 |
model=model,
|
@@ -53,17 +70,27 @@ def query_rag(query, model, system_prompt):
|
|
53 |
return chat_completion.choices[0].message.content, article_dict
|
54 |
|
55 |
def create_context_response(response, article_dict):
|
56 |
-
|
57 |
for i, article in enumerate(article_dict):
|
58 |
art = article_dict[article]
|
59 |
-
|
60 |
-
|
61 |
-
return response
|
62 |
|
63 |
def chat_interface(query, model, system_prompt):
|
64 |
response, article_dict = query_rag(query, model, system_prompt)
|
65 |
-
|
66 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
with gr.Blocks(title="Assistant Juridique pour le Code de l'éducation (Beta)") as demo:
|
69 |
gr.Markdown(
|
@@ -87,18 +114,30 @@ with gr.Blocks(title="Assistant Juridique pour le Code de l'éducation (Beta)")
|
|
87 |
"meta-llama/Meta-Llama-3-8B-Instruct",
|
88 |
"HuggingFaceH4/zephyr-7b-beta",
|
89 |
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
|
90 |
-
"mistralai/Mixtral-8x22B-v0.1"
|
|
|
91 |
],
|
92 |
value="meta-llama/Meta-Llama-3-70B-Instruct")
|
93 |
|
94 |
submit_button = gr.Button("Envoyer")
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
submit_button.click(chat_interface,
|
101 |
inputs=[query_box, model, system_box],
|
102 |
-
outputs=[response_box])
|
|
|
103 |
|
104 |
demo.launch()
|
|
|
3 |
import os
|
4 |
from langchain_community.vectorstores import FAISS
|
5 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
6 |
+
import umap
|
7 |
+
import pandas as pd
|
8 |
|
9 |
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
10 |
|
|
|
18 |
embeddings,
|
19 |
allow_dangerous_deserialization=True)
|
20 |
|
21 |
+
reducer = umap.UMAP()
|
22 |
+
index = db_code.index
|
23 |
+
ntotal = min(index.ntotal, 4998)
|
24 |
+
embeds = index.reconstruct_n(0, ntotal)
|
25 |
+
umap_embeds = reducer.fit_transform(embeds)
|
26 |
+
|
27 |
+
articles_df = pd.DataFrame({
|
28 |
+
"x" : umap_embeds[:,0],
|
29 |
+
"y" : umap_embeds[:,1],
|
30 |
+
"type" : [ "Source" ] * len(umap_embeds),
|
31 |
+
})
|
32 |
+
|
33 |
system_prompt = """Tu es un assistant juridique spécialisé dans le Code de l'éducation français.
|
34 |
Ta mission est d'aider les utilisateurs à comprendre la législation en répondant à leurs questions.
|
35 |
|
|
|
59 |
messages = [ { "role" : "system", "content" : system_prompt } ]
|
60 |
messages.append( { "role" : "user", "content" : user } )
|
61 |
|
62 |
+
if "factice" in model:
|
63 |
+
return user, article_dict
|
64 |
+
|
65 |
chat_completion = client.chat_completion(
|
66 |
messages=messages,
|
67 |
model=model,
|
|
|
70 |
return chat_completion.choices[0].message.content, article_dict
|
71 |
|
72 |
def create_context_response(response, article_dict):
|
73 |
+
context = '\n'
|
74 |
for i, article in enumerate(article_dict):
|
75 |
art = article_dict[article]
|
76 |
+
context += '* **' + art['chemin'] + '** : '+ art['texte'].replace('\n', '\n ')+'\n'
|
77 |
+
return context
|
|
|
78 |
|
79 |
def chat_interface(query, model, system_prompt):
|
80 |
response, article_dict = query_rag(query, model, system_prompt)
|
81 |
+
context = create_context_response(response, article_dict)
|
82 |
+
return response, context
|
83 |
+
|
84 |
+
def update_plot(query):
|
85 |
+
query_embed = embeddings.embed_documents([query])[0]
|
86 |
+
query_umap_embed = reducer.transform([query_embed])
|
87 |
+
|
88 |
+
data = {
|
89 |
+
"x": umap_embeds[:, 0].tolist() + [query_umap_embed[0, 0]],
|
90 |
+
"y": umap_embeds[:, 1].tolist() + [query_umap_embed[0, 1]],
|
91 |
+
"type": ["Source"] * len(umap_embeds) + ["Requête"]
|
92 |
+
}
|
93 |
+
return pd.DataFrame(data)
|
94 |
|
95 |
with gr.Blocks(title="Assistant Juridique pour le Code de l'éducation (Beta)") as demo:
|
96 |
gr.Markdown(
|
|
|
114 |
"meta-llama/Meta-Llama-3-8B-Instruct",
|
115 |
"HuggingFaceH4/zephyr-7b-beta",
|
116 |
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
|
117 |
+
"mistralai/Mixtral-8x22B-v0.1",
|
118 |
+
"factice: question+contexte"
|
119 |
],
|
120 |
value="meta-llama/Meta-Llama-3-70B-Instruct")
|
121 |
|
122 |
submit_button = gr.Button("Envoyer")
|
123 |
|
124 |
+
with gr.Tab(label="Réponse"):
|
125 |
+
response_box = gr.Markdown()
|
126 |
+
with gr.Tab(label="Sources"):
|
127 |
+
sources_box = gr.Markdown()
|
128 |
+
with gr.Tab(label="Visualisation"):
|
129 |
+
scatter_plot = gr.ScatterPlot(articles_df,
|
130 |
+
x = "x", y = "y",
|
131 |
+
color="type",
|
132 |
+
label="Visualisation des embeddings",
|
133 |
+
height=500)
|
134 |
+
with gr.Tab(label="Paramètres"):
|
135 |
+
system_box = gr.Textbox(label="Invite systeme", value=system_prompt,
|
136 |
+
lines=20)
|
137 |
|
138 |
submit_button.click(chat_interface,
|
139 |
inputs=[query_box, model, system_box],
|
140 |
+
outputs=[response_box, sources_box])
|
141 |
+
submit_button.click(update_plot, inputs=[query_box], outputs=[scatter_plot])
|
142 |
|
143 |
demo.launch()
|
faiss_code_education/index.faiss
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b7eeb956ab6ac2e4a131002847ea78318d7af3574dc73ac8cccc76f12424d13
|
3 |
+
size 21831725
|
faiss_code_education/index.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e796a35535640aa94b8521f629c08c40b9b04892c8dcd40a15459abf0833fe8e
|
3 |
+
size 6466363
|