Upload 4 files
Browse files- app_lite.py +42 -0
- data_planeta_february2024.csv +0 -0
- get_similar_program.py +84 -0
- requirements.txt +6 -0
app_lite.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
from functools import partial
|
4 |
+
from get_similar_program import search_programs
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
# Page title, also used as the browser tab title.
title = "Planeta Sales Copilot Confidential Demo [Tatooine] 🛫"
# User-facing description (French). English gist: an AI copilot for sales
# people that searches a large training catalogue for the program most
# relevant to a described prospect profile.
desc = "IA copilot pour comerciaux. Cette application permet, a partir de la description d'un profil de chercher rapidement dans un vaste catalogue de formation celle qui est la plus pertinente"

# Schools offered in the "Ecole" filter. Defined once so the available
# choices and the default selection (everything ticked) cannot drift apart.
# NOTE(review): the original list spelled one entry 'ESDESIGN,' with the comma
# inside the string, which looks like a typo that would keep that school from
# ever matching the filter. Confirm the exact spelling against the CSV.
SCHOOLS = [
    'OBS', 'UNIBA', 'OSTELEA', 'VIU', 'ESLSCA', 'EAE', 'ESDESIGN', 'EGE',
    'EDC', 'SMS', 'SUP DE LUXE', 'IFP', 'INESDI', 'ROME BUSINESS SCHOOL',
    'BARCELONA CULINARY HUB',
]

# NOTE(review): the layout nesting below was reconstructed from statement
# order because the original indentation was lost; confirm against the
# intended UI.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    with gr.Row():
        with gr.Column(scale=8):
            gr.Markdown("**" + desc + "**")

    gr.Markdown("## Catalogue de formation")
    with gr.Row():
        # Left column: free-text profile input and the rendered results.
        with gr.Column(scale=8):
            text_area = gr.Textbox(placeholder="Ecrivez ici'", lines=3, label="Décrivez le profil de votre prospect")
            with gr.Accordion("Tous les résultats:", ):
                output = gr.Markdown("Vide")

        # Right column: search trigger and search options.
        with gr.Column(scale=2):
            search_button = gr.Button(value="Trouver des Formations")
            number_to_display = gr.Number(value=10, label="Nombre de Formations à afficher")
            augment = gr.Checkbox(label="Enrichir le profil [IA] (Peut mettre plusieurs secondes))", info="Utiliser l'IA pour enrichir la description du profil")
            # Named ``school_filter`` so the ``filter`` builtin is not shadowed.
            school_filter = gr.CheckboxGroup(
                SCHOOLS,
                value=list(SCHOOLS),
                label="Ecole",
                info="Filtrer par école",
            )

    # The inputs are passed positionally to search_programs in this order:
    # raw_input, nb_programs_to_display, augment_input, filters.
    search_button.click(
        fn=search_programs,
        inputs=[text_area, number_to_display, augment, school_filter],
        outputs=[output],
    )

demo.launch(max_threads=40, show_error=True)
|
data_planeta_february2024.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
get_similar_program.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
from openai import OpenAI
|
4 |
+
import config
|
5 |
+
|
6 |
+
# Shared OpenAI client used by every API call in this module.
# The original line read ``OpenAI(api_key=)`` (the key value was blanked out),
# which is a syntax error. With no explicit key the SDK reads the
# ``OPENAI_API_KEY`` environment variable, which also keeps the secret out of
# the source tree. Construction fails if that variable is not set.
client = OpenAI()
|
7 |
+
|
8 |
+
def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a: First vector (sequence or array of numbers).
        b: Second vector, with a shape compatible with ``a`` for ``np.dot``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector
        has zero norm (the ratio is undefined there and previously evaluated
        to ``nan`` with a runtime warning).

    Raises:
        ValueError: If the inputs cannot be converted to float arrays or
            their shapes are incompatible for ``np.dot``.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Zero-length or empty vector: no direction to compare.
        return 0.0
    return np.dot(a, b) / denominator
|
10 |
+
|
11 |
+
def _get_embedding(text, model="text-embedding-3-large"):
    """Return the embedding of ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Text to embed. When it is a string, newlines are replaced with
            spaces first; any other value is forwarded to the API unchanged.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    # Explicit type check instead of the former bare ``except``, which hid
    # every error raised while normalising the text.
    if isinstance(text, str):
        text = text.replace("\n", " ")
    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
def augment_user_input(user_input):
    """Enrich a profile description with program ideas from the chat model.

    Asks the chat model for a detailed bullet list of training programs (in
    French) suited to the described profile and appends that list to the
    original text.

    Args:
        user_input: Free-text description of the prospect's profile.

    Returns:
        ``user_input`` followed by a newline and the generated list, or
        ``user_input`` alone when the model returns no text.
    """
    # Same prompt text as before, spelled out line by line.
    # NOTE(review): the original was a triple-quoted f-string whose leading
    # indentation could not be recovered; confirm no indentation was intended.
    prompt = (
        "\n"
        "Based on the profile of this student, propose a highly detailed bullet point list of training programs in French that could be good for him:\n"
        "\n"
        f"{user_input}\n"
    )
    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        temperature=1,
        max_tokens=400,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    augmented_input = completion.choices[0].message.content
    if not augmented_input:
        # The message content can be None; without this guard the literal
        # text "None" would be appended and embedded with the query.
        return user_input
    return f"{user_input}\n{augmented_input}"
|
36 |
+
|
37 |
+
def _similarity_scores(embedding_cells, query_embedding):
    """Score each stored embedding cell against the query embedding."""
    import ast  # local import so this block does not rely on a file-level one

    scores = []
    for cell in embedding_cells:
        try:
            # literal_eval replaces the former eval(): it accepts only Python
            # literals, so a CSV cell can no longer execute arbitrary code.
            stored = ast.literal_eval(cell) if isinstance(cell, str) else cell
            scores.append(cosine_similarity(stored, query_embedding))
        except (ValueError, SyntaxError, TypeError):
            # Unparseable cell or mismatched dimensions: rank the row last
            # instead of aborting the whole search.
            scores.append(0.0)
    return scores


def _format_program_summary(content):
    """Turn one stored summary into the Markdown section shown to the user."""
    sections = content.split("##")
    if len(sections) < 2:
        # No "##" marker at all (this used to raise IndexError): show the
        # whole text.
        body = " " + content.strip()
    else:
        # Keep only the first paragraph of the first section, then every
        # later section as stored.
        body = sections[1].split("\n\n")[0] + "".join(sections[2:])
    body = body.replace("\n# ", "\n### ").replace("55555", "###")
    return "##" + body + "\n\n------\n\n"


def search_programs(raw_input, nb_programs_to_display=10, augment_input=False, filters=None, path_to_csv="data_planeta_february2024.csv",):
    """Return the catalogue programs most similar to a profile, as Markdown.

    The profile text is optionally enriched through ``augment_user_input``,
    embedded with ``_get_embedding`` and compared by cosine similarity to the
    embedding stored for each catalogue row.

    Args:
        raw_input: Free-text description of the prospect's profile.
        nb_programs_to_display: Maximum number of programs to return. It is
            converted with ``int``; ``None`` falls back to 10 and negative
            values are treated as 0.
        augment_input: When true, enrich ``raw_input`` before embedding it.
        filters: Optional school names. When non-empty, only rows whose
            "ÉCOLE" column matches one of them are searched; ``None`` or an
            empty sequence searches every row.
        path_to_csv: Path of the catalogue CSV. The code reads its
            "Embeddings", "ÉCOLE" and "summary_french" columns.

    Returns:
        One Markdown section per selected program, most similar first, each
        followed by a horizontal rule; an empty string when nothing matches.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    user_input = augment_user_input(raw_input) if augment_input else raw_input

    df = pd.read_csv(path_to_csv).dropna(subset=["Embeddings"])
    if filters:
        # NOTE(review): presumably the CSV stores the school in exactly this
        # "\nÉCOLE: <name>" form; verify against the data file.
        wanted_schools = [f"\nÉCOLE: {school}" for school in filters]
        df = df[df["ÉCOLE"].isin(wanted_schools)].reset_index(drop=True).copy()

    query_embedding = _get_embedding(user_input, model="text-embedding-3-large")
    df["similarity"] = _similarity_scores(df["Embeddings"], query_embedding)

    # NOTE(review): the UI number field presumably yields None when cleared;
    # fall back to the default rather than failing in int().
    limit = 10 if nb_programs_to_display is None else max(int(nb_programs_to_display), 0)
    top_programs = df.sort_values("similarity", ascending=False).head(limit)

    return "".join(
        _format_program_summary(str(summary))
        for summary in top_programs["summary_french"]
    )
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
gradio
|
3 |
+
sounddevice
|
4 |
+
schedule
|
5 |
+
wavio
|
6 |
+
httpx==0.24.1
|