Adr740 committed on
Commit
06b126d
1 Parent(s): caa663b

Upload 4 files

Browse files
app_lite.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ from functools import partial
4
+ from get_similar_program import search_programs
5
+ import pandas as pd
6
+
7
# Gradio front-end: a free-text prospect description is sent to
# ``search_programs`` and the returned Markdown is rendered in an accordion.

title = "Planeta Sales Copilot Confidential Demo [Tatooine] 🛫"
desc = "IA copilot pour comerciaux. Cette application permet, a partir de la description d'un profil de chercher rapidement dans un vaste catalogue de formation celle qui est la plus pertinente"
# English version of the description, kept for reference:
# "This is a tool that helps you find quickly relevant Formations on a topic or a problem you have. Just type in plain English what you are looking for in the box below.\n\n"

# Schools offered in the filter. The same list is used for the available
# choices and for the default selection, so every school starts ticked.
# NOTE(review): 'ESDESIGN,' carries a comma inside the string literal; confirm
# it matches the values stored in the catalogue before changing it.
SCHOOLS = [
    'OBS', 'UNIBA', 'OSTELEA', 'VIU', 'ESLSCA', 'EAE', 'ESDESIGN,', 'EGE',
    'EDC', 'SMS', 'SUP DE LUXE', 'IFP', 'INESDI', 'ROME BUSINESS SCHOOL',
    'BARCELONA CULINARY HUB',
]

# NOTE(review): the nesting of the layout containers below should be checked
# against the rendered UI.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    with gr.Row():
        with gr.Column(scale=8):
            gr.Markdown("**" + desc + "**")
    gr.Markdown("## Catalogue de formation")
    with gr.Row():
        with gr.Column(scale=8):
            text_area = gr.Textbox(placeholder="Ecrivez ici'", lines=3, label="Décrivez le profil de votre prospect")
            with gr.Accordion("Tous les résultats:"):
                output = gr.Markdown("Vide")

        with gr.Column(scale=2):
            search_button = gr.Button(value="Trouver des Formations")
            number_to_display = gr.Number(value=10, label="Nombre de Formations à afficher")
            augment = gr.Checkbox(label="Enrichir le profil [IA] (Peut mettre plusieurs secondes))", info="Utiliser l'IA pour enrichir la description du profil")
            # Named ``school_filter`` so the builtin ``filter`` is not shadowed.
            school_filter = gr.CheckboxGroup(
                SCHOOLS,
                value=list(SCHOOLS),
                label="Ecole",
                info="Filtrer par école",
            )

    # ``partial`` binds no arguments here; calls are forwarded unchanged.
    search_function = partial(search_programs)

    # Inputs are passed positionally: text, result count, augment flag, schools.
    search_button.click(
        fn=search_function,
        inputs=[text_area, number_to_display, augment, school_filter],
        outputs=[output],
    )

demo.launch(max_threads=40, show_error=True)
data_planeta_february2024.csv ADDED
The diff for this file is too large to render. See raw diff
 
get_similar_program.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from openai import OpenAI
4
+ import config
5
+
6
# Shared OpenAI client used by every helper in this module.
# ``OpenAI(api_key=)`` with no value is a syntax error that prevents the module
# from importing. Called without ``api_key``, the client reads the key from
# the OPENAI_API_KEY environment variable, which also keeps the secret out of
# the source tree.
client = OpenAI()
7
+
8
def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Args:
        a: 1-D array-like of numbers.
        b: 1-D array-like of numbers, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector has
        zero norm (the ratio is undefined there and would otherwise be NaN).
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # Avoid 0/0: a null vector has no direction, so score it as unrelated.
        return 0.0
    return np.dot(a, b) / norm_product
10
+
11
def _get_embedding(text, model="text-embedding-3-large"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Text to embed. When it is a ``str``, newlines are replaced by
            spaces first; any other value is forwarded to the API unchanged.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    # Explicit type check instead of a bare ``except`` around ``.replace``:
    # only strings have newlines to flatten, and real errors are not hidden.
    if isinstance(text, str):
        text = text.replace("\n", " ")
    return client.embeddings.create(input=[text], model=model).data[0].embedding
17
+
18
+
19
+
20
def augment_user_input(user_input):
    """Enrich a prospect description with model-suggested training programs.

    The description is sent to the chat completions endpoint with a prompt
    asking for a detailed bullet list of suitable training programs in French.

    Args:
        user_input: Free-text description of the prospect's profile.

    Returns:
        ``user_input`` followed by a newline and the generated text. If the
        model returns no content, ``user_input`` is returned unchanged.
    """
    # NOTE(review): confirm the leading whitespace inside this prompt matches
    # the deployed version; it is part of the text sent to the model.
    prompt = f"""
    Based on the profile of this student, propose a highly detailed bullet point list of training programs in French that could be good for him:

    {user_input}
    """
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        temperature=1,
        max_tokens=400,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    augmented_input = response.choices[0].message.content
    if not augmented_input:
        # ``content`` may be None; formatting it would append the literal
        # text "None" to the profile that is embedded afterwards.
        return user_input
    return f"{user_input}\n{augmented_input}"
36
+
37
def _format_program_summary(content):
    """Convert one catalogue summary into the Markdown section shown to users."""
    sections = content.split("##")
    if len(sections) < 2:
        # No "##" heading in the summary: show the text as-is instead of
        # failing with an IndexError on ``sections[1]``.
        body = content
    else:
        # Keep only the first paragraph of the first section, then every
        # following section in full.
        body = sections[1].split("\n\n")[0] + "".join(sections[2:])
    body = body.replace("\n# ", "\n### ").replace("55555", "###")
    return "##" + body + "\n\n------\n\n"


def search_programs(raw_input, nb_programs_to_display=10, augment_input=False, filters=None, path_to_csv="data_planeta_february2024.csv",):
    """Return the catalogue programs most similar to a prospect description.

    The catalogue CSV is loaded, optionally restricted to the selected
    schools, and each row's stored embedding is compared to the embedding of
    the query using cosine similarity.

    Args:
        raw_input: Free-text description of the prospect.
        nb_programs_to_display: Maximum number of programs to return. ``None``
            falls back to 10; negative values are treated as 0.
        augment_input: When true, the description is first enriched with
            ``augment_user_input``.
        filters: School names to keep. ``None`` or an empty list disables the
            filter.
        path_to_csv: Path to the catalogue. The columns ``Embeddings``,
            ``summary_french`` and (when filtering) ``ÉCOLE`` are read.

    Returns:
        A Markdown string with one section per program, best match first.

    Raises:
        ValueError: If a stored embedding cannot be parsed.
    """
    import ast  # local import: only needed to parse the serialized embeddings

    user_input = augment_user_input(raw_input) if augment_input else raw_input

    df = pd.read_csv(path_to_csv).dropna(subset=["Embeddings"])
    if filters:
        # The "ÉCOLE" column is matched against this exact formatted value.
        formatted_filters = [f"\nÉCOLE: {school}" for school in filters]
        df = df[df["ÉCOLE"].isin(formatted_filters)].reset_index(drop=True).copy()

    query_embedding = _get_embedding(user_input, model="text-embedding-3-large")

    def _score(serialized_embedding):
        # ``ast.literal_eval`` only accepts Python literals, so text in the
        # CSV cannot run arbitrary code the way ``eval`` could.
        try:
            vector = ast.literal_eval(serialized_embedding)
        except (ValueError, SyntaxError) as err:
            # Fail with a clear error rather than dropping into a debugger,
            # which would block the web server.
            raise ValueError("Could not parse a stored embedding in the catalogue") from err
        try:
            return cosine_similarity(vector, query_embedding)
        except (TypeError, ValueError):
            # e.g. mismatched dimensions: rank the row last instead of failing.
            return 0

    df["similarity"] = df["Embeddings"].apply(_score)

    if nb_programs_to_display is None:
        nb_programs_to_display = 10
    # ``head`` with a negative count would return "all but the last n" rows.
    top_n = max(int(nb_programs_to_display), 0)
    best_matches = df.sort_values("similarity", ascending=False).head(top_n)

    return "".join(
        _format_program_summary(str(summary))
        for summary in best_matches["summary_french"]
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ sounddevice
4
+ schedule
5
+ wavio
6
+ httpx==0.24.1