Spaces:

Adr740
/

cross-selling-ai-finder-lite-demo

Sleeping

App Files Files Community

Adr740 committed on Feb 5

Commit

06b126d

•

1 Parent(s): caa663b

Upload 4 files

Browse files

Files changed (4) hide show

app_lite.py +42 -0
data_planeta_february2024.csv +0 -0
get_similar_program.py +84 -0
requirements.txt +6 -0

app_lite.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import gradio as gr
+from functools import partial
+from get_similar_program import search_programs
+import pandas as pd
# UI copy (French). Roughly: "AI copilot for sales reps: from a description of
# a prospect's profile, quickly find the most relevant training program in a
# large catalogue."
title = "Planeta Sales Copilot Confidential Demo [Tatooine] 🛫"
desc = "IA copilot pour comerciaux. Cette application permet, a partir de la description d'un profil de chercher rapidement dans un vaste catalogue de formation celle qui est la plus pertinente"

# Schools offered in the "Ecole" filter. Defined once so the available choices
# and the default selection (everything ticked) cannot drift apart.
# NOTE(review): 'ESDESIGN,' (comma inside the string) and the double space in
# 'BARCELONA  CULINARY HUB' look like typos, but the selected values are passed
# verbatim to search_programs, so they are left untouched here — confirm
# against the catalogue data before changing them.
SCHOOLS = [
    'OBS', 'UNIBA', 'OSTELEA', 'VIU', 'ESLSCA', 'EAE', 'ESDESIGN,', 'EGE',
    'EDC', 'SMS', 'SUP DE LUXE', 'IFP', 'INESDI', 'ROME BUSINESS SCHOOL',
    'BARCELONA  CULINARY HUB',
]

with gr.Blocks(title=title) as demo:
    # Header: page title followed by the bolded description.
    gr.Markdown(f"# {title}")
    with gr.Row():
        with gr.Column(scale=8):
            gr.Markdown("**" + desc + "**")

    gr.Markdown("## Catalogue de formation")
    with gr.Row():
        # Left column: free-text profile input and the rendered results.
        with gr.Column(scale=8):
            text_area = gr.Textbox(placeholder="Ecrivez ici'", lines=3, label="Décrivez le profil de votre prospect")
            with gr.Accordion("Tous les résultats:"):
                output = gr.Markdown("Vide")
        # Right column: search button and search options.
        with gr.Column(scale=2):
            search_button = gr.Button(value="Trouver des Formations")
            number_to_display = gr.Number(value=10, label="Nombre de Formations à afficher")
            augment = gr.Checkbox(label="Enrichir le profil [IA] (Peut mettre plusieurs secondes))", info="Utiliser l'IA pour enrichir la description du profil")
            # Named school_filter (not `filter`) to avoid shadowing the builtin.
            school_filter = gr.CheckboxGroup(
                SCHOOLS,
                value=list(SCHOOLS),
                label="Ecole",
                info="Filtrer par école",
            )

    # The four inputs map positionally onto search_programs' first four
    # parameters; the returned Markdown string is rendered in `output`.
    search_button.click(
        fn=search_programs,
        inputs=[text_area, number_to_display, augment, school_filter],
        outputs=[output],
    )

demo.launch(max_threads=40, show_error=True)

data_planeta_february2024.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

get_similar_program.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import numpy as np
+import pandas as pd
+from openai import OpenAI
+import config
# Shared OpenAI client used by every helper in this module.
# The original line read `OpenAI(api_key=)`, which is a syntax error (the key
# appears to have been stripped before upload). With no explicit key, the
# client reads the OPENAI_API_KEY environment variable, which also keeps the
# secret out of the source tree.
client = OpenAI()
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors *a* and *b*.

    Args:
        a: First vector (any array-like accepted by NumPy).
        b: Second vector, same length as *a*.

    Returns:
        dot(a, b) / (||a|| * ||b||), or 0.0 when either vector has zero norm
        (the ratio is undefined there and would otherwise come back as NaN
        together with a NumPy RuntimeWarning).
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator
def _get_embedding(text, model="text-embedding-3-large"):
    """Return the embedding vector of *text* from the OpenAI embeddings API.

    Args:
        text: Text to embed. Newlines are replaced by spaces before the
            request is sent; a value that cannot be cleaned that way (for
            example a non-string) is forwarded unchanged.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` field of the first item in the API response.
    """
    try:
        text = text.replace("\n", " ")
    except (AttributeError, TypeError):
        # Best-effort cleanup only: keep the input as-is when it is not a
        # string, instead of swallowing every possible exception.
        pass
    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding
def augment_user_input(user_input):
    """Enrich a profile description with model-suggested training programs.

    Sends a single user message to the ``gpt-4-turbo-preview`` chat model
    (temperature 1, at most 400 completion tokens) asking for a bullet list
    of suitable programs, then returns the original *user_input* followed by
    a newline and the model's reply.
    """
    prompt = f"""
    Based on the profile of this student, propose a highly detailed bullet point list of training programs in French that could be good for him:
    {user_input}
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        temperature=1,
        max_tokens=400,
        messages=conversation,
    )
    suggestions = completion.choices[0].message.content
    return f"{user_input}\n{suggestions}"
def _format_program(content):
    """Turn one program summary into the Markdown snippet shown to the user."""
    sections = content.split("##")
    if len(sections) < 2:
        # No "##" heading in the summary: show it whole rather than failing
        # with an IndexError.
        body = content
    else:
        # Keep only the first paragraph of the first section, then append
        # every later section in full.
        body = sections[1].split("\n\n")[0] + "".join(sections[2:])
    body = body.replace("\n# ", "\n### ").replace("55555", "###")
    return "##" + body + "\n\n------\n\n"


def search_programs(raw_input, nb_programs_to_display=10, augment_input=False, filters=None, path_to_csv="data_planeta_february2024.csv",):
    """Return the catalogue programs most similar to a profile, as Markdown.

    Args:
        raw_input: Free-text description of the prospect's profile.
        nb_programs_to_display: Maximum number of programs to return; passed
            through ``int()`` so numeric UI values such as ``10.0`` work.
        augment_input: When true, the profile is first enriched via
            ``augment_user_input`` before being embedded.
        filters: Optional list of school names. Rows are kept when their
            "ÉCOLE" column equals ``"\\nÉCOLE: <name>"`` for one of the
            names. ``None`` or an empty list disables filtering.
        path_to_csv: Catalogue CSV. The code reads its "Embeddings", "ÉCOLE"
            and "summary_french" columns.

    Returns:
        One Markdown string with the formatted summaries of the best matches,
        ordered by decreasing cosine similarity; empty when nothing matches.

    Raises:
        ValueError: If similarities cannot be computed from the "Embeddings"
            column (for example because a cell cannot be parsed).
    """
    user_input = augment_user_input(raw_input) if augment_input else raw_input

    df = pd.read_csv(path_to_csv).dropna(subset=["Embeddings"])
    if filters:
        wanted_schools = [f"\nÉCOLE: {school}" for school in filters]
        df = df[df["ÉCOLE"].isin(wanted_schools)].reset_index(drop=True).copy()

    query_embedding = _get_embedding(user_input, model="text-embedding-3-large")

    def _row_similarity(serialized_embedding):
        # NOTE(security): eval() executes whatever the CSV cell contains. It
        # is kept because the stored format is not visible from here, but it
        # must only ever be fed a trusted catalogue file.
        vector = eval(serialized_embedding)
        try:
            return cosine_similarity(vector, query_embedding)
        except Exception:
            # A row whose vector cannot be compared ranks as "not similar".
            return 0

    try:
        df["similarity"] = df["Embeddings"].apply(_row_similarity)
    except Exception as err:
        # Fail loudly instead of dropping into a debugger, which would hang a
        # served application.
        raise ValueError(
            f"Could not compute similarities from the 'Embeddings' column of {path_to_csv!r}"
        ) from err

    best_matches = (
        df.sort_values("similarity", ascending=False)
        .head(int(nb_programs_to_display))
        .to_dict(orient="records")
    )
    return "".join(_format_program(str(row["summary_french"])) for row in best_matches)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+openai
+gradio
+sounddevice
+schedule
+wavio
+httpx==0.24.1