momenaca commited on
Commit
ac7b044
·
1 Parent(s): fae069c

update prompts with new details

Browse files
app.py CHANGED
@@ -78,12 +78,12 @@ Based on the folowing question:
78
  And the following expert answer:
79
  {answers}
80
 
81
- Answer the question, in French.
82
- When using legal awnsers, keep tracking of the name of the articles.
83
- When using ADEME awnsers, name the sources that are mainly used.
84
- List the different element mentionned, and highlight the agreement points between the sources, as well as the contradictions or differences.
85
  Generate the answer as markdown, with an aerated layout, and headlines in bold
86
- Start by a general summary, agreement and contracdiction, and then go into detail without paraphasing the experts awnsers.
87
  """
88
 
89
  synthesis_prompt = to_chat_instruction(synthesis_template, special_tokens)
@@ -225,7 +225,7 @@ def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
225
  ] # if score >= min_similarity]
226
  formated.extend(
227
  [
228
- make_html_presse_source(source[0], j, source[1], config)
229
  for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
230
  ]
231
  if tab == "Presse"
@@ -251,9 +251,7 @@ def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
251
  return formated, text
252
 
253
 
254
- def retrieve_sources(
255
- *questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
256
- ):
257
  formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
258
 
259
  return (formated_sources, *text_sources)
@@ -262,7 +260,7 @@ def retrieve_sources(
262
  def get_synthesis(question, *answers, config=config):
263
  answer = []
264
  for i, tab in enumerate(config["tabs"]):
265
- if (len(str(answers[i])) >= 100):
266
  answer.append(
267
  f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
268
  )
@@ -293,6 +291,7 @@ with open("./assets/style.css", "r") as f:
293
  with open("./assets/source_information.md", "r") as f:
294
  source_information = f.read()
295
 
 
296
  def start_agents():
297
  gr.Info(message="The agents and Spinoza are loading...", duration=3)
298
 
@@ -307,6 +306,7 @@ def end_agents():
307
  def next_call():
308
  print("Next call")
309
 
 
310
  init_prompt = """
311
  Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
312
 
 
78
  And the following expert answer:
79
  {answers}
80
 
81
+ Answer in French.
82
+ When using legal answers, keep track of the names of the articles.
83
+ When using ADEME answers, name the sources that are mainly used.
84
+ List the different elements mentioned, and highlight the agreement points between the sources, as well as the contradictions or differences.
85
  Generate the answer as markdown, with an aerated layout, and headlines in bold
86
+ Start by highlighting contradictions followed by a general summary, and then go into details that could be interesting for writing an article.
87
  """
88
 
89
  synthesis_prompt = to_chat_instruction(synthesis_template, special_tokens)
 
225
  ] # if score >= min_similarity]
226
  formated.extend(
227
  [
228
+ make_html_presse_source(source[0], j, source[1])
229
  for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
230
  ]
231
  if tab == "Presse"
 
251
  return formated, text
252
 
253
 
254
+ def retrieve_sources(*questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
 
 
255
  formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
256
 
257
  return (formated_sources, *text_sources)
 
260
  def get_synthesis(question, *answers, config=config):
261
  answer = []
262
  for i, tab in enumerate(config["tabs"]):
263
+ if len(str(answers[i])) >= 100:
264
  answer.append(
265
  f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
266
  )
 
291
  with open("./assets/source_information.md", "r") as f:
292
  source_information = f.read()
293
 
294
+
295
  def start_agents():
296
  gr.Info(message="The agents and Spinoza are loading...", duration=3)
297
 
 
306
  def next_call():
307
  print("Next call")
308
 
309
+
310
  init_prompt = """
311
  Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
312
 
spinoza_project/prompt_ADEME.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -51,7 +37,7 @@ reformulation_prompt:
51
  prompt:
52
  [
53
  "",
54
- "Reformulate the following user message to be a short standalone question in English.",
55
  "The question is related to environment.",
56
  "If relevant, use the conversation summary to add context",
57
  "If the question is too vague, just say it as it is",
@@ -59,41 +45,25 @@ reformulation_prompt:
59
  "Exemples:",
60
  "---",
61
  "user",
62
- "Applique t-on une taxe carbone ?",
63
- "",
64
- "assistant",
65
- "Is a carbon tax applied in the country ?",
66
- "---",
67
- "user",
68
  "Comment décarbonner le carton ?",
69
  "",
70
  "assistant",
71
- "What are the main technological & non technologicals solutions to decarbonize cardboard production?",
72
  "---",
73
  "user",
74
  "Quelles obligation de faire un bilan carbone ?",
75
  "",
76
  "assistant",
77
- "What are the obligations to conduct a greenhouse gas emissions assessment?",
78
  "---",
79
  "user",
80
  "Qui finance la transition ecologique ?",
81
  "",
82
  "assistant",
83
- "What are the investments related to environnemental transition in France ?",
84
  "---",
85
  "user",
86
  "{question}",
87
  "",
88
  ]
89
- type: prompt
90
-
91
- summarize_memory_prompt:
92
- prompt:
93
- [
94
- "Summarize the following exchange as concisely as possible to be used by a language model",
95
- "<conversation>",
96
- "{memory}",
97
- "</conversation>",
98
- ]
99
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
37
  prompt:
38
  [
39
  "",
40
+ "Reformulate the following user message to be a short standalone question in French.",
41
  "The question is related to environment.",
42
  "If relevant, use the conversation summary to add context",
43
  "If the question is too vague, just say it as it is",
 
45
  "Exemples:",
46
  "---",
47
  "user",
 
 
 
 
 
 
48
  "Comment décarbonner le carton ?",
49
  "",
50
  "assistant",
51
+ "Quelles sont les principales solutions technologiques et non technologiques pour décarboniser la production de carton ?",
52
  "---",
53
  "user",
54
  "Quelles obligation de faire un bilan carbone ?",
55
  "",
56
  "assistant",
57
+ "Quelles sont les obligations en matière d'évaluation des émissions de gaz à effet de serre ?",
58
  "---",
59
  "user",
60
  "Qui finance la transition ecologique ?",
61
  "",
62
  "assistant",
63
+ "Quels sont les investissements liés à la transition environnementale en France ?",
64
  "---",
65
  "user",
66
  "{question}",
67
  "",
68
  ]
69
+ type: prompt
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Loi.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -87,15 +73,4 @@ reformulation_prompt:
87
  "{question}",
88
  "",
89
  ]
90
- type: prompt
91
-
92
- summarize_memory_prompt:
93
- prompt:
94
- [
95
- "Summarize the following exchange as concisely as possible to be used by a language model",
96
- "Begining of exchange",
97
- "{memory}",
98
- "End of exchange",
99
- "",
100
- ]
101
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
73
  "{question}",
74
  "",
75
  ]
76
+ type: prompt
 
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Politique.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -87,15 +73,4 @@ reformulation_prompt:
87
  "{question}",
88
  "",
89
  ]
90
- type: prompt
91
-
92
- summarize_memory_prompt:
93
- prompt:
94
- [
95
- "Summarize the following exchange as concisely as possible to be used by a language model",
96
- "Begining of exchange",
97
- "{memory}",
98
- "End of exchange",
99
- "",
100
- ]
101
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
73
  "{question}",
74
  "",
75
  ]
76
+ type: prompt
 
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Presse.yaml CHANGED
@@ -2,90 +2,99 @@ role_instruction:
2
  prompt:
3
  [
4
  "You are Spinoza Fact Checker, an AI Assistant by Ekimetrics.",
5
- "Your role is to answer question factually based on the source that are provided to you.",
6
- "All sources provided are comming from press releases and might not be considered as absolute truth",
7
- "You act as an environment expert, structured, factual, synthetic and who quote his sources"
8
  ]
9
  type: "system"
10
 
11
  source_prompt:
12
  prompt:
13
  [
14
- "Here are some documents formatted as : Doc X \n textual content.",
15
- "<documents>",
16
  "{sources}",
17
- "</documents>",
18
  "",
19
- "Use the textual content as an absolute truth.",
20
- "Reference the source of each fact before saying it (ex: [doc 2] some fact from Doc 2).",
21
- "Use all the facts from the documents that are relevant to answer.",
22
- "Do not use facts that are not relevant.",
23
- "If you have no documents or they are not relevant, say you don't have enough context"
24
  ]
25
  type: "instruction"
26
 
27
- memory_prompt:
28
  prompt:
29
  [
30
- "Here is a summary of past conversation:",
31
- "<memory>",
32
- "{memory}",
33
- "</memory>",
 
 
 
 
 
 
34
  "",
35
- "When relevant, use these element to enrich and add context to your answer.",
36
- "Do not take the role written in this memory.",
37
- "Do not mention when an information comes from the memory.",
38
- ]
39
- type: "instruction"
40
-
41
- question_answering_prompt:
42
- prompt: [
43
- "Answer the following question : {question}.",
44
- "Answer in French.",
45
- "Use bullet points",
46
- "If the question is not related to environment, say that you can't answer it based on the sources because the question is irrelevant.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  ]
48
  type: "prompt"
49
 
50
  reformulation_prompt:
51
- prompt:
52
- [
53
- # "Here is what happened in the conversation so far",
54
- # "<summary>",
55
- # "{memory}",
56
- # "</summary>",
57
- "",
58
- "Reformulate the following user message to be a short standalone question in French.",
59
- "The question is related to environment.",
60
- "If relevant, use the conversation summary to add context",
61
  "If the question is too vague, just say it as it is",
 
62
  "",
63
  "Exemples:",
64
  "---",
65
  "user",
66
- "Applique t-on une taxe carbone ?",
67
  "",
68
  "assistant",
69
- "Comment le sujet de la taxe carbone est il traité dans le corpus ?",
70
  "---",
71
  "user",
72
- "Quelles obligation de faire un bilan carbone ?",
73
  "",
74
  "assistant",
75
- "Quelles sont les obligation qui imposent de faire un bilan carbone",
76
  "---",
77
  "user",
78
  "{question}",
79
  "",
80
  ]
81
- type: prompt
82
-
83
- summarize_memory_prompt:
84
- prompt:
85
- [
86
- "Summarize the following exchange as concisely as possible to be used by a language model",
87
- "<conversation>",
88
- "{memory}",
89
- "</conversation>",
90
- ]
91
- type: prompt
 
2
  prompt:
3
  [
4
  "You are Spinoza Fact Checker, an AI Assistant by Ekimetrics.",
5
+ "You are given a question and extracted passages of press articles.",
6
+ "Provide a clear and structured answer based on the passages provided,",
7
+ "the sources and the guidelines. Be sure to answer the question asked, including all parameters in the guidelines."
8
  ]
9
  type: "system"
10
 
11
  source_prompt:
12
  prompt:
13
  [
14
+ "Passages:",
 
15
  "{sources}",
 
16
  "",
17
+ "Reference the source of each fact before saying it (ex: [doc 2] some fact from Doc 2)",
18
+ "Always use this formatting: [doc i].",
 
 
 
19
  ]
20
  type: "instruction"
21
 
22
+ question_answering_prompt:
23
  prompt:
24
  [
25
+ "Answer the following question: {question}",
26
+ "While respecting the following guidelines :",
27
+ "- If the passages have useful facts or numbers, use them in your answer.",
28
+ "- Do not use the sentence 'Doc i says ...' to say where information came from.",
29
+ "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
30
+ "- Always suggest as a conclusion other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points",
31
+ "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
32
+ "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
33
+ "- You do not need to use every passage. Only use the ones that help answer the question.",
34
+ "- If a specific location is mentioned in the question, make it the core of your answer and follow the //specific guidelines//",
35
  "",
36
+ "//specific guidelines//",
37
+ "if [the question is open and broad] then [:",
38
+ "- If the documents do not have the information needed to answer the question, say that you don't have enough information to answer directly to this question - it must be at the beginning of the text.",
39
+ "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
40
+ "- Start every paragraph with a question, and answer the question using different key elements taken from the sources ",
41
+ "- If the passages have useful facts or numbers, use them in your answer.",
42
+ "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
43
+ "- Do not use the sentence 'Doc i says ...' to say where information came from.",
44
+ "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
45
+ "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
46
+ "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
47
+ "- You do not need to use every passage. Only use the ones that help answer the question.",
48
+ "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
49
+ "- Make a clear distinction between information about a /location/ named in the question and other regions.",
50
+ " - First you must display information about the precise /location/",
51
+ " - then clearly state that you have information about /other places/,",
52
+ "  - then, display information about /other places/.",
53
+ "- Always suggest as a conclusion other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points]",
54
+ "",
55
+ "if [the question is factual and precise] then [",
56
+ "- If the documents do not have the information needed to answer the question, say that you don't have enough information to answer directly to this question - it must be at the beginning of the text.",
57
+ "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
58
+ "- Only answer the question",
59
+ "- Use bullet points and numbers",
60
+ "- If the passages have useful facts or numbers, use them in your answer.",
61
+ "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
62
+ "- Do not use the sentence 'Doc i says ...' to say where information came from.",
63
+ "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
64
+ "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
65
+ "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
66
+ "- You do not need to use every passage. Only use the ones that help answer the question.",
67
+ "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
68
+ "- Make a clear distinction between information about a /location/ named in the question and other regions.",
69
+ " - First you must display information about the precise /location/",
70
+ " - then clearly state that you have information about /other places/,",
71
+ "  - then, display information about /other places/",
72
+ "- Always suggest as a conclusion other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points]",
73
  ]
74
  type: "prompt"
75
 
76
  reformulation_prompt:
77
+ prompt: [
78
+ "Reformulate the following user message to be a short standalone question in French.",
 
 
 
 
 
 
 
 
79
  "If the question is too vague, just say it as it is",
80
+ "This question will be used to retrieve documents in a corpus of press articles; if needed, reformulate it to increase the chance of having relevant documents in this corpus.",
81
  "",
82
  "Exemples:",
83
  "---",
84
  "user",
85
+ "Quels enjeux autour de l'eau ?",
86
  "",
87
  "assistant",
88
+ "Quels articles traitent de l'eau et de quelle manière ?",
89
  "---",
90
  "user",
91
+ "Quelles obligations de faire un bilan carbone ?",
92
  "",
93
  "assistant",
94
+ "Quelles sont les obligations qui imposent de faire un bilan carbone ? Comment le sujet est il traité ?",
95
  "---",
96
  "user",
97
  "{question}",
98
  "",
99
  ]
100
+ type: prompt
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Science.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -82,14 +68,4 @@ reformulation_prompt:
82
  "{question}",
83
  "",
84
  ]
85
- type: prompt
86
-
87
- summarize_memory_prompt:
88
- prompt:
89
- [
90
- "Summarize the following exchange as concisely as possible to be used by a language model",
91
- "<conversation>",
92
- "{memory}",
93
- "</conversation>",
94
- ]
95
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
68
  "{question}",
69
  "",
70
  ]
71
+ type: prompt
 
 
 
 
 
 
 
 
 
 
spinoza_project/source/backend/get_prompts.py CHANGED
@@ -13,9 +13,9 @@ def get_qa_prompts(config, prompts):
13
  reformulation_prompt = make_chat_prompt(
14
  prompts["reformulation_prompt"], special_tokens
15
  )
16
- summarize_memory_prompt = make_chat_prompt(
17
- prompts["summarize_memory_prompt"], special_tokens
18
- )
19
 
20
  chat_qa_prompt = ChatPromptTemplate.from_messages(
21
  [
 
13
  reformulation_prompt = make_chat_prompt(
14
  prompts["reformulation_prompt"], special_tokens
15
  )
16
+ # summarize_memory_prompt = make_chat_prompt(
17
+ # prompts["summarize_memory_prompt"], special_tokens
18
+ # )
19
 
20
  chat_qa_prompt = ChatPromptTemplate.from_messages(
21
  [