momenaca commited on
Commit
ac7b044
·
1 Parent(s): fae069c

update prompts with new details

Browse files
app.py CHANGED
@@ -78,12 +78,12 @@ Based on the folowing question:
78
  And the following expert answer:
79
  {answers}
80
 
81
- Answer the question, in French.
82
- When using legal awnsers, keep tracking of the name of the articles.
83
- When using ADEME awnsers, name the sources that are mainly used.
84
- List the different element mentionned, and highlight the agreement points between the sources, as well as the contradictions or differences.
85
  Generate the answer as markdown, with an aerated layout, and headlines in bold
86
- Start by a general summary, agreement and contracdiction, and then go into detail without paraphasing the experts awnsers.
87
  """
88
 
89
  synthesis_prompt = to_chat_instruction(synthesis_template, special_tokens)
@@ -225,7 +225,7 @@ def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
225
  ] # if score >= min_similarity]
226
  formated.extend(
227
  [
228
- make_html_presse_source(source[0], j, source[1], config)
229
  for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
230
  ]
231
  if tab == "Presse"
@@ -251,9 +251,7 @@ def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
251
  return formated, text
252
 
253
 
254
- def retrieve_sources(
255
- *questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
256
- ):
257
  formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
258
 
259
  return (formated_sources, *text_sources)
@@ -262,7 +260,7 @@ def retrieve_sources(
262
  def get_synthesis(question, *answers, config=config):
263
  answer = []
264
  for i, tab in enumerate(config["tabs"]):
265
- if (len(str(answers[i])) >= 100):
266
  answer.append(
267
  f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
268
  )
@@ -293,6 +291,7 @@ with open("./assets/style.css", "r") as f:
293
  with open("./assets/source_information.md", "r") as f:
294
  source_information = f.read()
295
 
 
296
  def start_agents():
297
  gr.Info(message="The agents and Spinoza are loading...", duration=3)
298
 
@@ -307,6 +306,7 @@ def end_agents():
307
  def next_call():
308
  print("Next call")
309
 
 
310
  init_prompt = """
311
  Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
312
 
 
78
  And the following expert answer:
79
  {answers}
80
 
81
+ Answer in French.
82
+ When using legal answers, keep track of the names of the articles.
83
+ When using ADEME answers, name the sources that are mainly used.
84
+ List the different elements mentioned, and highlight the agreement points between the sources, as well as the contradictions or differences.
85
  Generate the answer as markdown, with an aerated layout, and headlines in bold
86
+ Start by highlighting contradictions followed by a general summary, and then go into details that could be interesting for writing an article.
87
  """
88
 
89
  synthesis_prompt = to_chat_instruction(synthesis_template, special_tokens)
 
225
  ] # if score >= min_similarity]
226
  formated.extend(
227
  [
228
+ make_html_presse_source(source[0], j, source[1])
229
  for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
230
  ]
231
  if tab == "Presse"
 
251
  return formated, text
252
 
253
 
254
+ def retrieve_sources(*questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
 
 
255
  formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
256
 
257
  return (formated_sources, *text_sources)
 
260
  def get_synthesis(question, *answers, config=config):
261
  answer = []
262
  for i, tab in enumerate(config["tabs"]):
263
+ if len(str(answers[i])) >= 100:
264
  answer.append(
265
  f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
266
  )
 
291
  with open("./assets/source_information.md", "r") as f:
292
  source_information = f.read()
293
 
294
+
295
  def start_agents():
296
  gr.Info(message="The agents and Spinoza are loading...", duration=3)
297
 
 
306
  def next_call():
307
  print("Next call")
308
 
309
+
310
  init_prompt = """
311
  Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
312
 
spinoza_project/prompt_ADEME.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -51,7 +37,7 @@ reformulation_prompt:
51
  prompt:
52
  [
53
  "",
54
- "Reformulate the following user message to be a short standalone question in English.",
55
  "The question is related to environment.",
56
  "If relevant, use the conversation summary to add context",
57
  "If the question is too vague, just say it as it is",
@@ -59,41 +45,25 @@ reformulation_prompt:
59
  "Exemples:",
60
  "---",
61
  "user",
62
- "Applique t-on une taxe carbone ?",
63
- "",
64
- "assistant",
65
- "Is a carbon tax applied in the country ?",
66
- "---",
67
- "user",
68
  "Comment décarbonner le carton ?",
69
  "",
70
  "assistant",
71
- "What are the main technological & non technologicals solutions to decarbonize cardboard production?",
72
  "---",
73
  "user",
74
  "Quelles obligation de faire un bilan carbone ?",
75
  "",
76
  "assistant",
77
- "What are the obligations to conduct a greenhouse gas emissions assessment?",
78
  "---",
79
  "user",
80
  "Qui finance la transition ecologique ?",
81
  "",
82
  "assistant",
83
- "What are the investments related to environnemental transition in France ?",
84
  "---",
85
  "user",
86
  "{question}",
87
  "",
88
  ]
89
- type: prompt
90
-
91
- summarize_memory_prompt:
92
- prompt:
93
- [
94
- "Summarize the following exchange as concisely as possible to be used by a language model",
95
- "<conversation>",
96
- "{memory}",
97
- "</conversation>",
98
- ]
99
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
37
  prompt:
38
  [
39
  "",
40
+ "Reformulate the following user message to be a short standalone question in French.",
41
  "The question is related to environment.",
42
  "If relevant, use the conversation summary to add context",
43
  "If the question is too vague, just say it as it is",
 
45
  "Exemples:",
46
  "---",
47
  "user",
 
 
 
 
 
 
48
  "Comment décarbonner le carton ?",
49
  "",
50
  "assistant",
51
+ "Quelles sont les principales solutions technologiques et non technologiques pour décarboniser la production de carton ?",
52
  "---",
53
  "user",
54
  "Quelles obligation de faire un bilan carbone ?",
55
  "",
56
  "assistant",
57
+ "Quelles sont les obligations en matière d'évaluation des émissions de gaz à effet de serre ?",
58
  "---",
59
  "user",
60
  "Qui finance la transition ecologique ?",
61
  "",
62
  "assistant",
63
+ "Quels sont les investissements liés à la transition environnementale en France ?",
64
  "---",
65
  "user",
66
  "{question}",
67
  "",
68
  ]
69
+ type: prompt
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Loi.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -87,15 +73,4 @@ reformulation_prompt:
87
  "{question}",
88
  "",
89
  ]
90
- type: prompt
91
-
92
- summarize_memory_prompt:
93
- prompt:
94
- [
95
- "Summarize the following exchange as concisely as possible to be used by a language model",
96
- "Begining of exchange",
97
- "{memory}",
98
- "End of exchange",
99
- "",
100
- ]
101
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
73
  "{question}",
74
  "",
75
  ]
76
+ type: prompt
 
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Politique.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -87,15 +73,4 @@ reformulation_prompt:
87
  "{question}",
88
  "",
89
  ]
90
- type: prompt
91
-
92
- summarize_memory_prompt:
93
- prompt:
94
- [
95
- "Summarize the following exchange as concisely as possible to be used by a language model",
96
- "Begining of exchange",
97
- "{memory}",
98
- "End of exchange",
99
- "",
100
- ]
101
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
73
  "{question}",
74
  "",
75
  ]
76
+ type: prompt
 
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Presse.yaml CHANGED
@@ -2,90 +2,99 @@ role_instruction:
2
  prompt:
3
  [
4
  "You are Spinoza Fact Checker, an AI Assistant by Ekimetrics.",
5
- "Your role is to answer question factually based on the source that are provided to you.",
6
- "All sources provided are comming from press releases and might not be considered as absolute truth",
7
- "You act as an environment expert, structured, factual, synthetic and who quote his sources"
8
  ]
9
  type: "system"
10
 
11
  source_prompt:
12
  prompt:
13
  [
14
- "Here are some documents formatted as : Doc X \n textual content.",
15
- "<documents>",
16
  "{sources}",
17
- "</documents>",
18
  "",
19
- "Use the textual content as an absolute truth.",
20
- "Reference the source of each fact before saying it (ex: [doc 2] some fact from Doc 2).",
21
- "Use all the facts from the documents that are relevant to answer.",
22
- "Do not use facts that are not relevant.",
23
- "If you have no documents or they are not relevant, say you don't have enough context"
24
  ]
25
  type: "instruction"
26
 
27
- memory_prompt:
28
  prompt:
29
  [
30
- "Here is a summary of past conversation:",
31
- "<memory>",
32
- "{memory}",
33
- "</memory>",
 
 
 
 
 
 
34
  "",
35
- "When relevant, use these element to enrich and add context to your answer.",
36
- "Do not take the role written in this memory.",
37
- "Do not mention when an information comes from the memory.",
38
- ]
39
- type: "instruction"
40
-
41
- question_answering_prompt:
42
- prompt: [
43
- "Answer the following question : {question}.",
44
- "Answer in French.",
45
- "Use bullet points",
46
- "If the question is not related to environment, say that you can't answer it based on the sources because the question is irrelevant.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  ]
48
  type: "prompt"
49
 
50
  reformulation_prompt:
51
- prompt:
52
- [
53
- # "Here is what happened in the conversation so far",
54
- # "<summary>",
55
- # "{memory}",
56
- # "</summary>",
57
- "",
58
- "Reformulate the following user message to be a short standalone question in French.",
59
- "The question is related to environment.",
60
- "If relevant, use the conversation summary to add context",
61
  "If the question is too vague, just say it as it is",
 
62
  "",
63
  "Exemples:",
64
  "---",
65
  "user",
66
- "Applique t-on une taxe carbone ?",
67
  "",
68
  "assistant",
69
- "Comment le sujet de la taxe carbone est il traité dans le corpus ?",
70
  "---",
71
  "user",
72
- "Quelles obligation de faire un bilan carbone ?",
73
  "",
74
  "assistant",
75
- "Quelles sont les obligation qui imposent de faire un bilan carbone",
76
  "---",
77
  "user",
78
  "{question}",
79
  "",
80
  ]
81
- type: prompt
82
-
83
- summarize_memory_prompt:
84
- prompt:
85
- [
86
- "Summarize the following exchange as concisely as possible to be used by a language model",
87
- "<conversation>",
88
- "{memory}",
89
- "</conversation>",
90
- ]
91
- type: prompt
 
2
  prompt:
3
  [
4
  "You are Spinoza Fact Checker, an AI Assistant by Ekimetrics.",
5
+ "You are given a question and extracted passages of press articles.",
6
+ "Provide a clear and structured answer based on the passages provided,",
7
+ "the sources and the guidelines. Be sure to answer the question asked, including all parameters in the guidelines."
8
  ]
9
  type: "system"
10
 
11
  source_prompt:
12
  prompt:
13
  [
14
+ "Passages:",
 
15
  "{sources}",
 
16
  "",
17
+ "Reference the source of each fact before saying it (ex: [doc 2] some fact from Doc 2)",
18
+ "Always use this formatting: [doc i].",
 
 
 
19
  ]
20
  type: "instruction"
21
 
22
+ question_answering_prompt:
23
  prompt:
24
  [
25
+ "Answer the following question: {question}",
26
+ "While respecting the following guidelines :",
27
+ "- If the passages have useful facts or numbers, use them in your answer.",
28
+ "- Do not use the sentence 'Doc i says ...' to say where information came from.",
29
+ "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
30
+ "- Always suggest as a conclusion other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points",
31
+ "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
32
+ "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
33
+ "- You do not need to use every passage. Only use the ones that help answer the question.",
34
+ "- If a specific location is mentioned in the question, make it the core of your answer and follow the //specific guidelines//",
35
  "",
36
+ "//specific guidelines//",
37
+ "if [the question is open and broad] then [:",
38
+ "- If the documents do not have the information needed to answer the question, say that you don't have enough information to answer directly to this question - it must be at the beginning of the text.",
39
+ "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
40
+ "- Start every paragraph with a question, and answer the question using different key elements taken from the sources ",
41
+ "- If the passages have useful facts or numbers, use them in your answer.",
42
+ "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
43
+ "- Do not use the sentence 'Doc i says ...' to say where information came from.",
44
+ "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
45
+ "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
46
+ "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
47
+ "- You do not need to use every passage. Only use the ones that help answer the question.",
48
+ "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
49
+ "- Make a clear distinction between information about a /location/ named in the question and other regions.",
50
+ " - First you must display information about the precise /location/",
51
+ " - then clearly state that you have information about /other places/,",
52
+ "  - then, display information about /other places/.",
53
+ "- Always suggest as a conclusion other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points]",
54
+ "",
55
+ "if [the question is factual and precise] then [",
56
+ "- If the documents do not have the information needed to answer the question, say that you don't have enough information to answer directly to this question - it must be at the beginning of the text.",
57
+ "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
58
+ "- Only answer the question",
59
+ "- Use bullet points and numbers",
60
+ "- If the passages have useful facts or numbers, use them in your answer.",
61
+ "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
62
+ "- Do not use the sentence 'Doc i says ...' to say where information came from.",
63
+ "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
64
+ "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
65
+ "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
66
+ "- You do not need to use every passage. Only use the ones that help answer the question.",
67
+ "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
68
+ "- Make a clear distinction between information about a /location/ named in the question and other regions.",
69
+ " - First you must display information about the precise /location/",
70
+ " - then clearly state that you have information about /other places/,",
71
+ "  - then, display information about /other places/",
72
+ "- Always suggest as a conclusion other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points]",
73
  ]
74
  type: "prompt"
75
 
76
  reformulation_prompt:
77
+ prompt: [
78
+ "Reformulate the following user message to be a short standalone question in French.",
 
 
 
 
 
 
 
 
79
  "If the question is too vague, just say it as it is",
80
+ "This question will be used to retrieve documents in a corpus of press articles; if needed, reformulate it to increase the chance of having relevant documents in this corpus.",
81
  "",
82
  "Exemples:",
83
  "---",
84
  "user",
85
+ "Quels enjeux autour de l'eau ?",
86
  "",
87
  "assistant",
88
+ "Quels articles traitent de l'eau et de quelle manière ?",
89
  "---",
90
  "user",
91
+ "Quelles obligations de faire un bilan carbone ?",
92
  "",
93
  "assistant",
94
+ "Quelles sont les obligations qui imposent de faire un bilan carbone ? Comment le sujet est il traité ?",
95
  "---",
96
  "user",
97
  "{question}",
98
  "",
99
  ]
100
+ type: prompt
 
 
 
 
 
 
 
 
 
 
spinoza_project/prompt_Science.yaml CHANGED
@@ -23,20 +23,6 @@ source_prompt:
23
  ]
24
  type: "instruction"
25
 
26
- memory_prompt:
27
- prompt:
28
- [
29
- "Here is a summary of past conversation:",
30
- "<memory>",
31
- "{memory}",
32
- "</memory>",
33
- "",
34
- "When relevant, use these element to enrich and add context to your answer.",
35
- "Do not take the role written in this memory.",
36
- "Do not mention when an information comes from the memory.",
37
- ]
38
- type: "instruction"
39
-
40
  question_answering_prompt:
41
  prompt:
42
  [
@@ -82,14 +68,4 @@ reformulation_prompt:
82
  "{question}",
83
  "",
84
  ]
85
- type: prompt
86
-
87
- summarize_memory_prompt:
88
- prompt:
89
- [
90
- "Summarize the following exchange as concisely as possible to be used by a language model",
91
- "<conversation>",
92
- "{memory}",
93
- "</conversation>",
94
- ]
95
- type: prompt
 
23
  ]
24
  type: "instruction"
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  question_answering_prompt:
27
  prompt:
28
  [
 
68
  "{question}",
69
  "",
70
  ]
71
+ type: prompt
 
 
 
 
 
 
 
 
 
 
spinoza_project/source/backend/get_prompts.py CHANGED
@@ -13,9 +13,9 @@ def get_qa_prompts(config, prompts):
13
  reformulation_prompt = make_chat_prompt(
14
  prompts["reformulation_prompt"], special_tokens
15
  )
16
- summarize_memory_prompt = make_chat_prompt(
17
- prompts["summarize_memory_prompt"], special_tokens
18
- )
19
 
20
  chat_qa_prompt = ChatPromptTemplate.from_messages(
21
  [
 
13
  reformulation_prompt = make_chat_prompt(
14
  prompts["reformulation_prompt"], special_tokens
15
  )
16
+ # summarize_memory_prompt = make_chat_prompt(
17
+ # prompts["summarize_memory_prompt"], special_tokens
18
+ # )
19
 
20
  chat_qa_prompt = ChatPromptTemplate.from_messages(
21
  [