Prompts are now open source
Browse files
+ got rid of tqdm bars for every retriever call
app.py
CHANGED
@@ -13,17 +13,44 @@ import numpy as np
|
|
13 |
from datetime import datetime
|
14 |
from azure.storage.fileshare import ShareServiceClient
|
15 |
|
16 |
-
# from dotenv import load_dotenv
|
17 |
-
# load_dotenv()
|
18 |
-
|
19 |
-
print(os.environ["content"], os.environ["sources"], sep="\n-\n" * 2)
|
20 |
|
21 |
theme = gr.themes.Soft(
|
22 |
primary_hue="sky",
|
23 |
font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
|
24 |
)
|
25 |
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
openai.api_type = "azure"
|
29 |
openai.api_key = os.environ["api_key"]
|
@@ -37,6 +64,7 @@ retrieve_all = EmbeddingRetriever(
|
|
37 |
),
|
38 |
embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
|
39 |
model_format="sentence_transformers",
|
|
|
40 |
)
|
41 |
|
42 |
retrieve_giec = EmbeddingRetriever(
|
@@ -88,7 +116,7 @@ def chat(
|
|
88 |
|
89 |
reformulated_query = openai.Completion.create(
|
90 |
engine="climateGPT",
|
91 |
-
prompt=
|
92 |
temperature=0,
|
93 |
max_tokens=128,
|
94 |
stop=["\n---\n", "<|im_end|>"],
|
@@ -105,7 +133,7 @@ def chat(
|
|
105 |
for i, d in enumerate(docs, 1)
|
106 |
]
|
107 |
)
|
108 |
-
messages.append({"role": "system", "content": f"{
|
109 |
|
110 |
response = openai.Completion.create(
|
111 |
engine="climateGPT",
|
@@ -255,11 +283,7 @@ Version 0.2-beta - This tool is under active development
|
|
255 |
with gr.Column(scale=1, variant="panel"):
|
256 |
gr.Markdown("### Sources")
|
257 |
sources_textbox = gr.Textbox(interactive=False, show_label=False, max_lines=50)
|
258 |
-
|
259 |
-
# ["IPCC only", "All available"],
|
260 |
-
# default="All available",
|
261 |
-
# label="Select reports",
|
262 |
-
# ),
|
263 |
ask.submit(
|
264 |
fn=chat,
|
265 |
inputs=[
|
|
|
13 |
from datetime import datetime
|
14 |
from azure.storage.fileshare import ShareServiceClient
|
15 |
|
|
|
|
|
|
|
|
|
16 |
|
17 |
theme = gr.themes.Soft(
|
18 |
primary_hue="sky",
|
19 |
font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
|
20 |
)
|
21 |
|
22 |
+
init_prompt = (
|
23 |
+
"You are ClimateGPT, an AI Assistant by Ekimetrics. "
|
24 |
+
"You are given extracted parts of IPCC reports and a question."
|
25 |
+
" Provide a clear and structured answer based on the context provided. "
|
26 |
+
"When relevant, use bullet points and lists to structure your answers."
|
27 |
+
)
|
28 |
+
sources_prompt = (
|
29 |
+
"When relevant, use facts and numbers from the following documents in your answer. "
|
30 |
+
"Whenever you use information from a document, reference it at the end of the sentence (ex: [doc 2]). "
|
31 |
+
"You don't have to use all documents, only if it makes sense in the conversation. "
|
32 |
+
"If no relevant information to answer the question is present in the documents, "
|
33 |
+
"just say you don't have enough information to answer."
|
34 |
+
)
|
35 |
+
|
36 |
+
|
37 |
+
def get_reformulation_prompt(query: str) -> str:
|
38 |
+
return f"""Reformulate the following user message to be a short standalone question in English, in the context of an educationnal discussion about climate change.
|
39 |
+
---
|
40 |
+
query: La technologie nous sauvera-t-elle ?
|
41 |
+
standalone question: Can technology help humanity mitigate the effects of climate change?
|
42 |
+
---
|
43 |
+
query: what are our reserves in fossil fuel?
|
44 |
+
standalone question: What are the current reserves of fossil fuels and how long will they last?
|
45 |
+
---
|
46 |
+
query: {query}
|
47 |
+
standalone question:"""
|
48 |
+
|
49 |
+
|
50 |
+
system_template = {
|
51 |
+
"role": "system",
|
52 |
+
"content": init_prompt,
|
53 |
+
}
|
54 |
|
55 |
openai.api_type = "azure"
|
56 |
openai.api_key = os.environ["api_key"]
|
|
|
64 |
),
|
65 |
embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
|
66 |
model_format="sentence_transformers",
|
67 |
+
progress_bar=False,
|
68 |
)
|
69 |
|
70 |
retrieve_giec = EmbeddingRetriever(
|
|
|
116 |
|
117 |
reformulated_query = openai.Completion.create(
|
118 |
engine="climateGPT",
|
119 |
+
prompt=get_reformulation_prompt(query),
|
120 |
temperature=0,
|
121 |
max_tokens=128,
|
122 |
stop=["\n---\n", "<|im_end|>"],
|
|
|
133 |
for i, d in enumerate(docs, 1)
|
134 |
]
|
135 |
)
|
136 |
+
messages.append({"role": "system", "content": f"{sources_prompt}\n\n{sources}"})
|
137 |
|
138 |
response = openai.Completion.create(
|
139 |
engine="climateGPT",
|
|
|
283 |
with gr.Column(scale=1, variant="panel"):
|
284 |
gr.Markdown("### Sources")
|
285 |
sources_textbox = gr.Textbox(interactive=False, show_label=False, max_lines=50)
|
286 |
+
|
|
|
|
|
|
|
|
|
287 |
ask.submit(
|
288 |
fn=chat,
|
289 |
inputs=[
|