Prompts are now open source
Browse files
+ got rid of tqdm bars for every retriever call
app.py
CHANGED
|
@@ -13,17 +13,44 @@ import numpy as np
|
|
| 13 |
from datetime import datetime
|
| 14 |
from azure.storage.fileshare import ShareServiceClient
|
| 15 |
|
| 16 |
-
# from dotenv import load_dotenv
|
| 17 |
-
# load_dotenv()
|
| 18 |
-
|
| 19 |
-
print(os.environ["content"], os.environ["sources"], sep="\n-\n" * 2)
|
| 20 |
|
| 21 |
theme = gr.themes.Soft(
|
| 22 |
primary_hue="sky",
|
| 23 |
font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
|
| 24 |
)
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
openai.api_type = "azure"
|
| 29 |
openai.api_key = os.environ["api_key"]
|
|
@@ -37,6 +64,7 @@ retrieve_all = EmbeddingRetriever(
|
|
| 37 |
),
|
| 38 |
embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
|
| 39 |
model_format="sentence_transformers",
|
|
|
|
| 40 |
)
|
| 41 |
|
| 42 |
retrieve_giec = EmbeddingRetriever(
|
|
@@ -88,7 +116,7 @@ def chat(
|
|
| 88 |
|
| 89 |
reformulated_query = openai.Completion.create(
|
| 90 |
engine="climateGPT",
|
| 91 |
-
prompt=
|
| 92 |
temperature=0,
|
| 93 |
max_tokens=128,
|
| 94 |
stop=["\n---\n", "<|im_end|>"],
|
|
@@ -105,7 +133,7 @@ def chat(
|
|
| 105 |
for i, d in enumerate(docs, 1)
|
| 106 |
]
|
| 107 |
)
|
| 108 |
-
messages.append({"role": "system", "content": f"{
|
| 109 |
|
| 110 |
response = openai.Completion.create(
|
| 111 |
engine="climateGPT",
|
|
@@ -255,11 +283,7 @@ Version 0.2-beta - This tool is under active development
|
|
| 255 |
with gr.Column(scale=1, variant="panel"):
|
| 256 |
gr.Markdown("### Sources")
|
| 257 |
sources_textbox = gr.Textbox(interactive=False, show_label=False, max_lines=50)
|
| 258 |
-
|
| 259 |
-
# ["IPCC only", "All available"],
|
| 260 |
-
# default="All available",
|
| 261 |
-
# label="Select reports",
|
| 262 |
-
# ),
|
| 263 |
ask.submit(
|
| 264 |
fn=chat,
|
| 265 |
inputs=[
|
|
|
|
| 13 |
from datetime import datetime
|
| 14 |
from azure.storage.fileshare import ShareServiceClient
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
theme = gr.themes.Soft(
|
| 18 |
primary_hue="sky",
|
| 19 |
font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
|
| 20 |
)
|
| 21 |
|
| 22 |
+
init_prompt = (
|
| 23 |
+
"You are ClimateGPT, an AI Assistant by Ekimetrics. "
|
| 24 |
+
"You are given extracted parts of IPCC reports and a question."
|
| 25 |
+
" Provide a clear and structured answer based on the context provided. "
|
| 26 |
+
"When relevant, use bullet points and lists to structure your answers."
|
| 27 |
+
)
|
| 28 |
+
sources_prompt = (
|
| 29 |
+
"When relevant, use facts and numbers from the following documents in your answer. "
|
| 30 |
+
"Whenever you use information from a document, reference it at the end of the sentence (ex: [doc 2]). "
|
| 31 |
+
"You don't have to use all documents, only if it makes sense in the conversation. "
|
| 32 |
+
"If no relevant information to answer the question is present in the documents, "
|
| 33 |
+
"just say you don't have enough information to answer."
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_reformulation_prompt(query: str) -> str:
|
| 38 |
+
return f"""Reformulate the following user message to be a short standalone question in English, in the context of an educationnal discussion about climate change.
|
| 39 |
+
---
|
| 40 |
+
query: La technologie nous sauvera-t-elle ?
|
| 41 |
+
standalone question: Can technology help humanity mitigate the effects of climate change?
|
| 42 |
+
---
|
| 43 |
+
query: what are our reserves in fossil fuel?
|
| 44 |
+
standalone question: What are the current reserves of fossil fuels and how long will they last?
|
| 45 |
+
---
|
| 46 |
+
query: {query}
|
| 47 |
+
standalone question:"""
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
system_template = {
|
| 51 |
+
"role": "system",
|
| 52 |
+
"content": init_prompt,
|
| 53 |
+
}
|
| 54 |
|
| 55 |
openai.api_type = "azure"
|
| 56 |
openai.api_key = os.environ["api_key"]
|
|
|
|
| 64 |
),
|
| 65 |
embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
|
| 66 |
model_format="sentence_transformers",
|
| 67 |
+
progress_bar=False,
|
| 68 |
)
|
| 69 |
|
| 70 |
retrieve_giec = EmbeddingRetriever(
|
|
|
|
| 116 |
|
| 117 |
reformulated_query = openai.Completion.create(
|
| 118 |
engine="climateGPT",
|
| 119 |
+
prompt=get_reformulation_prompt(query),
|
| 120 |
temperature=0,
|
| 121 |
max_tokens=128,
|
| 122 |
stop=["\n---\n", "<|im_end|>"],
|
|
|
|
| 133 |
for i, d in enumerate(docs, 1)
|
| 134 |
]
|
| 135 |
)
|
| 136 |
+
messages.append({"role": "system", "content": f"{sources_prompt}\n\n{sources}"})
|
| 137 |
|
| 138 |
response = openai.Completion.create(
|
| 139 |
engine="climateGPT",
|
|
|
|
| 283 |
with gr.Column(scale=1, variant="panel"):
|
| 284 |
gr.Markdown("### Sources")
|
| 285 |
sources_textbox = gr.Textbox(interactive=False, show_label=False, max_lines=50)
|
| 286 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
ask.submit(
|
| 288 |
fn=chat,
|
| 289 |
inputs=[
|