Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,21 +1,59 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
"""
|
5 |
-
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
6 |
-
"""
|
7 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def respond(
|
11 |
-
message,
|
12 |
-
history:
|
13 |
-
system_message,
|
14 |
-
max_tokens,
|
15 |
-
temperature,
|
16 |
-
top_p,
|
17 |
):
|
18 |
-
system_message = "You are
|
19 |
messages = [{"role": "system", "content": system_message}]
|
20 |
|
21 |
for val in history:
|
@@ -26,46 +64,44 @@ def respond(
|
|
26 |
|
27 |
messages.append({"role": "user", "content": message})
|
28 |
|
29 |
-
|
|
|
|
|
|
|
30 |
|
|
|
31 |
for message in client.chat_completion(
|
32 |
messages,
|
33 |
-
max_tokens=
|
34 |
stream=True,
|
35 |
-
temperature=
|
36 |
-
top_p=
|
37 |
):
|
38 |
token = message.choices[0].delta.content
|
39 |
-
|
40 |
response += token
|
41 |
yield response
|
42 |
|
43 |
-
|
44 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
45 |
-
"""
|
46 |
-
demo = gr.ChatInterface(
|
47 |
-
respond,
|
48 |
-
additional_inputs=[
|
49 |
-
gr.Textbox(value = "You are a Extreme Weather Phenomena Analyst. You advise others about Weather.", label="System message"),
|
50 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
51 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
52 |
-
gr.Slider(
|
53 |
-
minimum=0.1,
|
54 |
-
maximum=1.0,
|
55 |
-
value=0.95,
|
56 |
-
step=0.05,
|
57 |
-
label="Top-p (nucleus sampling)",
|
58 |
-
),
|
59 |
-
],
|
60 |
-
|
61 |
-
examples = [
|
62 |
-
["Can you explain the phenomenon of ball lightning?"],
|
63 |
-
["What the differences between a cyclone, a typhoon, and a hurricane?"],
|
64 |
-
["How are heatwaves predicted and measured?"]
|
65 |
-
],
|
66 |
-
title = 'Extreme Weather Phenomena Analyst'
|
67 |
-
)
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
if __name__ == "__main__":
|
71 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
+
from typing import List, Tuple
|
4 |
+
import fitz # PyMuPDF
|
5 |
+
from sentence_transformers import SentenceTransformer, util
|
6 |
+
import numpy as np
|
7 |
+
import faiss
|
8 |
|
|
|
|
|
|
|
# Hugging Face Inference API client for the hosted Zephyr-7B chat model;
# used below for streaming chat_completion calls.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
10 |
|
# Holds the RAG state: extracted PDF pages, their sentence embeddings,
# and a FAISS L2 index for nearest-neighbour retrieval.
class MyApp:
    def __init__(self, pdf_path: str = "3182_ORCA.pdf") -> None:
        """Load the PDF and build the vector index up front.

        Args:
            pdf_path: PDF file to index. Defaults to the bundled document,
                so existing ``MyApp()`` callers are unchanged.
        """
        self.documents = []    # one {"page": int, "content": str} dict per PDF page
        self.embeddings = None  # ndarray (n_pages, dim), set by build_vector_db
        self.index = None       # faiss.IndexFlatL2 over self.embeddings
        # Load the encoder once and reuse it: the original re-instantiated
        # SentenceTransformer in build_vector_db AND on every search call,
        # re-reading the model weights each time.
        self._model = SentenceTransformer('all-MiniLM-L6-v2')
        self.load_pdf(pdf_path)
        self.build_vector_db()

    def load_pdf(self, file_path: str) -> None:
        """Extracts text from a PDF file and stores it in the app's documents."""
        doc = fitz.open(file_path)
        try:
            # Pages are numbered from 1 for human-facing "page" metadata.
            self.documents = [
                {"page": page_num + 1, "content": page.get_text()}
                for page_num, page in enumerate(doc)
            ]
        finally:
            doc.close()  # original leaked the document handle
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Builds a vector database using the content of the PDF."""
        self.embeddings = self._model.encode(
            [doc["content"] for doc in self.documents]
        )
        # Exact (brute-force) L2 index; fine for a single document's pages.
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(np.array(self.embeddings))
        print("Vector database built successfully!")

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Searches for relevant documents using vector similarity.

        Args:
            query: Free-text query to embed and match against the pages.
            k: Number of nearest pages to return.

        Returns:
            The page contents of up to ``k`` nearest neighbours, or a
            single fallback message when nothing can be retrieved.
        """
        query_embedding = self._model.encode([query])
        _, neighbor_ids = self.index.search(np.array(query_embedding), k)
        # FAISS pads the result with -1 when fewer than k vectors exist;
        # without the i >= 0 filter, documents[-1] would silently return
        # the *last* page instead of "nothing found".
        results = [
            self.documents[i]["content"] for i in neighbor_ids[0] if i >= 0
        ]
        return results if results else ["No relevant documents found."]
# Module-level singleton: parsing the PDF and building the FAISS index
# happen once, at import time, before the Gradio app starts serving.
app = MyApp()
47 |
|
48 |
def respond(
|
49 |
+
message: str,
|
50 |
+
history: List[Tuple[str, str]],
|
51 |
+
system_message: str,
|
52 |
+
max_tokens: int,
|
53 |
+
temperature: float,
|
54 |
+
top_p: float,
|
55 |
):
|
56 |
+
system_message = "You are an Extreme Weather Phenomena Analyst, your role involves monitoring, analyzing, and interpreting data related to extreme weather events and their potential links to climate change. Your work supports informed decision-making to mitigate risks and enhance societal resilience against these events. "
|
57 |
messages = [{"role": "system", "content": system_message}]
|
58 |
|
59 |
for val in history:
|
|
|
64 |
|
65 |
messages.append({"role": "user", "content": message})
|
66 |
|
67 |
+
# RAG - Retrieve relevant documents
|
68 |
+
retrieved_docs = app.search_documents(message)
|
69 |
+
context = "\n".join(retrieved_docs)
|
70 |
+
messages.append({"role": "system", "content": "Relevant documents: " + context})
|
71 |
|
72 |
+
response = ""
|
73 |
for message in client.chat_completion(
|
74 |
messages,
|
75 |
+
max_tokens=100,
|
76 |
stream=True,
|
77 |
+
temperature=0.98,
|
78 |
+
top_p=0.7,
|
79 |
):
|
80 |
token = message.choices[0].delta.content
|
|
|
81 |
response += token
|
82 |
yield response
|
83 |
|
# Top-level Blocks container; populated in the `with demo:` section below
# and launched from the __main__ guard.
demo = gr.Blocks()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
# Assemble the UI: a disclaimer banner above the RAG-backed chat interface.
with demo:
    # NOTE(review): this disclaimer says "DBT exercise book", but the title
    # and examples below are about extreme weather and the indexed file is
    # "3182_ORCA.pdf" — confirm which document this Space serves and align
    # the user-facing text accordingly.
    gr.Markdown(
        "‼️Disclaimer: This chatbot is based on a DBT exercise book that is publicly available. and just to test RAG implementation.‼️"
    )

    chatbot = gr.ChatInterface(
        respond,  # streaming generator defined above; yields partial responses
        # Clickable example prompts shown beneath the chat textbox.
        examples=[
            ["How can climate science provide risk-based information for decision-makers, and why is this approach beneficial?"],
            ["What are the dual approaches required to manage the risks of extreme weather in a warmer climate, and how do these approaches function?"],
            ["What are the three lines of evidence used to establish the connection between climate change and the increased risk of extreme weather events, and how do they apply to different types of weather events?"],
            ["How does climate change affect the probability distribution of temperature extremes, and what are the implications of this shift for future weather events?"],
            ["How do rising global temperatures contribute to the increasing frequency and intensity of extreme weather events, and why is a probability-based risk management framework important for addressing these changes?"],
            ["What are the observed climate trends related to extreme weather events over recent decades, and how do these trends align with predictions of global warming?"],
            ["What recent extreme weather events illustrate the increasing severity and frequency of such events due to global warming, and what have been their social and economic impacts?"],
            ["How does global warming influence the frequency and severity of extreme weather events, and what is the recommended approach for managing the associated risks?"]
        ],
        title='Extreme Weather Phenomena Analyst'
    )
105 |
|
# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()