umangchaudhry
committed on
Upload 3 files
Browse files- app.py +111 -0
- summary_tool_questions.md +21 -0
- summary_tool_system_prompt.md +30 -0
app.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from io import BytesIO
|
4 |
+
from tempfile import NamedTemporaryFile
|
5 |
+
from langchain.chains import create_retrieval_chain
|
6 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
7 |
+
from langchain_core.prompts import ChatPromptTemplate
|
8 |
+
from langchain_openai import ChatOpenAI
|
9 |
+
from langchain_community.document_loaders import PyPDFLoader
|
10 |
+
from langchain_core.vectorstores import InMemoryVectorStore
|
11 |
+
from langchain_openai import OpenAIEmbeddings
|
12 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
13 |
+
|
14 |
+
# Function to process PDF, run Q&A, and return results
|
15 |
+
def _read_text_file(path):
    """Return the full contents of ``path`` decoded as UTF-8.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"The specified file was not found: {path}")
    # The prompt/questions files contain non-ASCII characters, so do not rely
    # on the platform default encoding.
    with open(path, "r", encoding="utf-8") as file:
        return file.read()


def _load_pdf_documents(uploaded_file):
    """Write the upload to a temporary ``.pdf`` file and load it with PyPDFLoader.

    The temporary file is removed whether or not loading succeeds.
    """
    temp_pdf = NamedTemporaryFile(delete=False, suffix=".pdf")
    temp_pdf_path = temp_pdf.name
    try:
        # Close the handle before the loader opens the path by name.
        with temp_pdf:
            temp_pdf.write(uploaded_file.read())
        return PyPDFLoader(temp_pdf_path).load()
    finally:
        os.remove(temp_pdf_path)


def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
    """Run every question in ``questions_path`` against the uploaded PDF.

    Args:
        api_key: OpenAI API key, passed directly to the embedding and chat
            clients.
        uploaded_file: Object with a ``read()`` method returning the PDF bytes.
        questions_path: Path to a text/Markdown file with one question per
            non-blank line.
        prompt_path: Path to the system prompt file. It is used as the system
            message of the chat prompt and is expected to contain
            ``{context}``, where the retrieved document chunks are inserted.
        display_placeholder: Object whose ``markdown()`` method is called with
            the accumulated Q&A text after each answer.

    Returns:
        A list of Markdown strings, one per question, in file order.

    Raises:
        FileNotFoundError: If ``prompt_path`` or ``questions_path`` does not
            exist. Both are checked before the upload is read.
    """
    # Validate the local inputs first so a missing file fails before any
    # temp file is written or any API call is made.
    system_prompt = _read_text_file(prompt_path)
    stripped_lines = (
        line.strip() for line in _read_text_file(questions_path).split("\n")
    )
    questions = [line for line in stripped_lines if line]

    docs = _load_pdf_documents(uploaded_file)

    # Split the document into smaller chunks for embedding
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Create vector store and retriever. The key is passed explicitly rather
    # than via os.environ so it is not shared process-wide.
    vectorstore = InMemoryVectorStore.from_documents(
        documents=splits, embedding=OpenAIEmbeddings(api_key=api_key)
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Create the retrieval and question-answering chains
    llm = ChatOpenAI(model="gpt-4o", api_key=api_key)
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Generate question and answer pairs incrementally
    qa_results = []
    for question in questions:
        result = rag_chain.invoke({"input": question})
        answer = result["answer"]
        qa_results.append(f"### Question: {question}\n**Answer:** {answer}\n")
        # Update the placeholder with each new Q&A pair.
        # NOTE(review): unsafe_allow_html=True renders model output as raw
        # HTML — confirm this is intended.
        display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)

    return qa_results
|
79 |
+
|
80 |
+
# Streamlit app layout: collect an API key and a PDF, then run the fixed
# question list against the document and offer the answers for download.
st.title("Climate Policy Summary Tool")

# Input OpenAI API key (masked in the UI)
api_key = st.text_input("Enter your OpenAI API key:", type="password")

# File upload section for PDF
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

# Static paths for prompt and questions, relative to the working directory
prompt_file_path = "summary_tool_system_prompt.md"
questions_file_path = "summary_tool_questions.md"

# When user clicks "Generate"
if st.button("Generate"):
    # Tell the user what is missing instead of silently doing nothing.
    if not api_key:
        st.warning("Please enter your OpenAI API key.")
    elif not uploaded_file:
        st.warning("Please upload a PDF document.")
    else:
        # Placeholder that process_pdf updates after each answered question
        display_placeholder = st.empty()

        with st.spinner("Processing..."):
            try:
                results = process_pdf(
                    api_key,
                    uploaded_file,
                    questions_file_path,
                    prompt_file_path,
                    display_placeholder,
                )

                # Allow the user to download the results as a Markdown file
                markdown_text = "\n".join(results)
                st.download_button(
                    label="Download Results as Markdown",
                    data=markdown_text,
                    file_name="qa_results.md",
                    mime="text/markdown",
                )
            except Exception as e:
                # Top-level UI boundary: surface any failure to the user.
                st.error(f"An error occurred: {e}")
|
summary_tool_questions.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Provide a summary of the information available in the plan, including the city name, population, geography, major industries, per capita income, or similar summary information.
|
2 |
+
|
3 |
+
To what extent does the plan address resilience, mitigation, and adaptation?
|
4 |
+
|
5 |
+
Does the plan describe a scenario of climate change the planning jurisdiction will face and for which it is planning climate policies for resilience, mitigation, and adaptation? If so, please summarize any such scenario.
|
6 |
+
|
7 |
+
What climate impacts does the plan identify as presenting climate risks for the planning jurisdiction? For each identified climate impact, summarize the plan’s description of: 1. The climate impact. 2. The climate risks it poses to the planning jurisdiction. 3. Areas or populations with the most climate vulnerability to the identified climate impact.
|
8 |
+
|
9 |
+
What resilience options does the plan identify? For each identified resilience option, summarize the information provided in the plan regarding cost, timing, location, and other important attributes.
|
10 |
+
|
11 |
+
What mitigation options does the plan identify? For each identified mitigation option, summarize the information provided in the plan regarding cost, timing, location, and other important attributes.
|
12 |
+
|
13 |
+
What adaptation options does the plan identify? For each identified adaptation option, summarize: 1. The climate threat it is addressing. 2. Information provided in the plan regarding cost, timing, location, and other important attributes.
|
14 |
+
|
15 |
+
What climate policies does the plan identify to advance climate justice? For each climate policy, summarize: 1. The climate threat it is addressing. 2. Areas or populations with the most climate vulnerability to the identified climate impact.
|
16 |
+
|
17 |
+
Is there any climate policy identified in the plan that could be a potential maladaptation? For each identified climate policy, summarize: 1. The climate threat it is addressing. 2. The resilience, mitigation, or adaptation option that the climate policy proposes.
|
18 |
+
|
19 |
+
Summarize the plan's description of how the planning jurisdiction will monitor and evaluate progress on the implementation of resilience options, mitigation options, and adaptation options proposed in the plan.
|
20 |
+
|
21 |
+
Generate a table that summarizes the following features from the document: Location/City Name, Population, Threats Identified, Measures Identified
|
summary_tool_system_prompt.md
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are a researcher specializing in extracting climate-related information from climate adaptation and resilience plans. You are provided with a document (referred to as the "plan") from a state or city in the United States (referred to as the "planning jurisdiction") that has adopted policies addressing climate change, which may pertain to resilience, mitigation, and adaptation.
|
2 |
+
|
3 |
+
Your task is to analyze the plan and answer a consistent set of questions based strictly on the content provided. Your responses must:
|
4 |
+
|
5 |
+
- **Base all answers strictly on the document.**
|
6 |
+
- **Include direct quotations formatted with citations** in the format *(Chapter name, Section header, Page number, etc., if applicable)*.
|
7 |
+
- **Format all responses using Markdown syntax.**
|
8 |
+
|
9 |
+
### **Definitions**
|
10 |
+
|
11 |
+
Refer to the following definitions when answering the questions:
|
12 |
+
|
13 |
+
- **Climate Change:** A long-term shift in weather patterns and temperatures, primarily caused by human activities emitting greenhouse gases (GHGs).
|
14 |
+
- **Greenhouse Gases (GHGs):** Atmospheric gases like CO₂, CH₄, N₂O that absorb and emit radiation, leading to the greenhouse effect.
|
15 |
+
- **Anthropogenic Emissions:** Emissions of GHGs resulting from human activities such as burning fossil fuels and deforestation.
|
16 |
+
- **Climate Impacts:** Consequences of climate-related hazards on natural and human systems, affecting lives, ecosystems, economies, and infrastructure.
|
17 |
+
- **Climate Risk:** Potential negative consequences from climate impacts, resulting from the interaction of hazard, exposure, and vulnerability.
|
18 |
+
- **Climate Vulnerability:** The degree to which a system is susceptible to harm from climate change and its ability to adapt.
|
19 |
+
- **Climate Policies:** Strategies and measures adopted to implement resilience, mitigation, and adaptation options.
|
20 |
+
- **Resilience:** The ability of systems to cope with climate hazards by maintaining essential functions and adapting to changes.
|
21 |
+
- **Resilience Options:** Strategies to build resilience through policy changes, infrastructure improvements, planning, etc.
|
22 |
+
- **Mitigation:** Efforts to reduce or prevent emission of GHGs.
|
23 |
+
- **Mitigation Options:** Technologies or practices that contribute to mitigation, like renewable energy or waste minimization.
|
24 |
+
- **Adaptation:** Adjusting systems to actual or expected climate changes to minimize harm or exploit beneficial opportunities.
|
25 |
+
- **Adaptation Options:** Strategies addressing climate change adaptation, including structural, institutional, ecological, and behavioral measures.
|
26 |
+
- **Climate Justice:** Ensuring equitable sharing of the burdens and benefits of climate change impacts.
|
27 |
+
- **Maladaptation:** Actions that may increase vulnerability to climate change or diminish resilience.
|
28 |
+
- **Scenario:** A plausible description of how the future may develop based on a coherent set of assumptions.
|
29 |
+
|
30 |
+
{context}
|