import os | |
# import json | |
import numpy as np | |
import pandas as pd | |
import openai | |
from haystack.schema import Document | |
import streamlit as st | |
from tenacity import retry, stop_after_attempt, wait_random_exponential | |
# Get openai API key
# Read at import time: a missing OPENAI_API_KEY environment variable raises
# KeyError as soon as this module is imported.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Model name passed as `model` to the chat completion request in run_query.
model_select = "gpt-4-0125-preview"
# define a special function for putting the prompt together (as we can't use haystack)
def get_prompt(context, label):
    """Build the single-message summarization prompt for one label.

    Args:
        context: Text to be summarized; inserted verbatim after "Context:".
        label: Name of the group/topic whose vulnerability to climate change
            the summary should focus on. Interpolated via f-string, so
            non-string values are converted with ``str()``.

    Returns:
        The full prompt as a single-line string ending in "; Answer:".
    """
    # Adjacent string literals are used instead of backslash continuations
    # inside one literal: with a continuation, the indentation of the next
    # source line (and anything following the backslash) becomes part of the
    # prompt, so re-indenting the file silently changes what is sent.
    base_prompt = (
        "Summarize the following context efficiently in bullet points, "
        "the less the better - but keep concrete goals. "
        f"Summarize only elements of the context that address vulnerability of {label} to climate change. "
        f"If there is no mention of {label} in the context, return nothing. "
        "Formatting example: "
        "- Bullet point 1 "
        "- Bullet point 2 "
    )
    return f"{base_prompt}; Context: {context}; Answer:"
# def get_prompt(context, label): | |
# base_prompt="Summarize the following context efficiently in bullet points, the less the better - but keep concrete goals. \ | |
# Summarize only elements of the context that address vulnerability to climate change. \ | |
# Formatting example: \ | |
# - Bullet point 1 \ | |
# - Bullet point 2 \ | |
# " | |
# # Add the meta data for references | |
# # context = ' - '.join([d.content for d in docs]) | |
# prompt = base_prompt+"; Context: "+context+"; Answer:" | |
# return prompt | |
# base_prompt="Summarize the following context efficiently in bullet points, the less the better- but keep concrete goals. \ | |
# Summarize only activities that address the vulnerability of "+label+" to climate change. \ | |
# Formatting example: \ | |
# - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \ | |
# - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \ | |
# " | |
# # convert df rows to Document object so we can feed it into the summarizer easily | |
# def get_document(df): | |
# # we take a list of each extract | |
# ls_dict = [] | |
# for index, row in df.iterrows(): | |
# # Create a Document object for each row (we only need the text) | |
# doc = Document( | |
# row['text'], | |
# meta={ | |
# 'label': row['Vulnerability Label']} | |
# ) | |
# # Append the Document object to the documents list | |
# ls_dict.append(doc) | |
# return ls_dict | |
# exception handling for issuing multiple API calls to openai (exponential backoff)
@retry(
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
    reraise=True,
)
def completion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with retries and random exponential backoff.

    Args:
        **kwargs: Forwarded unchanged to ``openai.ChatCompletion.create``
            (e.g. ``model``, ``messages``, ``stream``).

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns for the given
        arguments.

    Raises:
        The exception raised by the last attempt. ``reraise=True`` makes
        tenacity surface that original exception instead of wrapping it in
        ``tenacity.RetryError``, so callers see the same exception types as
        with a direct call.

    Note:
        Only the ``create`` call itself is retried. With ``stream=True`` an
        error raised later, while the caller iterates the stream, is not
        covered by this decorator.
    """
    return openai.ChatCompletion.create(**kwargs)
# construct RAG query, send to openai and process response
def run_query(context, label):
    """Stream a summary of *context* for *label* into a Streamlit placeholder.

    Builds the prompt with ``get_prompt``, requests a streamed chat
    completion, and re-renders the accumulated text in a single ``st.empty()``
    placeholder each time a new piece of text arrives.

    Args:
        context: Text to summarize.
        label: Label the summary should focus on (passed to ``get_prompt``).

    Returns:
        None. The output is written to the Streamlit page only.

    For a non-streamed completion, replace the streaming code with:
        res = openai.ChatCompletion.create(
            model=model_select,
            messages=[{"role": "user", "content": get_prompt(context, label)}],
        )
        result = res.choices[0].message.content
    """
    # instantiate ChatCompletion as a generator object (stream is set to True)
    response = completion_with_backoff(
        model=model_select,
        messages=[{"role": "user", "content": get_prompt(context, label)}],
        stream=True,
    )

    # iterate through the streamed output
    report = []
    res_box = st.empty()
    for chunk in response:
        choices = chunk["choices"]
        # Guard against a chunk that carries no choices at all.
        if not choices:
            continue
        # extract the object containing the text (totally different structure when streaming)
        delta = choices[0]["delta"]
        # Some deltas have no "content" key and some carry it as None; a None
        # entry would make the join below raise TypeError, so skip both.
        piece = delta.get("content")
        if piece is None:
            continue
        report.append(piece)
        # add the latest text and merge it with all previous
        result = "".join(report).strip()
        res_box.success(result)  # output to response text box