import os

import numpy as np
import pandas as pd
import openai
from haystack.schema import Document
import streamlit as st
from tenacity import retry, stop_after_attempt, wait_random_exponential

openai.api_key = os.environ["OPENAI_API_KEY"]
model_select = "gpt-3.5-turbo-1106"

def get_prompt(docs):
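    """Build a prompt asking for a single-paragraph, academic-style summary of the given documents."""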
    base_prompt = "Provide a single paragraph summary of the documents provided below. \
Formulate your answer in the style of an academic report."

    context = ' - '.join([d.content for d in docs])
    prompt = base_prompt + "; Context: " + context + "; Answer:"

    return prompt


def get_document(df):
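    """Convert each DataFrame row into a Haystack Document; expects 'text' and 'filename' columns."""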
    documents = []
    for index, row in df.iterrows():
        doc = Document(
            content=row['text'],
            meta={'filename': row['filename']},
        )
        documents.append(doc)

    return documents


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion_with_backoff(**kwargs):
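    """Call the OpenAI ChatCompletion endpoint, retrying with exponential backoff on transient failures."""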
    return openai.ChatCompletion.create(**kwargs)


def run_query(df):
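    """Summarize the documents in df and stream the result into the Streamlit UI."""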
    docs = get_document(df)

    '''
    For non-streamed completion, enable the following 2 lines and comment out the code below
    '''
    # response = completion_with_backoff(model=model_select, messages=[{"role": "user", "content": get_prompt(docs)}])
    # st.success(response['choices'][0]['message']['content'])

    response = completion_with_backoff(
        model=model_select,
        messages=[{"role": "user", "content": get_prompt(docs)}],
        stream=True,
    )

    # Stream the completion into a Streamlit placeholder, updating the text as chunks arrive.
    report = []
    res_box = st.empty()
    for chunk in response:
        chunk_message = chunk['choices'][0]['delta']
        if 'content' in chunk_message:
            report.append(chunk_message['content'])
            result = "".join(report).strip()
            res_box.success(result)
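

# Example usage (illustrative sketch, not part of the original script): from a
# Streamlit app, build a DataFrame with 'text' and 'filename' columns and pass
# it to run_query:
#
#   df = pd.DataFrame({
#       'text': ["First document body.", "Second document body."],
#       'filename': ["doc1.txt", "doc2.txt"],
#   })
#   run_query(df)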