# rag-generate / multiple.py
# brianjking's picture
# Create multiple.py
# f9fdaeb
# raw
# history blame
# 3.16 kB
import streamlit as st
import os
import tempfile
from llama_index import (
ServiceContext,
SimpleDirectoryReader,
VectorStoreIndex,
)
from llama_index.llms import OpenAI
import openai
# Page heading.
st.title("Grounded Generation")

# PDF-only uploader that accepts several files in one selection.
uploaded_files = st.file_uploader(
    "Choose PDF files",
    type="pdf",
    accept_multiple_files=True,
)
@st.cache_resource(show_spinner=False)
def load_data(uploaded_files):
    """Build a vector index over the uploaded PDF files.

    Every upload is written into a scratch directory so that
    ``SimpleDirectoryReader`` can load it from disk, and the loaded documents
    are indexed with ``VectorStoreIndex.from_documents``. The function is
    wrapped in ``st.cache_resource``.

    Args:
        uploaded_files: Iterable of uploaded file objects as returned by
            ``st.file_uploader``; each must provide ``getvalue()``.

    Returns:
        The ``VectorStoreIndex`` built from the uploaded documents.
    """
    with st.spinner('Indexing documents...'):
        # TemporaryDirectory deletes the directory and everything inside it
        # when the block exits, including when loading or indexing raises, so
        # a failed run no longer leaves PDFs behind on disk.
        with tempfile.TemporaryDirectory() as temp_dir:
            for i, uploaded_file in enumerate(uploaded_files):
                temp_path = os.path.join(temp_dir, f"temp_{i}.pdf")
                with open(temp_path, "wb") as f:
                    # getvalue() returns the full buffer regardless of the
                    # current read position, unlike read().
                    f.write(uploaded_file.getvalue())

            # Read the saved PDFs back as documents.
            reader = SimpleDirectoryReader(input_dir=temp_dir, recursive=False)
            docs = reader.load_data()

            service_context = ServiceContext.from_defaults(
                llm=OpenAI(
                    model="gpt-3.5-turbo-16k",
                    temperature=0.1,
                ),
                system_prompt="You are an AI assistant that uses context from PDFs to assist the user in generating text."
            )
            # Index while the files still exist, matching the original order
            # of operations (index first, clean up afterwards).
            index = VectorStoreIndex.from_documents(docs, service_context=service_context)

        return index
# Main interaction flow: runs only once at least one PDF has been uploaded.
if uploaded_files:
    index = load_data(uploaded_files)

    user_query = st.text_input("Search for the products/info you want to use to ground your generated text content:")

    # The retrieved passage lives in session state so that it is still
    # available on the script run triggered by pressing "Generate".
    if 'retrieved_text' not in st.session_state:
        st.session_state['retrieved_text'] = ''

    if st.button("Retrieve"):
        if not user_query.strip():
            # Nothing to search for: skip the query instead of running it
            # with an empty string.
            st.warning("Enter a search query before retrieving.")
        else:
            with st.spinner('Retrieving text...'):
                query_engine = index.as_query_engine(similarity_top_k=1)
                # Store the rendered text rather than the response object;
                # this is the same string the f-strings below would produce.
                st.session_state['retrieved_text'] = str(query_engine.query(user_query))

    # Show whatever has been retrieved so far, so it stays visible while the
    # user picks a content type and generates.
    if st.session_state['retrieved_text']:
        st.write(f"Retrieved Text: {st.session_state['retrieved_text']}")

    content_type = st.selectbox("Select content type:", ["Blog", "Tweet"])

    if st.button("Generate") and content_type:
        retrieved_text = st.session_state['retrieved_text']
        if not retrieved_text:
            # Without retrieved text the prompt would carry no grounding
            # context, so do not spend an API call on it.
            st.warning("Retrieve some text first so the generated content can be grounded in it.")
        else:
            with st.spinner('Generating text...'):
                openai.api_key = os.getenv("OPENAI_API_KEY")

                # The selectbox only offers "Blog" and "Tweet".
                if content_type == "Blog":
                    prompt = f"Write a blog about 500 words in length using the {retrieved_text}"
                else:
                    prompt = f"Compose a tweet using the {retrieved_text}"

                try:
                    response = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo-16k",
                        messages=[
                            {"role": "system", "content": "You are a helpful assistant."},
                            {"role": "user", "content": prompt}
                        ]
                    )
                    generated_text = response['choices'][0]['message']['content']
                except Exception as e:
                    # UI boundary: report any API or parsing failure to the
                    # user instead of crashing the app.
                    st.write(f"An error occurred: {e}")
                else:
                    st.write(f"Generated Text: {generated_text}")