# openai_demo / app_old.py
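# Earlier version of the CoreMind AI demo: a Streamlit front end that answers questions over a
# persisted Chroma vector store using LangChain's RetrievalQA with an OpenAI chat model, plus a
# tab for uploading and querying PDFs (the PDF ingestion step is commented out below).
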
import os
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
# from langchain.llms import OpenAI
import pandas as pd
import umap
import matplotlib.pyplot as plt
import extra_streamlit_components as stx
import fitz
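# NOTE: pandas, umap, matplotlib and extra_streamlit_components are unused in this version;
# fitz and CharacterTextSplitter are only referenced in the disabled PDF-ingestion block below.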

st.set_page_config(page_title="CoreMind AI", layout="wide")
st.header("CoreMind AI")

# ====================================================================================================
# SIDEBAR
st.sidebar.title("Options")
openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")
os.environ["OPENAI_API_KEY"] = openai_key
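# OpenAIEmbeddings and ChatOpenAI below read the key from the OPENAI_API_KEY environment variable.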
qa_temperature = st.sidebar.slider("QA Temperature", min_value=0.0, max_value=2.0, value=0.8, step=0.01, key="temperature")
qa_model = st.sidebar.selectbox("QA Model", ["gpt-3.5-turbo"], key="model")

# ====================================================================================================
if openai_key:
    loader = TextLoader("raw_data.txt")
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
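    # NOTE: the Chroma store is opened from the persisted "data" directory; `loader` is
    # created here but never used to (re)ingest raw_data.txt.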

# ====================================================================================================
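# Build a fresh RetrievalQA chain for each question, using the sidebar temperature/model
# and the Chroma retriever, and return the generated answer.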
def question_answer(user_text, qa_temperature):
    qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=qa_temperature, model_name=qa_model),
        retriever=docsearch.as_retriever()
    )
    response = qa.run(user_text)
    return response

# MAIN TABS
# add two tabs to the main part of the streamlit app
qa_tab, understanding_tab = st.tabs(["Document Querying", "Understanding"])
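# "Document Querying" answers questions over the pre-built store; "Understanding" works on uploaded PDFs.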

with qa_tab:
    st.header("Question Answering")
    st.write("Find the information you need right from your documents.")
    qa_query = st.text_area("Enter your query", value="What is GEICO?", key="qa_query", help="Got a question you think your docs can answer? Just ask!")
    qa_button = st.button("Query docs", disabled=not (openai_key and qa_query), key="qa_button", help="Make sure you have entered your OpenAI API key and a query.")
    if qa_query and qa_button:
        response = question_answer(qa_query, qa_temperature)
        # response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
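        # (The hard-coded GEICO answer above looks like a canned response kept for testing the UI without an API call.)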
        st.write(response)

with understanding_tab:
    st.header("PDF Understanding")
    st.write("Understand your PDFs better.")
    pdf_file = st.file_uploader("Upload a PDF", type=["pdf"], key="pdf_file")
    # save file
    if pdf_file:
        # with open("your_file.pdf", "wb") as f:
        #     f.write(pdf_file.getbuffer())
        # # Open the PDF file
        # # with open('your_file.pdf', 'rb') as file:
        # #     # Create a PDF reader object
        # with fitz.open('your_file.pdf') as doc:
        #     all_text = ""
        #     # Iterate over each page
        #     for page in doc:
        #         # Extract the text from the page
        #         text = page.get_text()
        #         all_text += text
        #         all_text += "\n\n"
        # with open("pdf_data.txt", "a") as f:
        #     f.write(all_text)
        # # Print the extracted text
        # st.write("file uploaded")
        # # chat = ChatAnthropic()
        # loader = TextLoader("pdf_data.txt")
        # documents = loader.load()
        # text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
        # texts = text_splitter.split_documents(documents)
        # docsearch.add_documents(texts)
        # docsearch.persist()
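        # NOTE: the disabled block above would save the uploaded PDF, extract its text with fitz,
        # append it to pdf_data.txt, split it with CharacterTextSplitter, and add the chunks to the
        # Chroma store; as written, uploaded PDFs are never actually indexed.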
        pdf_query = st.text_area("Query your pdf", key="pdf_query")
        if pdf_query:
            pdf_llm = RetrievalQA.from_chain_type(
                llm=ChatOpenAI(temperature=0.8, model_name=qa_model),
                retriever=docsearch.as_retriever(),
                # reduce_k_below_max_tokens=True,
                # return_source_documents=True,
                # max_tokens=2000
            )
            pdf_response = pdf_llm.run(pdf_query)
            # response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
            st.write(pdf_response)