# NOTE(review): this whole script is deliberately disabled -- every statement
# below is commented out, so the module defines nothing and does nothing when
# imported or run.
#
# A previous copy of this file had its line breaks collapsed, which left one
# `st.text_area(...)` call outside the comments while `import streamlit as st`
# was still commented out (NameError at import time). The statements are laid
# out one per line again here.
#
# The indentation inside the comments was reconstructed from that collapsed
# copy and is a best guess -- in particular the nesting of `question_answer`,
# of the `all_text += "\n\n"` line, and of the `pdf_query` section under
# `if pdf_file:`. TODO: confirm before re-enabling any of this code.

# import os
# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.document_loaders import TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.chat_models import ChatOpenAI
# from langchain.chains import RetrievalQA
# # from langchain.llms import OpenAI
# import pandas as pd
# import umap
# import matplotlib.pyplot as plt
# import extra_streamlit_components as stx
# import fitz

# st.set_page_config(page_title="CoreMind AI", layout="wide")
# st.header("CoreMind AI")

# # ====================================================================================================
# # SIDEBAR
# st.sidebar.title("Options")
# openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")
# os.environ["OPENAI_API_KEY"] = openai_key
# qa_temperature = st.sidebar.slider("QA Temperature", min_value=0.0, max_value=2.0, value=0.8, step=0.01, key="temperature")
# qa_model = st.sidebar.selectbox("QA Model", ["gpt-3.5-turbo"], key="model")

# # ====================================================================================================
# if openai_key:
#     loader = TextLoader("raw_data.txt")
#     embeddings = OpenAIEmbeddings()
#     docsearch = Chroma(persist_directory="data", embedding_function=embeddings)

# # ====================================================================================================
# def question_answer(user_text, qa_temperature):
#     qa = RetrievalQA.from_chain_type(
#         llm=ChatOpenAI(temperature=qa_temperature, model_name=qa_model),
#         retriever=docsearch.as_retriever()
#     )
#     response = qa.run(user_text)
#     return response

# # MAIN TABS
# # add 3 tabs to the main part of the streamlit app
# qa_tab, understanding_tab = st.tabs(["Document Querying", "Understanding"])

# with qa_tab:
#     st.header("Question Answering")
#     st.write("Find the information you need right from your documents.")
#     qa_query = st.text_area("Enter your query", value="What is GEICO?", key="qa_query", help="Got a question you think your docs can answer? Just ask!")
#     qa_button = st.button("Query docs", disabled=not (openai_key and qa_query), key="qa_button", help="Make sure you have entered your OpenAI API key and a query.")
#     if qa_query and qa_button:
#         response = question_answer(qa_query, qa_temperature)
#         # response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
#         st.write(response)

# with understanding_tab:
#     st.header("PDF Understanding")
#     st.write("Understand your PDFs better.")
#     pdf_file = st.file_uploader("Upload a PDF", type=["pdf"], key="pdf_file")
#     # save file
#     if pdf_file:
#         # with open("your_file.pdf", "wb") as f:
#         #     f.write(pdf_file.getbuffer())
#         # # Open the PDF file
#         # # with open('your_file.pdf', 'rb') as file:
#         # # Create a PDF reader object
#         # with fitz.open('your_file.pdf') as doc:
#         #     all_text = ""
#         #     # Iterate over each page
#         #     for page in doc:
#         #         # Extract the text from the page
#         #         text = page.get_text()
#         #         all_text += text
#         #         all_text += "\n\n"
#         # with open("pdf_data.txt", "a") as f:
#         #     f.write(all_text)
#         # # Print the extracted text
#         # st.write("file uploaded")
#         # # chat = ChatAnthropic()
#         # loader = TextLoader("pdf_data.txt")
#         # documents = loader.load()
#         # text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
#         # texts = text_splitter.split_documents(documents)
#         # docsearch.add_documents(texts)
#         # docsearch.persist()
#         pdf_query = st.text_area("Query your pdf", key="pdf_query")
#         if pdf_query:
#             pdf_llm = RetrievalQA.from_chain_type(
#                 llm=ChatOpenAI(temperature=0.8, model_name=qa_model),
#                 retriever=docsearch.as_retriever(),
#                 # reduce_k_below_max_tokens=True,
#                 # return_source_documents=True,
#                 # max_tokens = 2000
#             )
#             pdf_response = pdf_llm.run(pdf_query)
#             # response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
#             st.write(pdf_response)