import os

import streamlit as st
import pandas as pd
import umap
import matplotlib.pyplot as plt
import extra_streamlit_components as stx
import fitz

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
# from langchain.llms import OpenAI
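# fitz is the import name for PyMuPDF, used below to pull text out of uploaded PDFs.
# The app is a thin Streamlit front end over LangChain's RetrievalQA chain: OpenAI
# embeddings index document chunks in a persisted Chroma store and a chat model answers
# questions from the retrieved chunks. pandas, umap, matplotlib and
# extra_streamlit_components are imported but not used below (presumably left in for
# planned features).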
st.set_page_config(page_title="CoreMind AI", layout="wide")
st.header("CoreMind AI")
# ====================================================================================================
# SIDEBAR
st.sidebar.title("Options")
openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")
if openai_key:
    os.environ["OPENAI_API_KEY"] = openai_key
qa_temperature = st.sidebar.slider("QA Temperature", min_value=0.0, max_value=2.0, value=0.8, step=0.01, key="temperature")
qa_model = st.sidebar.selectbox("QA Model", ["gpt-3.5-turbo"], key="model")
# ====================================================================================================
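# The temperature slider is passed to ChatOpenAI below: 0.0 gives near-deterministic
# answers, higher values make the model sample more freely. Only gpt-3.5-turbo is
# listed, but any chat model name accepted by ChatOpenAI should work here.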
if openai_key:
    loader = TextLoader("raw_data.txt")
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
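    # Passing persist_directory reopens the Chroma collection previously saved under
    # ./data instead of building a new one; queries are embedded with the same
    # OpenAIEmbeddings model. The TextLoader above is presumably the raw source the
    # index was built from; it is not re-embedded on this path.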
# ====================================================================================================
def question_answer(user_text, qa_temperature):
    qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=qa_temperature, model_name=qa_model),
        retriever=docsearch.as_retriever(),
    )
    response = qa.run(user_text)
    return response
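# RetrievalQA.from_chain_type defaults to the "stuff" chain: the retriever pulls the
# chunks most similar to the question out of Chroma and stuffs them into the chat
# model's prompt. qa.run() is the single-input convenience call and returns only the
# answer string.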
# MAIN TABS
# Add two tabs to the main part of the Streamlit app.
qa_tab, understanding_tab = st.tabs(["Document Querying", "Understanding"])
with qa_tab:
    st.header("Question Answering")
    st.write("Find the information you need right from your documents.")
    qa_query = st.text_area("Enter your query", value="What is GEICO?", key="qa_query", help="Got a question you think your docs can answer? Just ask!")
    qa_button = st.button("Query docs", disabled=not (openai_key and qa_query), key="qa_button", help="Make sure you have entered your OpenAI API key and a query.")
    if qa_query and qa_button:
        response = question_answer(qa_query, qa_temperature)
        # A canned sample answer can be substituted here for offline testing:
        # response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
        st.write(response)
with understanding_tab:
    st.header("PDF Understanding")
    st.write("Understand your PDFs better.")
    pdf_file = st.file_uploader("Upload a PDF", type=["pdf"], key="pdf_file")
    if pdf_file and openai_key:
        # Save the upload to disk so PyMuPDF can open it.
        with open("your_file.pdf", "wb") as f:
            f.write(pdf_file.getbuffer())
        # Extract the text of every page.
        with fitz.open("your_file.pdf") as doc:
            all_text = ""
            for page in doc:
                all_text += page.get_text()
                all_text += "\n\n"
        with open("pdf_data.txt", "a") as f:
            f.write(all_text)
        st.write("file uploaded")
        # Chunk the extracted text and add it to the existing Chroma index.
        loader = TextLoader("pdf_data.txt")
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
        texts = text_splitter.split_documents(documents)
        docsearch.add_documents(texts)
        docsearch.persist()
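        # CharacterTextSplitter measures chunk_size / chunk_overlap in characters, not
        # tokens. Because pdf_data.txt is opened in append mode and re-indexed on every
        # Streamlit rerun, repeated uploads can add duplicate chunks to the store.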
        pdf_query = st.text_area("Query your pdf", key="pdf_query")
        if pdf_query:
            pdf_llm = RetrievalQA.from_chain_type(
                llm=ChatOpenAI(temperature=0.8, model_name=qa_model),
                retriever=docsearch.as_retriever(),
                # return_source_documents=True,
            )
            pdf_response = pdf_llm.run(pdf_query)
            st.write(pdf_response)
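# To try the app locally (assuming this file is saved as app.py and the packages
# imported above are installed):
#   streamlit run app.py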