# import os

# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.document_loaders import TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.chat_models import ChatOpenAI

# from langchain.chains import RetrievalQA
# # from langchain.llms import OpenAI

# import pandas as pd
# import umap
# import matplotlib.pyplot as plt

# import extra_streamlit_components as stx

# import fitz


# st.set_page_config(page_title="CoreMind AI", layout="wide")

# st.header("CoreMind AI")

# # ====================================================================================================

# # SIDEBAR
# st.sidebar.title("Options")

# openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")

# os.environ["OPENAI_API_KEY"] = openai_key
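# # Exporting the key via the environment lets OpenAIEmbeddings and ChatOpenAI below
# # pick it up without passing it explicitly.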


# qa_temperature = st.sidebar.slider("QA Temperature", min_value=0.0, max_value=2.0, value=0.8, step=0.01, key="temperature")
# qa_model = st.sidebar.selectbox("QA Model", ["gpt-3.5-turbo"], key="model")

# # ====================================================================================================

# if openai_key:
#     loader = TextLoader("raw_data.txt")
#     embeddings = OpenAIEmbeddings()
#     docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
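# # Note: the Chroma store is loaded from the pre-built "data" persist directory and is
# # reused by both tabs below. The TextLoader("raw_data.txt") instance is created here but
# # never consumed, and docsearch only exists once an API key has been entered.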

# # ====================================================================================================

# # Answer a question against the persisted Chroma index with a RetrievalQA chain.
# # qa_model and docsearch are module-level names defined above.
# def question_answer(user_text, qa_temperature):
#     qa = RetrievalQA.from_chain_type(
#         llm=ChatOpenAI(temperature=qa_temperature, model_name=qa_model),
#         retriever=docsearch.as_retriever()
#     )
#     response = qa.run(user_text)
#     return response
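# # Example (sketch): with a key set and a populated index, something like
# # question_answer("What is GEICO?", qa_temperature) would return the chain's answer string.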


# # MAIN TABS
# # add two tabs to the main part of the streamlit app
# qa_tab, understanding_tab = st.tabs(["Document Querying", "Understanding"])

# with qa_tab:
#     st.header("Question Answering")
#     st.write("Find the information you need right from your documents.")

#     qa_query = st.text_area("Enter your query", value="What is GEICO?", key="qa_query", help="Got a question you think your docs can answer? Just ask!")
#     qa_button = st.button("Query docs", disabled=not (openai_key and qa_query), key="qa_button", help="Make sure you have entered your OpenAI API key and a query.")

#     if qa_query and qa_button:
#         response = question_answer(qa_query, qa_temperature)
#         # response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
#         st.write(response)



# with understanding_tab:
#     st.header("PDF Understanding")
#     st.write("Understand your PDFs better.")

#     pdf_file = st.file_uploader("Upload a PDF", type=["pdf"], key="pdf_file")

#     # save file
#     if pdf_file:
#         # with open("your_file.pdf", "wb") as f:
#         #     f.write(pdf_file.getbuffer())

#         # # Open the saved PDF with PyMuPDF (fitz) and extract its text
#         # with fitz.open('your_file.pdf') as doc:
#         #     all_text = ""

#         #     # Iterate over each page
#         #     for page in doc:
#         #         # Extract the text from the page
#         #         text = page.get_text()
#         #         all_text += text
#         #         all_text += "\n\n"

#         # with open("pdf_data.txt", "a") as f:
#         #     f.write(all_text)

#         # # Print the extracted text
#         # st.write("file uploaded")

#         # # chat = ChatAnthropic()

#         # loader = TextLoader("pdf_data.txt")
#         # documents = loader.load()
#         # text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
#         # texts = text_splitter.split_documents(documents)
#         # docsearch.add_documents(texts)
#         # docsearch.persist()
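#         # NOTE: the ingestion steps above (save the upload, extract text with fitz, split,
#         # add to Chroma, persist) are currently disabled, so PDF queries run against
#         # whatever is already in the persisted index.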

#         pdf_query = st.text_area("Query your PDF", key="pdf_query")

#         if pdf_query:
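#             # Build a RetrievalQA chain for the PDF query; note the temperature is fixed
#             # at 0.8 here rather than following the sidebar slider.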
#             pdf_llm = RetrievalQA.from_chain_type(
#                     llm=ChatOpenAI(temperature=0.8, model_name=qa_model), 
#                     retriever=docsearch.as_retriever(), 
#                     # reduce_k_below_max_tokens=True,
#                     # return_source_documents=True,
#                     # max_tokens = 2000
#                 )
#             pdf_response = pdf_llm.run(pdf_query)
#             # pdf_response = "GEICO is the seventh largest auto insurer in the United States, with about 3.7 million cars insured. It is a low-cost operator and its competitive strength flows directly from this position. It is now a wholly-owned subsidiary of Berkshire Hathaway."
#             st.write(pdf_response)