Spaces:
Runtime error
Runtime error
first files
Browse files- .gitattributes +1 -0
- README.md +1 -12
- app.py +51 -0
- chatbot.py +24 -0
- config.toml +18 -0
- embedding.py +61 -0
- gehoert2022-11 November.pdf +3 -0
- requirements.txt +22 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
gehoert2022-11[[:space:]]November.pdf filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1 @@
|
|
1 |
-
|
2 |
-
title: Chatwithfiles
|
3 |
-
emoji: π
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: green
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.25.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# streamlit_chatwithfiles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
from gui.history import ChatHistory
|
6 |
+
from gui.layout import Layout
|
7 |
+
from gui.sidebar import Sidebar, Utilities
|
8 |
+
|
9 |
+
def _run_chat(layout, utils, pdf):
    """Set up the chatbot for the uploaded PDF and render one chat pass.

    Args:
        layout: Object providing ``prompt_form()``.
        utils: Object providing ``setup_chatbot(pdf, model, temperature)``.
        pdf: The uploaded file; its ``name`` is used to initialize history.
    """
    history = ChatHistory()

    # Model name and temperature are read from the Streamlit session state.
    st.session_state["chatbot"] = utils.setup_chatbot(
        pdf, st.session_state["model"], st.session_state["temperature"]
    )

    # NOTE(review): "ready" is presumably set during chatbot setup - confirm.
    if not st.session_state["ready"]:
        return

    history.initialize(pdf.name)

    response_container, prompt_container = st.container(), st.container()

    with prompt_container:
        is_ready, user_input = layout.prompt_form()

        if st.session_state["reset_chat"]:
            history.reset()

        if is_ready:
            # The returned answer is not used here; the chat is rendered
            # from the history object below.
            st.session_state["chatbot"].conversational_chat(user_input)

    history.generate_messages(response_container)


def main():
    """Render the ChatPDF page: header, API-key check, upload and chat."""
    st.set_page_config(layout="wide", page_icon="💬", page_title="ChatPDF")
    layout, sidebar, utils = Layout(), Sidebar(), Utilities()

    layout.show_header()
    user_api_key = utils.load_api_key()

    if not user_api_key:
        layout.show_api_key_missing()
    else:
        # Expose the key through the environment for the OpenAI clients.
        os.environ["OPENAI_API_KEY"] = user_api_key
        pdf = utils.handle_upload()

        if pdf:
            sidebar.show_options()

            try:
                _run_chat(layout, utils, pdf)
            except Exception as e:
                # Top-level boundary: show the error in the page and halt
                # this script run.
                st.error(f"{e}")
                st.stop()

    sidebar.about()


if __name__ == '__main__':
    main()
|
chatbot.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain.chains import ConversationalRetrievalChain
|
3 |
+
from langchain.chat_models import ChatOpenAI
|
4 |
+
|
5 |
+
|
6 |
+
class Chatbot:
    """Answers questions about a document through a Langchain retrieval chain."""

    def __init__(self, model_name, temperature, vectors):
        # Plain configuration holder; the chain itself is built per query.
        self.model_name, self.temperature, self.vectors = (
            model_name,
            temperature,
            vectors,
        )

    def conversational_chat(self, query):
        """
        Runs one question through a conversational retrieval chain.

        The chain combines a chat model configured from this instance, the
        memory object stored under ``st.session_state["history"]`` and a
        retriever obtained from ``self.vectors``. Returns the ``"answer"``
        entry of the chain result.
        """
        llm = ChatOpenAI(model_name=self.model_name, temperature=self.temperature)
        memory = st.session_state["history"]
        retriever = self.vectors.as_retriever()

        qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            memory=memory,
            retriever=retriever,
        )
        return qa_chain({"question": query})["answer"]
|
config.toml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
|
3 |
+
# Primary accent for interactive elements
|
4 |
+
primaryColor = '#548eff'
|
5 |
+
|
6 |
+
# Background color for the main content area
|
7 |
+
backgroundColor = '#f5f7fb'
|
8 |
+
|
9 |
+
# Background color for sidebar and most interactive widgets
|
10 |
+
secondaryBackgroundColor = '#FFFFFF'
|
11 |
+
|
12 |
+
# Color used for almost all text
|
13 |
+
textColor = '#2d3643'
|
14 |
+
|
15 |
+
# Font family for all text in the app, except code blocks
|
16 |
+
# Accepted values (serif | sans serif | monospace)
|
17 |
+
# Default: "sans serif"
|
18 |
+
font = "sans serif"
|
embedding.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import tempfile
|
4 |
+
|
5 |
+
from langchain.document_loaders import PyPDFLoader
|
6 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
7 |
+
from langchain.vectorstores import FAISS
|
8 |
+
|
9 |
+
|
10 |
+
class Embedder:
    """Builds, caches and loads FAISS vector stores for uploaded PDFs.

    Each vector store is pickled as ``<filename>.pkl`` inside ``self.PATH``
    so that a document only has to be embedded once.
    """

    def __init__(self):
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """
        Creates a directory to store the embeddings vectors
        """
        # exist_ok avoids the race between checking for the directory and
        # creating it when two sessions start at the same time.
        os.makedirs(self.PATH, exist_ok=True)

    def _pickle_path(self, filename):
        """Return the cache file path for *filename* inside ``self.PATH``."""
        # The name comes from an upload; keep only its last component so it
        # cannot point outside the embeddings directory.
        return os.path.join(self.PATH, f"{os.path.basename(filename)}.pkl")

    def storeDocEmbeds(self, file, filename):
        """
        Stores document embeddings using Langchain and FAISS

        Args:
            file: Raw bytes of the uploaded PDF.
            filename: Name used to derive the cache file name.
        """
        # Write the uploaded file to a temporary file
        tmp_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
        tmp_file_path = tmp_file.name
        try:
            with tmp_file:
                tmp_file.write(file)

            # Load the data from the file using Langchain
            loader = PyPDFLoader(file_path=tmp_file_path)
            data = loader.load_and_split()
            print(f"Loaded {len(data)} documents from {tmp_file_path}")

            # Create an embeddings object using Langchain
            embeddings = OpenAIEmbeddings(allowed_special={'<|endofprompt|>'})

            # Store the embeddings vectors using FAISS
            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # Always remove the temporary copy, even when loading or
            # embedding fails, so failed uploads do not pile up on disk.
            os.remove(tmp_file_path)

        # Save the vectors to a pickle file
        with open(self._pickle_path(filename), "wb") as f:
            pickle.dump(vectors, f)

    def getDocEmbeds(self, file, filename):
        """
        Retrieves document embeddings

        Args:
            file: Raw bytes of the uploaded PDF; only used when no cached
                vectors exist for *filename*.
            filename: Name used to derive the cache file name.

        Returns:
            The vector store unpickled from the cache file.
        """
        # Check if embeddings vectors have already been stored in a pickle file
        pkl_file = self._pickle_path(filename)
        if not os.path.isfile(pkl_file):
            # If not, store the vectors using the storeDocEmbeds function
            self.storeDocEmbeds(file, filename)

        # Load the vectors from the pickle file
        # NOTE(security): pickle.load can execute arbitrary code; this is
        # only safe while the embeddings directory is written exclusively
        # by this application.
        with open(pkl_file, "rb") as f:
            vectors = pickle.load(f)

        return vectors
|
gehoert2022-11 November.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:973eed315c62e02dda711577266d4199fc6ab923ebbfdad7d7b51d316f9c1701
|
3 |
+
size 8963614
|
requirements.txt
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Automatically generated by https://github.com/damnever/pigar.
|
2 |
+
|
3 |
+
# ChatPDF/chatbot.py: 2,3,4
|
4 |
+
# ChatPDF/embedding.py: 5,6,7
|
5 |
+
# ChatPDF/gui/history.py: 4
|
6 |
+
# ChatPDF/notebook/pdf_chat.ipynb: 1,3,10,11,19,20,21,22
|
7 |
+
langchain==0.0.153
|
8 |
+
|
9 |
+
# ChatPDF/app.py: 3
|
10 |
+
# ChatPDF/chatbot.py: 1
|
11 |
+
# ChatPDF/gui/history.py: 1
|
12 |
+
# ChatPDF/gui/layout.py: 1
|
13 |
+
# ChatPDF/gui/sidebar.py: 3
|
14 |
+
streamlit==1.22.0
|
15 |
+
|
16 |
+
# ChatPDF/gui/history.py: 5
|
17 |
+
streamlit_chat_media==0.0.4
|
18 |
+
|
19 |
+
pypdf==3.8.1
|
20 |
+
openai==0.27.5
|
21 |
+
tiktoken==0.3.3
|
22 |
+
faiss-cpu==1.7.4
|