Unterwexi commited on
Commit
c0eb7b0
β€’
1 Parent(s): fc6e1a4

first files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. README.md +1 -12
  3. app.py +51 -0
  4. chatbot.py +24 -0
  5. config.toml +18 -0
  6. embedding.py +61 -0
  7. gehoert2022-11 November.pdf +3 -0
  8. requirements.txt +22 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ gehoert2022-11[[:space:]]November.pdf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1 @@
1
- ---
2
- title: Chatwithfiles
3
- emoji: πŸŒ–
4
- colorFrom: gray
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.25.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # streamlit_chatwithfiles
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import streamlit as st

from gui.history import ChatHistory
from gui.layout import Layout
from gui.sidebar import Sidebar, Utilities

# Entry point of the Streamlit "chat with a PDF" app.
# NOTE(review): the source this was recovered from had its indentation
# stripped by the rendering pipeline; the nesting below is reconstructed
# from the control-flow keywords — confirm against the original app.py.
if __name__ == '__main__':
    # Page chrome and the three GUI helpers the rest of the script drives.
    st.set_page_config(layout="wide", page_icon="💬", page_title="ChatPDF")
    layout, sidebar, utils = Layout(), Sidebar(), Utilities()

    layout.show_header()
    user_api_key = utils.load_api_key()

    if not user_api_key:
        # Nothing works without an OpenAI key, so stop at the prompt screen.
        layout.show_api_key_missing()
    else:
        # Downstream langchain/openai clients read the key from the environment.
        os.environ["OPENAI_API_KEY"] = user_api_key
        pdf = utils.handle_upload()

        if pdf:
            sidebar.show_options()

            try:
                history = ChatHistory()
                # Model name and temperature come from the sidebar widgets
                # via st.session_state.
                chatbot = utils.setup_chatbot(
                    pdf, st.session_state["model"], st.session_state["temperature"]
                )
                st.session_state["chatbot"] = chatbot

                if st.session_state["ready"]:
                    history.initialize(pdf.name)

                    # Render answers above the prompt box.
                    response_container, prompt_container = st.container(), st.container()

                    with prompt_container:
                        is_ready, user_input = layout.prompt_form()

                        if st.session_state["reset_chat"]:
                            history.reset()

                    if is_ready:
                        output = st.session_state["chatbot"].conversational_chat(user_input)

                    history.generate_messages(response_container)

            except Exception as e:
                # Surface any setup/inference failure in the UI and halt rendering.
                st.error(f"{e}")
                st.stop()

        sidebar.about()
chatbot.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.chains import ConversationalRetrievalChain
3
+ from langchain.chat_models import ChatOpenAI
4
+
5
+
6
class Chatbot:
    """Answers questions against a vector store with conversational memory."""

    def __init__(self, model_name, temperature, vectors):
        # Chat-model configuration plus the vector store used for retrieval.
        self.model_name = model_name
        self.temperature = temperature
        self.vectors = vectors

    def conversational_chat(self, query):
        """
        Starts a conversational chat with a model via Langchain
        """
        llm = ChatOpenAI(model_name=self.model_name, temperature=self.temperature)
        # Memory lives in Streamlit's session state so it survives reruns.
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            memory=st.session_state["history"],
            retriever=self.vectors.as_retriever(),
        )

        result = chain({"question": query})
        return result["answer"]
config.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
[theme]

# Primary accent color for interactive elements
primaryColor = '#548eff'

# Background color for the main content area
backgroundColor = '#f5f7fb'

# Background color for sidebar and most interactive widgets
secondaryBackgroundColor = '#FFFFFF'

# Color used for almost all text
textColor = '#2d3643'

# Font family for all text in the app, except code blocks
# Accepted values: "serif" | "sans serif" | "monospace"
# Default: "sans serif"
font = "sans serif"
embedding.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import tempfile
4
+
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+
9
+
10
class Embedder:
    """Creates, caches, and loads FAISS vector stores for uploaded documents."""

    def __init__(self):
        # Directory where pickled vector stores are cached, one per document.
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """
        Creates a directory to store the embeddings vectors
        """
        # makedirs + exist_ok is race-safe (mkdir raised if the dir appeared
        # between the exists() check and the call).
        os.makedirs(self.PATH, exist_ok=True)

    def storeDocEmbeds(self, file, filename):
        """
        Stores document embeddings using Langchain and FAISS.

        file: raw bytes of the uploaded PDF.
        filename: cache key; the vectors are written to <PATH>/<filename>.pkl.
        """
        # PyPDFLoader wants a path, so spill the uploaded bytes to a temp file.
        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp_file:
            tmp_file.write(file)
            tmp_file_path = tmp_file.name

        try:
            # Load and split the PDF into per-page documents via Langchain.
            loader = PyPDFLoader(file_path=tmp_file_path)
            data = loader.load_and_split()
            print(f"Loaded {len(data)} documents from {tmp_file_path}")

            embeddings = OpenAIEmbeddings(allowed_special={'<|endofprompt|>'})
            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # Remove the temp file even if loading/embedding raises,
            # so failures don't leak files.
            os.remove(tmp_file_path)

        # BUG FIX: key the cache on the document's filename. Previously the
        # path was hard-coded, so every uploaded document overwrote/reused
        # the same pickle and the `filename` parameter was ignored.
        with open(f"{self.PATH}/{filename}.pkl", "wb") as f:
            pickle.dump(vectors, f)

    def getDocEmbeds(self, file, filename):
        """
        Retrieves document embeddings, computing and caching them on a miss.
        """
        # Check if this document's vectors were already pickled.
        pkl_file = f"{self.PATH}/{filename}.pkl"
        if not os.path.isfile(pkl_file):
            self.storeDocEmbeds(file, filename)

        # NOTE(review): unpickling is only safe because these files are
        # produced locally by storeDocEmbeds — never point PATH at
        # untrusted pickle files.
        with open(pkl_file, "rb") as f:
            vectors = pickle.load(f)

        return vectors
gehoert2022-11 November.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:973eed315c62e02dda711577266d4199fc6ab923ebbfdad7d7b51d316f9c1701
3
+ size 8963614
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Automatically generated by https://github.com/damnever/pigar.
2
+
3
+ # ChatPDF/chatbot.py: 2,3,4
4
+ # ChatPDF/embedding.py: 5,6,7
5
+ # ChatPDF/gui/history.py: 4
6
+ # ChatPDF/notebook/pdf_chat.ipynb: 1,3,10,11,19,20,21,22
7
+ langchain==0.0.153
8
+
9
+ # ChatPDF/app.py: 3
10
+ # ChatPDF/chatbot.py: 1
11
+ # ChatPDF/gui/history.py: 1
12
+ # ChatPDF/gui/layout.py: 1
13
+ # ChatPDF/gui/sidebar.py: 3
14
+ streamlit==1.22.0
15
+
16
+ # ChatPDF/gui/history.py: 5
17
+ streamlit_chat_media==0.0.4
18
+
19
+ pypdf==3.8.1
20
+ openai==0.27.5
21
+ tiktoken==0.3.3
22
+ faiss-cpu==1.7.4