Spaces:

vinhnx90
/

inkchatgpt

Sleeping

App Files Files Community

vinhnx90 commited on Apr 1, 2024

Commit

1ce831d

1 Parent(s): 68eaa27

Handle epub loader

Browse files

Files changed (2) hide show

app.py +16 -9
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,18 +2,22 @@ import os
 import tempfile
 import streamlit as st
-from langchain.callbacks.base import BaseCallbackHandler
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.memory import ConversationBufferMemory
 from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
 from langchain_community.vectorstores import DocArrayInMemorySearch
 from chat_profile import ChatProfileRoleEnum
-from calback_handler import StreamHandler, PrintRetrievalHandler
 # configs
 LLM_MODEL_NAME = "gpt-3.5-turbo"
@@ -42,11 +46,11 @@ msgs = StreamlitChatMessageHistory()
 @st.cache_resource(ttl="1h")
-def configure_retriever(uploaded_files):
     # Read documents
     docs = []
     temp_dir = tempfile.TemporaryDirectory()
-    for file in uploaded_files:
         temp_filepath = os.path.join(temp_dir.name, file.name)
         with open(temp_filepath, "wb") as f:
             f.write(file.getvalue())
@@ -60,6 +64,8 @@ def configure_retriever(uploaded_files):
             loader = Docx2txtLoader(temp_filepath)
         elif extension == ".txt":
             loader = TextLoader(temp_filepath)
         else:
             st.write("This document format is not supported!")
             return None
@@ -86,10 +92,11 @@ def configure_retriever(uploaded_files):
 with st.sidebar.expander("Documents"):
     st.subheader("Files")
     uploaded_files = st.file_uploader(
-        label="Select files", type=["pdf", "txt", "docx"], accept_multiple_files=True
     )
 with st.sidebar.expander("Setup"):
     st.subheader("API Key")
     openai_api_key = st.text_input("OpenAI API Key", type="password")
@@ -104,7 +111,7 @@ if not openai_api_key:
     st.stop()
 if uploaded_files:
-    retriever = configure_retriever(uploaded_files)
     memory = ConversationBufferMemory(
         memory_key="chat_history", chat_memory=msgs, return_messages=True
@@ -119,7 +126,7 @@ if uploaded_files:
     )
     chain = ConversationalRetrievalChain.from_llm(
-        llm, retriever=retriever, memory=memory, verbose=False
     )
     avatars = {

 import tempfile
 import streamlit as st
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.memory import ConversationBufferMemory
 from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import (
+    Docx2txtLoader,
+    PyPDFLoader,
+    TextLoader,
+    UnstructuredEPubLoader,
+)
 from langchain_community.vectorstores import DocArrayInMemorySearch
+from calback_handler import PrintRetrievalHandler, StreamHandler
 from chat_profile import ChatProfileRoleEnum
 # configs
 LLM_MODEL_NAME = "gpt-3.5-turbo"
 @st.cache_resource(ttl="1h")
+def configure_retriever(files):
     # Read documents
     docs = []
     temp_dir = tempfile.TemporaryDirectory()
+    for file in files:
         temp_filepath = os.path.join(temp_dir.name, file.name)
         with open(temp_filepath, "wb") as f:
             f.write(file.getvalue())
             loader = Docx2txtLoader(temp_filepath)
         elif extension == ".txt":
             loader = TextLoader(temp_filepath)
+        elif extension == ".epub":
+            loader = UnstructuredEPubLoader(temp_filepath)
         else:
             st.write("This document format is not supported!")
             return None
 with st.sidebar.expander("Documents"):
     st.subheader("Files")
     uploaded_files = st.file_uploader(
+        label="Select files",
+        type=["pdf", "txt", "docx", "epub"],
+        accept_multiple_files=True,
     )
 with st.sidebar.expander("Setup"):
     st.subheader("API Key")
     openai_api_key = st.text_input("OpenAI API Key", type="password")
     st.stop()
 if uploaded_files:
+    result_retriever = configure_retriever(uploaded_files)
     memory = ConversationBufferMemory(
         memory_key="chat_history", chat_memory=msgs, return_messages=True
     )
     chain = ConversationalRetrievalChain.from_llm(
+        llm, retriever=result_retriever, memory=memory, verbose=False
     )
     avatars = {

requirements.txt CHANGED Viewed

@@ -6,4 +6,5 @@ streamlit
 streamlit_chat
 streamlit-extras
 pypdf
-docx2txt

 streamlit_chat
 streamlit-extras
 pypdf
+docx2txt
+unstructured