Spaces:
Runtime error
Runtime error
Add PDF capability
Browse files
- app.py +18 -7
- requirements.txt +3 -1
app.py
CHANGED
@@ -10,7 +10,11 @@ from aimakerspace.openai_utils.prompts import (
|
|
10 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
11 |
from aimakerspace.vectordatabase import VectorDatabase
|
12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
|
|
|
|
13 |
import chainlit as cl
|
|
|
|
|
14 |
|
15 |
system_template = """\
|
16 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
@@ -47,24 +51,31 @@ class RetrievalAugmentedQAPipeline:
|
|
47 |
|
48 |
return {"response": generate_response(), "context": context_list}
|
49 |
|
50 |
-
text_splitter = CharacterTextSplitter()
|
51 |
-
|
52 |
|
53 |
def process_text_file(file: AskFileResponse):
|
54 |
-
|
55 |
|
56 |
-
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=
|
57 |
temp_file_path = temp_file.name
|
58 |
|
59 |
with open(temp_file_path, "wb") as f:
|
60 |
f.write(file.content)
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
return texts
|
66 |
|
67 |
|
|
|
68 |
@cl.on_chat_start
|
69 |
async def on_chat_start():
|
70 |
files = None
|
|
|
10 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
11 |
from aimakerspace.vectordatabase import VectorDatabase
|
12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
13 |
+
from langchain_experimental.text_splitter import SemanticChunker
|
14 |
+
from langchain_openai.embeddings import OpenAIEmbeddings
|
15 |
import chainlit as cl
|
16 |
+
import tempfile
|
17 |
+
from langchain_community.document_loaders.pdf import PyPDFLoader
|
18 |
|
19 |
system_template = """\
|
20 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
51 |
|
52 |
return {"response": generate_response(), "context": context_list}
|
53 |
|
54 |
+
# Module-level splitter shared by the file-processing code. It replaces the
# earlier CharacterTextSplitter(): chunking is delegated to SemanticChunker,
# configured with OpenAI embeddings and the "standard_deviation" breakpoint
# threshold type.
text_splitter = SemanticChunker(
    OpenAIEmbeddings(),
    breakpoint_threshold_type="standard_deviation",
)
|
56 |
|
57 |
def process_text_file(file: AskFileResponse):
    """Split an uploaded ``.txt`` or ``.pdf`` file into text chunks.

    The upload's bytes are written to a temporary file on disk because both
    loaders used here are constructed from a file path. The file is loaded
    according to its MIME type and the loaded text is chunked with the
    module-level ``text_splitter``. The temporary file is removed before
    returning, whether or not loading succeeds.

    Args:
        file: The uploaded file. Only its ``type`` (MIME type) and
            ``content`` (raw bytes) attributes are read.

    Returns:
        A list of chunk strings, in document order.

    Raises:
        ValueError: If ``file.type`` is neither ``text/plain`` nor
            ``application/pdf``.
    """
    import os
    import tempfile

    # Pick the extension from the validated MIME type instead of embedding the
    # client-supplied file name in the temp path, and reject unsupported
    # uploads before anything is written to disk.
    suffix_by_type = {"text/plain": ".txt", "application/pdf": ".pdf"}
    suffix = suffix_by_type.get(file.type)
    if suffix is None:
        raise ValueError("Provide a .txt or .pdf file")

    # Write the bytes through the handle we already hold rather than opening
    # the file in text mode and reopening it by path in binary mode.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=suffix) as temp_file:
        temp_file.write(file.content)
        temp_file_path = temp_file.name

    try:
        if file.type == "text/plain":
            documents = TextFileLoader(temp_file_path).load_documents()
        else:
            documents = PyPDFLoader(temp_file_path).load()

        # NOTE(review): TextFileLoader is defined outside this view. The
        # getattr fallback accepts both Document-like objects (anything with
        # ``page_content``) and plain strings - confirm which one it returns.
        texts = []
        for document in documents:
            page_text = getattr(document, "page_content", document)
            texts.extend(text_splitter.split_text(page_text))
    finally:
        # delete=False keeps the file alive for the loaders; remove it here so
        # uploads do not accumulate in the temp directory.
        os.unlink(temp_file_path)
    return texts
|
76 |
|
77 |
|
78 |
+
|
79 |
@cl.on_chat_start
|
80 |
async def on_chat_start():
|
81 |
files = None
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
numpy
|
2 |
chainlit==0.7.700
|
3 |
-
openai
|
|
|
|
|
|
1 |
numpy
|
2 |
chainlit==0.7.700
|
3 |
+
openai
|
4 |
+
langchain_experimental
|
5 |
+
langchain_openai
|