gizemsarsinlar committed
Update app.py
app.py
CHANGED
@@ -1,33 +1,38 @@
 import os
 from typing import List
-
+import fitz  # PyMuPDF
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
-from langchain.chains import (
-    ConversationalRetrievalChain,
-)
+from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
-
 from langchain.docstore.document import Document
 from langchain.memory import ChatMessageHistory, ConversationBufferMemory
-
 import chainlit as cl
 
+# Set the OpenAI API key
 os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"
 
+# Settings for splitting the text
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
+# Function to convert a PDF file into text
+def extract_text_from_pdf(pdf_path: str) -> str:
+    doc = fitz.open(pdf_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
 
 @cl.on_chat_start
 async def on_chat_start():
     files = None
 
-    # Wait for the user to upload a file
-    while files == None:
+    # Wait for the user to upload a file
+    while files is None:
         files = await cl.AskFileMessage(
-            content="Please upload a text file to begin!",
-            accept=["text/plain"],
+            content="Please upload a PDF file to begin!",
+            accept=["application/pdf"],  # Accept PDF files
             max_size_mb=20,
             timeout=180,
         ).send()
@@ -37,23 +42,23 @@ async def on_chat_start():
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
 
-    # Decode the file
-    text = file.content.decode("utf-8")
+    # Convert the PDF file into text
+    text = extract_text_from_pdf(file.path)
 
-    # Split the text into chunks
+    # Split the text
     texts = text_splitter.split_text(text)
 
-    # Create a metadata for each chunk
+    # Create metadata for each text chunk
     metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
 
-    # Create a Chroma vector store
+    # Create a Chroma vector store
     embeddings = OpenAIEmbeddings()
     docsearch = await cl.make_async(Chroma.from_texts)(
         texts, embeddings, metadatas=metadatas
     )
 
+    # Chat history and memory management
     message_history = ChatMessageHistory()
-
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         output_key="answer",
@@ -61,7 +66,7 @@ async def on_chat_start():
         return_messages=True,
     )
 
-    # Create a chain that uses the Chroma vector store
+    # Create a chain that uses the Chroma vector store
     chain = ConversationalRetrievalChain.from_llm(
         ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
         chain_type="stuff",
@@ -70,18 +75,19 @@ async def on_chat_start():
         return_source_documents=True,
     )
 
-    # Let the user know that the system is ready
+    # Let the user know that the system is ready
     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
     await msg.update()
 
+    # Store the chain in the user session
     cl.user_session.set("chain", chain)
 
-
 @cl.on_message
 async def main(message: cl.Message):
     chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
     cb = cl.AsyncLangchainCallbackHandler()
 
+    # Process the user's message
     res = await chain.acall(message.content, callbacks=[cb])
     answer = res["answer"]
     source_documents = res["source_documents"]  # type: List[Document]
@@ -91,7 +97,7 @@ async def main(message: cl.Message):
     if source_documents:
         for source_idx, source_doc in enumerate(source_documents):
             source_name = f"source_{source_idx}"
-            # Create the text element referenced in the message
+            # Create the text element to be shown in the message
             text_elements.append(
                 cl.Text(content=source_doc.page_content, name=source_name, display="side")
             )
@@ -102,4 +108,5 @@ async def main(message: cl.Message):
         else:
             answer += "\nNo sources found"
 
-    await cl.Message(content=answer, elements=text_elements).send()
+    # Send the result to the user
+    await cl.Message(content=answer, elements=text_elements).send()
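
As a quick sanity check of the new extraction path, the PyMuPDF loop from this commit can be exercised on its own. A minimal sketch, assuming PyMuPDF is installed ("pip install pymupdf"); "sample.pdf" is a hypothetical placeholder, not a file from this repo:

import fitz  # PyMuPDF

# Same page-by-page extraction loop as the committed extract_text_from_pdf helper.
doc = fitz.open("sample.pdf")  # placeholder path
text = "".join(page.get_text() for page in doc)
print(text[:200])  # preview the first 200 extracted characters

The app itself starts the usual Chainlit way, "chainlit run app.py", once a real OpenAI key replaces the "OPENAI_API_KEY" placeholder string.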