gizemsarsinlar committed
Commit b185d1e · verified · 1 Parent(s): 418727d

Update app.py

Switch the file-upload flow from plain text to PDF: accept application/pdf uploads, extract the text with PyMuPDF (fitz), and simplify the langchain.chains import.

Files changed (1):
  app.py  +28 -21
app.py CHANGED
@@ -1,33 +1,38 @@
 import os
 from typing import List
-
+import fitz  # PyMuPDF
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
-from langchain.chains import (
-    ConversationalRetrievalChain,
-)
+from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
-
 from langchain.docstore.document import Document
 from langchain.memory import ChatMessageHistory, ConversationBufferMemory
-
 import chainlit as cl
 
+# Set the OpenAI API key
 os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"
 
+# Settings for splitting the text
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
+# Function to convert a PDF file to text
+def extract_text_from_pdf(pdf_path: str) -> str:
+    doc = fitz.open(pdf_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
 
 @cl.on_chat_start
 async def on_chat_start():
     files = None
 
-    # Wait for the user to upload a file
-    while files == None:
+    # Wait for the user to upload a file
+    while files is None:
         files = await cl.AskFileMessage(
-            content="Please upload a text file to begin!",
-            accept=["text/plain"],
+            content="Please upload a PDF file to begin!",
+            accept=["application/pdf"],  # Accept PDF files
             max_size_mb=20,
             timeout=180,
         ).send()
@@ -37,23 +42,23 @@ async def on_chat_start():
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
 
-    with open(file.path, "r", encoding="utf-8") as f:
-        text = f.read()
+    # Convert the PDF file to text
+    text = extract_text_from_pdf(file.path)
 
-    # Split the text into chunks
+    # Split the text
     texts = text_splitter.split_text(text)
 
-    # Create a metadata for each chunk
+    # Create metadata for each text chunk
     metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
 
-    # Create a Chroma vector store
+    # Create a Chroma vector store
     embeddings = OpenAIEmbeddings()
     docsearch = await cl.make_async(Chroma.from_texts)(
         texts, embeddings, metadatas=metadatas
     )
 
+    # Chat history and memory management
     message_history = ChatMessageHistory()
-
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         output_key="answer",
@@ -61,7 +66,7 @@ async def on_chat_start():
         return_messages=True,
     )
 
-    # Create a chain that uses the Chroma vector store
+    # Create a chain that uses the Chroma vector store
     chain = ConversationalRetrievalChain.from_llm(
         ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
         chain_type="stuff",
@@ -70,18 +75,19 @@ async def on_chat_start():
         return_source_documents=True,
     )
 
-    # Let the user know that the system is ready
+    # Let the user know the system is ready
     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
     await msg.update()
 
+    # Store the chain in the user session
     cl.user_session.set("chain", chain)
 
-
 @cl.on_message
 async def main(message: cl.Message):
     chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
     cb = cl.AsyncLangchainCallbackHandler()
 
+    # Process the user's message
     res = await chain.acall(message.content, callbacks=[cb])
     answer = res["answer"]
     source_documents = res["source_documents"]  # type: List[Document]
@@ -91,7 +97,7 @@ async def main(message: cl.Message):
     if source_documents:
         for source_idx, source_doc in enumerate(source_documents):
             source_name = f"source_{source_idx}"
-            # Create the text element referenced in the message
+            # Create the text element shown in the message
            text_elements.append(
                 cl.Text(content=source_doc.page_content, name=source_name, display="side")
             )
@@ -102,4 +108,5 @@ async def main(message: cl.Message):
     else:
         answer += "\nNo sources found"
 
-    await cl.Message(content=answer, elements=text_elements).send()
+    # Send the result to the user
+    await cl.Message(content=answer, elements=text_elements).send()
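The new extract_text_from_pdf helper can be exercised on its own before it is wired into the upload flow. Below is a minimal standalone check, assuming PyMuPDF is installed (pip install pymupdf; the package is imported as fitz) and using a hypothetical local sample.pdf as a stand-in for an upload; the explicit doc.close() is a small addition over the committed helper.

import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path: str) -> str:
    # Concatenate the plain text of every page in the document.
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()  # not in the committed helper; releases the file handle
    return text

if __name__ == "__main__":
    # "sample.pdf" is a placeholder path used only for this check.
    print(extract_text_from_pdf("sample.pdf")[:300])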
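One thing the commit does not change: os.environ["OPENAI_API_KEY"] is still assigned the literal placeholder string "OPENAI_API_KEY", so the embedding and chat calls will fail with an authentication error until a real key is supplied. A minimal sketch of the usual fix, assuming the key arrives as an environment variable or Hugging Face Space secret (the error message here is illustrative, not part of the commit):

import os

# Use the key injected by the host instead of overwriting it with a placeholder.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; configure it before launching the app.")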
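To try the result locally, the usual steps would be to install the dependencies the imports imply (chainlit, langchain, openai, chromadb, and pymupdf, which provides the fitz module), set a real OPENAI_API_KEY, and start the app with chainlit run app.py. Chainlit then serves the chat UI and invokes on_chat_start when a session connects, which is where the PDF upload prompt now appears.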