gizemsarsinlar committed
Commit b185d1e · verified · 1 Parent(s): 418727d

Update app.py

Switch the file-upload flow from plain text to PDF: accept application/pdf uploads, extract the text with PyMuPDF (fitz), and simplify the langchain.chains import.

Files changed (1):
  app.py  +28 -21
app.py CHANGED
@@ -1,33 +1,38 @@
 import os
 from typing import List
-
+import fitz  # PyMuPDF
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
-from langchain.chains import (
-    ConversationalRetrievalChain,
-)
+from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
-
 from langchain.docstore.document import Document
 from langchain.memory import ChatMessageHistory, ConversationBufferMemory
-
 import chainlit as cl
 
+# Set the OpenAI API key
 os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"
 
+# Settings for splitting the text
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
+# Function to convert a PDF file to text
+def extract_text_from_pdf(pdf_path: str) -> str:
+    doc = fitz.open(pdf_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
 
 @cl.on_chat_start
 async def on_chat_start():
     files = None
 
-    # Wait for the user to upload a file
-    while files == None:
+    # Wait for the user to upload a file
+    while files is None:
         files = await cl.AskFileMessage(
-            content="Please upload a text file to begin!",
-            accept=["text/plain"],
+            content="Please upload a PDF file to begin!",
+            accept=["application/pdf"],  # Accept PDF files
             max_size_mb=20,
             timeout=180,
         ).send()
@@ -37,23 +42,23 @@ async def on_chat_start():
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
 
-    with open(file.path, "r", encoding="utf-8") as f:
-        text = f.read()
+    # Convert the PDF file to text
+    text = extract_text_from_pdf(file.path)
 
-    # Split the text into chunks
+    # Split the text
     texts = text_splitter.split_text(text)
 
-    # Create a metadata for each chunk
+    # Create metadata for each text chunk
     metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
 
-    # Create a Chroma vector store
+    # Create a Chroma vector store
     embeddings = OpenAIEmbeddings()
     docsearch = await cl.make_async(Chroma.from_texts)(
         texts, embeddings, metadatas=metadatas
     )
 
+    # Chat history and memory management
     message_history = ChatMessageHistory()
-
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         output_key="answer",
@@ -61,7 +66,7 @@ async def on_chat_start():
         return_messages=True,
     )
 
-    # Create a chain that uses the Chroma vector store
+    # Create a chain that uses the Chroma vector store
     chain = ConversationalRetrievalChain.from_llm(
         ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
         chain_type="stuff",
@@ -70,18 +75,19 @@ async def on_chat_start():
         return_source_documents=True,
     )
 
-    # Let the user know that the system is ready
+    # Let the user know the system is ready
     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
     await msg.update()
 
+    # Store the chain in the user session
     cl.user_session.set("chain", chain)
 
-
 @cl.on_message
 async def main(message: cl.Message):
     chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
     cb = cl.AsyncLangchainCallbackHandler()
 
+    # Process the user's message
     res = await chain.acall(message.content, callbacks=[cb])
     answer = res["answer"]
     source_documents = res["source_documents"]  # type: List[Document]
@@ -91,7 +97,7 @@ async def main(message: cl.Message):
     if source_documents:
         for source_idx, source_doc in enumerate(source_documents):
             source_name = f"source_{source_idx}"
-            # Create the text element referenced in the message
+            # Create the text element shown in the message
            text_elements.append(
                 cl.Text(content=source_doc.page_content, name=source_name, display="side")
             )
@@ -102,4 +108,5 @@ async def main(message: cl.Message):
     else:
         answer += "\nNo sources found"
 
-    await cl.Message(content=answer, elements=text_elements).send()
+    # Send the result to the user
+    await cl.Message(content=answer, elements=text_elements).send()
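The new extract_text_from_pdf helper can be exercised on its own before it is wired into the upload flow. Below is a minimal standalone check, assuming PyMuPDF is installed (pip install pymupdf; the package is imported as fitz) and using a hypothetical local sample.pdf as a stand-in for an upload; the explicit doc.close() is a small addition over the committed helper.

import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path: str) -> str:
    # Concatenate the plain text of every page in the document.
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()  # not in the committed helper; releases the file handle
    return text

if __name__ == "__main__":
    # "sample.pdf" is a placeholder path used only for this check.
    print(extract_text_from_pdf("sample.pdf")[:300])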
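One thing the commit does not change: os.environ["OPENAI_API_KEY"] is still assigned the literal placeholder string "OPENAI_API_KEY", so the embedding and chat calls will fail with an authentication error until a real key is supplied. A minimal sketch of the usual fix, assuming the key arrives as an environment variable or Hugging Face Space secret (the error message here is illustrative, not part of the commit):

import os

# Use the key injected by the host instead of overwriting it with a placeholder.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; configure it before launching the app.")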
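To try the result locally, the usual steps would be to install the dependencies the imports imply (chainlit, langchain, openai, chromadb, and pymupdf, which provides the fitz module), set a real OPENAI_API_KEY, and start the app with chainlit run app.py. Chainlit then serves the chat UI and invokes on_chat_start when a session connects, which is where the PDF upload prompt now appears.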