tdeshane committed
Commit 95b96e9
Parent: f9c8754

Update app.py

Files changed (1): app.py +36 -20
app.py CHANGED
@@ -59,40 +59,57 @@ def process_file(file: cl.AskFileMessage):
     return texts
 
 
+
 @cl.on_chat_start
 async def on_chat_start():
     files = None
 
     # Wait for the user to upload a file
-    while files == None:
+    while files is None:
+        # Note: this now accepts both text/plain and application/pdf files
         files = await cl.AskFileMessage(
-            content="Please upload a PDF file to begin!",
-            accept=["application/pdf"],
-            max_size_mb=20,
+            content="Please upload a text or PDF file to begin!",
+            accept=["text/plain", "application/pdf"],
+            max_size_mb=20,  # allow larger uploads, since PDFs can be big
             timeout=180,
         ).send()
 
     file = files[0]
 
-    msg = cl.Message(
-        content=f"Processing `{file.name}`..."
-    )
+    # Notify the user that their file is being processed
+    msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
 
-    # load the file
-    texts = process_file(file)
-
-    print(texts[0])
+    # Initialize an empty list for texts; it is populated based on the file type
+    texts = []
 
-    # Create a metadata for each chunk
-    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
-
-    # Create a Chroma vector store
-    embeddings = OpenAIEmbeddings()
-    docsearch = await cl.make_async(Chroma.from_texts)(
-        texts, embeddings, metadatas=metadatas
-    )
+    # Check the file type and process accordingly
+    if file.content_type == "text/plain":
+        # Handle text file
+        with open(file.path, "r", encoding="utf-8") as f:
+            text = f.read()
+            texts.append(text)  # add the full text as a single entry
 
+        # Update the user about the text file
+        await cl.Message(
+            content=f"`{file.name}` uploaded, it contains {len(text)} characters!"
+        ).send()
+
+    elif file.content_type == "application/pdf":
+        # Handle PDF file
+        texts = process_file(file)  # process_file() (defined above) extracts text chunks from the PDF
+
+    # Create metadata for each chunk
+    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
+
+    # Create a Chroma vector store
+    embeddings = OpenAIEmbeddings()
+    docsearch = await cl.make_async(Chroma.from_texts)(
+        texts, embeddings, metadatas=metadatas
+    )
+
+    # The rest of the setup below (message history, memory, chain)
+    # is unchanged
     message_history = ChatMessageHistory()
 
     memory = ConversationBufferMemory(
@@ -102,7 +119,6 @@ async def on_chat_start():
         return_messages=True,
     )
 
-    # Create a chain that uses the Chroma vector store
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
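
For readers reconstructing the full flow: the PDF branch calls process_file(file), but only its signature (def process_file(file: cl.AskFileMessage)) and its return texts appear in the hunk context above. Below is a minimal sketch of how such a helper is commonly written; PyPDF2 for extraction, LangChain's RecursiveCharacterTextSplitter for chunking, and the chunk sizes are all illustrative assumptions, not taken from this commit.

# Sketch only: PyPDF2 and the splitter parameters are assumptions, not from this repo.
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def process_file(file):
    # `file` is the chainlit upload object; `.path` is used the same way
    # as in the text/plain branch of the diff above.
    reader = PdfReader(file.path)
    raw_text = "\n".join(page.extract_text() or "" for page in reader.pages)

    # Split into overlapping chunks so each one stays small enough to embed.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = splitter.split_text(raw_text)
    return texts

Whatever the real implementation, it must return a list of strings, since the result is passed directly to Chroma.from_texts(texts, embeddings, metadatas=metadatas).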