yiyii committed on
Commit
5668875
1 Parent(s): f00cb87
Files changed (2) hide show
  1. README.md +2 -1
  2. app.py +6 -2
README.md CHANGED
@@ -24,5 +24,6 @@ let user decide the value of chunk_size and top-k
24
 
25
  tell it in the prompt directly how long the story should approximately be.
26
 
27
-
 
28
 
 
24
 
25
  tell it in the prompt directly how long the story should approximately be.
26
 
27
+ 14.05.2024
28
+ https://huggingface.co/spaces/cvachet/pdf-chatbot/blob/main/app.py
29
 
app.py CHANGED
@@ -81,12 +81,16 @@ def generate(image, pdfs, temperature=0.9, max_new_tokens=1500, top_p=0.95, repe
81
  for file in pdfs:
82
  with open(file.name, "rb") as f:
83
  state_of_the_union += loader.load(f)
 
 
 
84
 
85
  # split the content into chunks
86
  text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
87
  # TokenTextSplitter() can ensure the integrity of words
88
  # each chunk to overlap with the previous chunk by 20 tokens
89
- texts = text_splitter.split_text(state_of_the_union)
 
90
  print("...........................................")
91
  # print the first chunk
92
  print("text[0]: ", texts[0])
@@ -191,7 +195,7 @@ demo = gr.Interface(fn=generate,
191
  #gr.Video(sources=["webcam"], label="video")
192
  gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
193
 
194
- gr.Files(label="Upload PDFs", type="file", accept=".pdf", multiple=True),
195
 
196
  gr.Slider(
197
  label="temperature",
 
81
  for file in pdfs:
82
  with open(file.name, "rb") as f:
83
  state_of_the_union += loader.load(f)
84
+ # r: read
85
+ # b: binary. The file is opened in binary mode, which is needed for non-text files.
86
+
87
 
88
  # split the content into chunks
89
  text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
90
  # TokenTextSplitter() can ensure the integrity of words
91
  # each chunk to overlap with the previous chunk by 20 tokens
92
+ #texts = text_splitter.split_text(state_of_the_union)
93
+ texts = text_splitter.split_documents(state_of_the_union)
94
  print("...........................................")
95
  # print the first chunk
96
  print("text[0]: ", texts[0])
 
195
  #gr.Video(sources=["webcam"], label="video")
196
  gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
197
 
198
+ gr.Files(file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDFs"),
199
 
200
  gr.Slider(
201
  label="temperature",