eight
Browse files
README.md
CHANGED
@@ -24,5 +24,6 @@ let user decide the value of chunk_size and top-k
|
|
24 |
|
25 |
tell it in the prompt directly how long the story should approximately be.
|
26 |
|
27 |
-
|
|
|
28 |
|
|
|
24 |
|
25 |
tell it in the prompt directly how long the story should approximately be.
|
26 |
|
27 |
+
14.05.2024
|
28 |
+
https://huggingface.co/spaces/cvachet/pdf-chatbot/blob/main/app.py
|
29 |
|
app.py
CHANGED
@@ -81,12 +81,16 @@ def generate(image, pdfs, temperature=0.9, max_new_tokens=1500, top_p=0.95, repe
|
|
81 |
for file in pdfs:
|
82 |
with open(file.name, "rb") as f:
|
83 |
state_of_the_union += loader.load(f)
|
|
|
|
|
|
|
84 |
|
85 |
# split the content into chunks
|
86 |
text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
87 |
# TokenTextSplitter() can ensure the integrity of words
|
88 |
# each chunk to overlap with the previous chunk by 20 tokens
|
89 |
-
texts = text_splitter.split_text(state_of_the_union)
|
|
|
90 |
print("...........................................")
|
91 |
# print the first chunk
|
92 |
print("text[0]: ", texts[0])
|
@@ -191,7 +195,7 @@ demo = gr.Interface(fn=generate,
|
|
191 |
#gr.Video(sources=["webcam"], label="video")
|
192 |
gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
|
193 |
|
194 |
-
gr.Files(
|
195 |
|
196 |
gr.Slider(
|
197 |
label="temperature",
|
|
|
81 |
for file in pdfs:
|
82 |
with open(file.name, "rb") as f:
|
83 |
state_of_the_union += loader.load(f)
|
84 |
+
# r: read
|
85 |
+
# b: binary. The file is opened in binary mode, which is used for non-text files such as PDFs.
|
86 |
+
|
87 |
|
88 |
# split the content into chunks
|
89 |
text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
90 |
# TokenTextSplitter() can ensure the integrity of words
|
91 |
# each chunk to overlap with the previous chunk by 20 tokens
|
92 |
+
#texts = text_splitter.split_text(state_of_the_union)
|
93 |
+
texts = text_splitter.split_documents(state_of_the_union)
|
94 |
print("...........................................")
|
95 |
# print the first chunk
|
96 |
print("text[0]: ", texts[0])
|
|
|
195 |
#gr.Video(sources=["webcam"], label="video")
|
196 |
gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
|
197 |
|
198 |
+
gr.Files(file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDFs"),
|
199 |
|
200 |
gr.Slider(
|
201 |
label="temperature",
|