Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
|
|
6 |
import os
|
7 |
from threading import Thread
|
8 |
|
9 |
-
import
|
10 |
import docx
|
11 |
from pptx import Presentation
|
12 |
|
@@ -56,7 +56,7 @@ def extract_text(path):
|
|
56 |
return open(path, 'r').read()
|
57 |
|
58 |
def extract_pdf(path):
|
59 |
-
doc =
|
60 |
text = ""
|
61 |
for page in doc:
|
62 |
text += page.get_text()
|
@@ -82,12 +82,13 @@ def extract_pptx(path):
|
|
82 |
def mode_load(path):
|
83 |
choice = ""
|
84 |
file_type = path.split(".")[-1]
|
|
|
85 |
if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
|
86 |
-
if file_type.endswith("
|
87 |
content = extract_pdf(path)
|
88 |
-
elif file_type.endswith("
|
89 |
content = extract_docx(path)
|
90 |
-
elif file_type.endswith("
|
91 |
content = extract_pptx(path)
|
92 |
else:
|
93 |
content = extract_text(path)
|
|
|
6 |
import os
|
7 |
from threading import Thread
|
8 |
|
9 |
+
import pymupdf
|
10 |
import docx
|
11 |
from pptx import Presentation
|
12 |
|
|
|
56 |
return open(path, 'r').read()
|
57 |
|
58 |
def extract_pdf(path):
|
59 |
+
doc = pymupdf.open(path)
|
60 |
text = ""
|
61 |
for page in doc:
|
62 |
text += page.get_text()
|
|
|
82 |
def mode_load(path):
|
83 |
choice = ""
|
84 |
file_type = path.split(".")[-1]
|
85 |
+
print(file_type)
|
86 |
if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
|
87 |
+
if file_type.endswith("pdf"):
|
88 |
content = extract_pdf(path)
|
89 |
+
elif file_type.endswith("docx"):
|
90 |
content = extract_docx(path)
|
91 |
+
elif file_type.endswith("pptx"):
|
92 |
content = extract_pptx(path)
|
93 |
else:
|
94 |
content = extract_text(path)
|