Spaces:
Runtime error
Runtime error
fix: .pdf files were not being classified
Browse files
main.py
CHANGED
@@ -12,13 +12,8 @@ from transformers import pipeline
|
|
12 |
|
13 |
app = FastAPI()
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
@app.get("/infer_t5")
|
19 |
-
def t5(input):
|
20 |
-
output = pipe_flan(input)
|
21 |
-
return {"output": output[0]["generated_text"]}
|
22 |
|
23 |
|
24 |
@app.post("/classify")
|
@@ -32,22 +27,22 @@ async def classify_doc(files: List[UploadFile] = File(...)):
|
|
32 |
|
33 |
if filename.endswith('.pdf'):
|
34 |
try:
|
35 |
-
pages = convert_from_bytes(
|
36 |
-
|
37 |
for pagenum, image in enumerate(pages):
|
38 |
-
|
39 |
-
|
40 |
-
classificationRes = classify_acct_dtype_str(image, filename)
|
41 |
-
|
42 |
# add/update classification result dictionary
|
43 |
if (classificationRes in classificationResults):
|
44 |
classificationResults.update({classificationRes : classificationResults[classificationRes] + 1})
|
45 |
else:
|
46 |
classificationResults.update({classificationRes : 1})
|
47 |
-
|
48 |
-
except:
|
49 |
-
|
|
|
50 |
|
|
|
51 |
else:
|
52 |
classificationRes = classify_acct_dtype_str(contents, filename)
|
53 |
|
@@ -281,8 +276,8 @@ def classify_acct_dtype_str(content, filename):
|
|
281 |
try:
|
282 |
ipt = Image.open(BytesIO(content))
|
283 |
dtype_inf, dtype_conf = doctype_classify(ipt, filename)
|
284 |
-
except:
|
285 |
-
return f"Error in opening {filename}"
|
286 |
|
287 |
return dtype_inf
|
288 |
|
|
|
12 |
|
13 |
app = FastAPI()
|
14 |
|
15 |
+
"""## Poppler dir"""
|
16 |
+
poppler_path = "poppler-23.11.0/Library/bin"
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
@app.post("/classify")
|
|
|
27 |
|
28 |
if filename.endswith('.pdf'):
|
29 |
try:
|
30 |
+
pages = convert_from_bytes(contents, poppler_path = poppler_path)
|
31 |
+
print(pages)
|
32 |
for pagenum, image in enumerate(pages):
|
33 |
+
classificationRes, dtype_conf = doctype_classify(image.convert('RGB'), filename)
|
34 |
+
|
|
|
|
|
35 |
# add/update classification result dictionary
|
36 |
if (classificationRes in classificationResults):
|
37 |
classificationResults.update({classificationRes : classificationResults[classificationRes] + 1})
|
38 |
else:
|
39 |
classificationResults.update({classificationRes : 1})
|
40 |
+
|
41 |
+
except Exception as err:
|
42 |
+
print(err)
|
43 |
+
return f"Error in opening {filename}, {err}"
|
44 |
|
45 |
+
# any non-.pdf upload; expected to be a png, jpg, or jpeg image
|
46 |
else:
|
47 |
classificationRes = classify_acct_dtype_str(contents, filename)
|
48 |
|
|
|
276 |
try:
|
277 |
ipt = Image.open(BytesIO(content))
|
278 |
dtype_inf, dtype_conf = doctype_classify(ipt, filename)
|
279 |
+
except Exception as err:
|
280 |
+
return f"Error in opening {filename}, {err}"
|
281 |
|
282 |
return dtype_inf
|
283 |
|