ConradLax committed on
Commit
aa41255
·
1 Parent(s): c089b85

fix: not classifying .pdf files

Browse files
Files changed (1) hide show
  1. main.py +13 -18
main.py CHANGED
@@ -12,13 +12,8 @@ from transformers import pipeline
12
 
13
  app = FastAPI()
14
 
15
- pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")
16
-
17
-
18
- @app.get("/infer_t5")
19
- def t5(input):
20
- output = pipe_flan(input)
21
- return {"output": output[0]["generated_text"]}
22
 
23
 
24
  @app.post("/classify")
@@ -32,22 +27,22 @@ async def classify_doc(files: List[UploadFile] = File(...)):
32
 
33
  if filename.endswith('.pdf'):
34
  try:
35
- pages = convert_from_bytes(open(file, 'rb').read())
36
-
37
  for pagenum, image in enumerate(pages):
38
- # if pagenum != 0 and pagenum < len(pages):
39
- # if pagenum == 0:
40
- classificationRes = classify_acct_dtype_str(image, filename)
41
-
42
  # add/update classification result dictionary
43
  if (classificationRes in classificationResults):
44
  classificationResults.update({classificationRes : classificationResults[classificationRes] + 1})
45
  else:
46
  classificationResults.update({classificationRes : 1})
47
- # break
48
- except:
49
- return f"Error in opening {filename}"
 
50
 
 
51
  else:
52
  classificationRes = classify_acct_dtype_str(contents, filename)
53
 
@@ -281,8 +276,8 @@ def classify_acct_dtype_str(content, filename):
281
  try:
282
  ipt = Image.open(BytesIO(content))
283
  dtype_inf, dtype_conf = doctype_classify(ipt, filename)
284
- except:
285
- return f"Error in opening {filename}"
286
 
287
  return dtype_inf
288
 
 
12
 
13
  app = FastAPI()
14
 
15
+ """## Poppler dir"""
16
+ poppler_path = "poppler-23.11.0/Library/bin"
 
 
 
 
 
17
 
18
 
19
  @app.post("/classify")
 
27
 
28
  if filename.endswith('.pdf'):
29
  try:
30
+ pages = convert_from_bytes(contents, poppler_path = poppler_path)
31
+ print(pages)
32
  for pagenum, image in enumerate(pages):
33
+ classificationRes, dtype_conf = doctype_classify(image.convert('RGB'), filename)
34
+
 
 
35
  # add/update classification result dictionary
36
  if (classificationRes in classificationResults):
37
  classificationResults.update({classificationRes : classificationResults[classificationRes] + 1})
38
  else:
39
  classificationResults.update({classificationRes : 1})
40
+
41
+ except Exception as err:
42
+ print(err)
43
+ return f"Error in opening {filename}, {err}"
44
 
45
+ # png, jpg, jpeg files
46
  else:
47
  classificationRes = classify_acct_dtype_str(contents, filename)
48
 
 
276
  try:
277
  ipt = Image.open(BytesIO(content))
278
  dtype_inf, dtype_conf = doctype_classify(ipt, filename)
279
+ except Exception as err:
280
+ return f"Error in opening {filename}, {err}"
281
 
282
  return dtype_inf
283