ConradLax committed on
Commit
16d1ed2
·
1 Parent(s): 1bfc1f2

fix: output not shown for all input docs

Browse files
Files changed (2) hide show
  1. main.py +132 -29
  2. static/script.js +4 -0
main.py CHANGED
@@ -2,7 +2,9 @@ from fastapi import FastAPI
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import FileResponse
4
  from fastapi import File, UploadFile
 
5
  from typing import List
 
6
 
7
  from transformers import pipeline
8
 
@@ -17,45 +19,141 @@ def t5(input):
17
  return {"output": output[0]["generated_text"]}
18
 
19
 
20
- # @app.post("/classify/")
21
- # async def classify_doc(file: UploadFile):
22
- # return {"file_size": len(file)}
23
-
24
  @app.post("/classify")
25
  async def classify_doc(files: List[UploadFile] = File(...)):
26
- # for file in files:
27
- # try:
28
- # contents = file.file.read()
29
- # filename = file.filename
30
- # classify_res = classify_acct_dtype_str(contents, filename)
31
- # except Exception as err:
32
- # print(Exception, err)
33
- # return {"message": "There was an error in uploading file(s)"}
34
- # finally:
35
- # file.file.close()
36
- return {"message": f"{[file.filename for file in files]} : {[getDocClassType(file) for file in files]}"}
37
 
38
 
39
- app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- @app.get("/")
42
- def index() -> FileResponse:
43
- return FileResponse(path="/app/static/index.html", media_type="text/html")
44
 
45
 
46
- def getDocClassType(file):
47
- classify_res = ""
 
48
  try:
49
- contents = file.file.read()
50
- filename = file.filename
51
- classify_res = classify_acct_dtype_str(contents, filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  except Exception as err:
53
  print(Exception, err)
54
- return {"Error in reading file."}
55
  finally:
56
  file.file.close()
57
 
58
- return classify_res
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  import re
@@ -161,9 +259,14 @@ import requests
161
  from io import BytesIO
162
 
163
  def classify_acct_dtype_str(content, filename):
164
- ipt = Image.open(BytesIO(content))
165
- dtype_inf, dtype_conf = doctype_classify(ipt, filename)
 
 
 
 
 
166
 
167
  return dtype_inf
168
 
169
- # classify_acct_dtype_str("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")
 
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import FileResponse
4
  from fastapi import File, UploadFile
5
+ from fastapi.responses import StreamingResponse
6
  from typing import List
7
+ from pdf2image import convert_from_bytes
8
 
9
  from transformers import pipeline
10
 
 
19
  return {"output": output[0]["generated_text"]}
20
 
21
 
 
 
 
 
22
  @app.post("/classify")
23
  async def classify_doc(files: List[UploadFile] = File(...)):
24
+ # classificationResults = {}
25
+ # for file in files:
26
+ # try:
27
+ # contents = file.file.read()
28
+ # filename = file.filename
 
 
 
 
 
 
29
 
30
 
31
+ # if filename.endswith('.pdf'):
32
+ # try:
33
+ # pages = convert_from_bytes(open(file, 'rb').read())
34
+
35
+ # for pagenum, image in enumerate(pages):
36
+ # if pagenum != 0 and pagenum < len(pages):
37
+ # classificationRes = classify_acct_dtype_str(contents, filename)
38
+ # classificationResults[f'{pagenum:02d}'] = {
39
+ # 'doctype': classificationRes
40
+ # }
41
+ # except:
42
+ # return f"Error in opening {filename}"
43
+
44
+ # else:
45
+ # classificationRes = classify_acct_dtype_str(contents, filename)
46
+ # classificationResults[f'{0:02d}'] = {
47
+ # 'doctype' : classificationRes
48
+ # }
49
+
50
+
51
+
52
+
53
+ # except Exception as err:
54
+ # print(Exception, err)
55
+ # return {"message": "There was an error in uploading file(s)"}
56
+ # finally:
57
+ # file.file.close()
58
+
59
+ # return StreamingResponse(
60
+ # iter(classificationResults.items()),
61
+ # media_type="text/csv",
62
+ # headers={"Content-Disposition": f"attachment; filename=data.csv"}
63
+ # )
64
+
65
+ return {"message": f"{[file.filename for file in files]} : {[classifyFiles(file) for file in files]}"}
66
+
67
 
 
 
 
68
 
69
 
70
+
71
+
72
+ def classifyFiles(file):
73
  try:
74
+ contents = file.file.read()
75
+ filename = file.filename
76
+
77
+ classificationResults = []
78
+
79
+ if filename.endswith('.pdf'):
80
+ try:
81
+ pages = convert_from_bytes(open(file, 'rb').read())
82
+
83
+ for pagenum, image in enumerate(pages):
84
+ if pagenum != 0 and pagenum < len(pages):
85
+ classificationRes = classify_acct_dtype_str(contents, filename)
86
+ # classificationResults[f"{pagenum:02d}"] = {
87
+ # 'doctype': classificationRes
88
+ # }
89
+ except:
90
+ return f"Error in opening {filename}"
91
+
92
+ else:
93
+ classificationRes = classify_acct_dtype_str(contents, filename)
94
+ # classificationResults[f"{0:02d}"] = {
95
+ # 'doctype' : classificationRes
96
+ # }
97
+
98
+
99
+
100
+
101
  except Exception as err:
102
  print(Exception, err)
103
+ return {"message": "There was an error in uploading file(s)"}
104
  finally:
105
  file.file.close()
106
 
107
+ return classificationRes
108
+
109
+
110
+
111
+ # # dict for counting doctypes
112
+ # doctype_dict = dict()
113
+
114
+ # # writer for csv
115
+ # with open("{}/doctype_count.csv".format(output_dir), 'w', newline='') as outcsv:
116
+ # writer = csv.writer(outcsv)
117
+ # writer.writerow(["Type", "Count"])
118
+
119
+ # for file in files:
120
+ # try:
121
+ # contents = file.file.read()
122
+ # classify_res = classify_acct_dtype_str(BytesIO(contents))
123
+ # except Exception as err:
124
+ # print(Exception, err)
125
+ # return {"message": "There was an error in uploading file(s)"}
126
+ # finally:
127
+ # file.file.close()
128
+
129
+ # print(classify_res)
130
+
131
+ # if (classify_res in doctype_dict):
132
+ # doctype_dict.update({classify_res : doctype_dict[classify_res] + 1})
133
+ # else:
134
+ # doctype_dict.update({classify_res : 1})
135
+
136
+
137
+
138
+ # export_media_type = 'text/csv'
139
+ # export_headers = {
140
+ # "Content-Disposition": "attachment; filename={file_name}.csv".format(file_name="output")
141
+ # }
142
+ # return StreamingResponse(csv_file_binary, headers=export_headers, media_type=export_media_type)
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+ app.mount("/", StaticFiles(directory="static", html=True), name="static")
151
+
152
+ @app.get("/")
153
+ def index() -> FileResponse:
154
+ return FileResponse(path="/app/static/index.html", media_type="text/html")
155
+
156
+
157
 
158
 
159
  import re
 
259
  from io import BytesIO
260
 
261
  def classify_acct_dtype_str(content, filename):
262
+ # response = requests.get("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")
263
+ # ipt = Image.open(BytesIO(response.content))
264
+ try:
265
+ ipt = Image.open(BytesIO(content))
266
+ dtype_inf, dtype_conf = doctype_classify(ipt, filename)
267
+ except:
268
+ return f"Error in opening {filename}"
269
 
270
  return dtype_inf
271
 
272
+ # classify_acct_dtype_str("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")
static/script.js CHANGED
@@ -45,6 +45,7 @@ async function submitImages(acceptedFiles) {
45
  body: formData
46
  });
47
  let res = classifyResponse.json();
 
48
  return res;
49
  }
50
 
@@ -54,9 +55,12 @@ let imageArray = [];
54
 
55
  inputImg.addEventListener("change", async event =>{
56
  const files = event.target.files;
 
57
  await submitImages(files).then((resultText) => {
58
  outputImg.innerText = "Result: " + resultText["message"];
59
  });
 
 
60
 
61
  const classifyBtn = document.getElementById('classify-btn');
62
  const outputPar = document.getElementById('test-output');
 
45
  body: formData
46
  });
47
  let res = classifyResponse.json();
48
+ console.log(res);
49
  return res;
50
  }
51
 
 
55
 
56
  inputImg.addEventListener("change", async event =>{
57
  const files = event.target.files;
58
+ console.log(files);
59
  await submitImages(files).then((resultText) => {
60
  outputImg.innerText = "Result: " + resultText["message"];
61
  });
62
+ // outputImg.innerText = "Result is: " + await submitImages(files);
63
+ });
64
 
65
  const classifyBtn = document.getElementById('classify-btn');
66
  const outputPar = document.getElementById('test-output');