ConradLax committed on
Commit
3393a89
·
1 Parent(s): 16d1ed2

feat: downloadable .csv output

Browse files
Files changed (3) hide show
  1. main.py +52 -35
  2. static/index.html +11 -23
  3. static/script.js +24 -54
main.py CHANGED
@@ -5,6 +5,8 @@ from fastapi import File, UploadFile
5
  from fastapi.responses import StreamingResponse
6
  from typing import List
7
  from pdf2image import convert_from_bytes
 
 
8
 
9
  from transformers import pipeline
10
 
@@ -21,48 +23,63 @@ def t5(input):
21
 
22
  @app.post("/classify")
23
  async def classify_doc(files: List[UploadFile] = File(...)):
24
- # classificationResults = {}
25
- # for file in files:
26
- # try:
27
- # contents = file.file.read()
28
- # filename = file.filename
29
-
30
-
31
- # if filename.endswith('.pdf'):
32
- # try:
33
- # pages = convert_from_bytes(open(file, 'rb').read())
34
-
35
- # for pagenum, image in enumerate(pages):
36
- # if pagenum != 0 and pagenum < len(pages):
37
- # classificationRes = classify_acct_dtype_str(contents, filename)
38
- # classificationResults[f'{pagenum:02d}'] = {
39
- # 'doctype': classificationRes
40
- # }
41
- # except:
42
- # return f"Error in opening {filename}"
 
 
 
 
 
43
 
44
- # else:
45
- # classificationRes = classify_acct_dtype_str(contents, filename)
46
- # classificationResults[f'{0:02d}'] = {
47
- # 'doctype' : classificationRes
48
- # }
 
 
 
49
 
50
 
51
 
52
 
53
- # except Exception as err:
54
- # print(Exception, err)
55
- # return {"message": "There was an error in uploading file(s)"}
56
- # finally:
57
- # file.file.close()
58
 
59
- # return StreamingResponse(
60
- # iter(classificationResults.items()),
61
- # media_type="text/csv",
62
- # headers={"Content-Disposition": f"attachment; filename=data.csv"}
63
- # )
 
 
 
 
 
 
 
64
 
65
- return {"message": f"{[file.filename for file in files]} : {[classifyFiles(file) for file in files]}"}
66
 
67
 
68
 
 
5
  from fastapi.responses import StreamingResponse
6
  from typing import List
7
  from pdf2image import convert_from_bytes
8
+ import csv
9
+ import io
10
 
11
  from transformers import pipeline
12
 
 
23
 
24
  @app.post("/classify")
25
  async def classify_doc(files: List[UploadFile] = File(...)):
26
+ classificationResults = {}
27
+ for file in files:
28
+ try:
29
+ contents = file.file.read()
30
+ filename = file.filename
31
+
32
+
33
+ if filename.endswith('.pdf'):
34
+ try:
35
+ pages = convert_from_bytes(open(file, 'rb').read())
36
+
37
+ for pagenum, image in enumerate(pages):
38
+ # if pagenum != 0 and pagenum < len(pages):
39
+ # if pagenum == 0:
40
+ classificationRes = classify_acct_dtype_str(image, filename)
41
+
42
+ # add/update classification result dictionary
43
+ if (classificationRes in classificationResults):
44
+ classificationResults.update({classificationRes : classificationResults[classificationRes] + 1})
45
+ else:
46
+ classificationResults.update({classificationRes : 1})
47
+ # break
48
+ except:
49
+ return f"Error in opening {filename}"
50
 
51
+ else:
52
+ classificationRes = classify_acct_dtype_str(contents, filename)
53
+
54
+ # add/update classification result dictionary
55
+ if (classificationRes in classificationResults):
56
+ classificationResults.update({classificationRes : classificationResults[classificationRes] + 1})
57
+ else:
58
+ classificationResults.update({classificationRes : 1})
59
 
60
 
61
 
62
 
63
+ except Exception as err:
64
+ print(Exception, err)
65
+ return {"message": "There was an error in uploading file(s)"}
66
+ finally:
67
+ file.file.close()
68
 
69
+ # Convert dictionary to CSV string
70
+ csv_data = io.StringIO()
71
+ csv_writer = csv.writer(csv_data)
72
+ csv_writer.writerow(["Type", "Count"]) # Header row
73
+ for key, value in classificationResults.items():
74
+ csv_writer.writerow([key, value])
75
+
76
+ return StreamingResponse(
77
+ iter([csv_data.getvalue()]),
78
+ media_type="text/csv",
79
+ headers={"Content-Disposition": f"attachment; filename=data.csv"}
80
+ )
81
 
82
+ # return {"message": f"{[file.filename for file in files]} : {[classifyFiles(file) for file in files]}"}
83
 
84
 
85
 
static/index.html CHANGED
@@ -9,34 +9,22 @@
9
  </head>
10
  <body>
11
  <main>
12
- <section id="text-gen">
13
- <h1>Text generation using Flan T5</h1>
14
  <p>
15
  Model:
16
- <a
17
- href="https://huggingface.co/google/flan-t5-small"
18
- rel="noreferrer"
19
- target="_blank"
20
- >google/flan-t5-small</a
21
- >
22
  </p>
23
- <form class="text-gen-form">
24
- <label for="text-gen-input">Text prompt</label>
25
- <input
26
- id="text-gen-input"
27
- type="text"
28
- value="English: Translate There are many ducks. German:"
29
- />
30
- <button id="text-gen-submit">Submit</button>
31
- <p class="text-gen-output"></p>
32
  </form>
33
  </section>
34
- <section>
35
- <input id="img-input" type="file" multiple="multiple" accept="image/jpeg, image/png, image/jpg">
36
- <button id="classify-btn"></button>
37
- <p id="test-output"></p>
38
- <p id="img-output"></p>
39
- </section>
40
  </main>
41
  </body>
42
  </html>
 
9
  </head>
10
  <body>
11
  <main>
12
+ <section id="doc-classification">
13
+ <h1>Document classification using donutclassifier_acctdocs_by_doctype</h1>
14
  <p>
15
  Model:
16
+ <a
17
+ href="https://huggingface.co/calumpianojericho/donutclassifier_acctdocs_by_doctype"
18
+ rel="noreferrer"
19
+ target="_blank">donutclassifier_acctdocs_by_doctype</a>
 
 
20
  </p>
21
+ <form class="doc-classification-form">
22
+ <input id="doc-input" type="file" multiple="multiple" accept="image/jpeg, image/png, image/jpg">
23
+ <a id="doc-output-link" style="visibility: hidden;">
24
+ <button type="button">Download</button>
25
+ </a>
 
 
 
 
26
  </form>
27
  </section>
 
 
 
 
 
 
28
  </main>
29
  </body>
30
  </html>
static/script.js CHANGED
@@ -1,70 +1,40 @@
1
- const textGenForm = document.querySelector('.text-gen-form');
2
-
3
- const translateText = async (text) => {
4
- const inferResponse = await fetch(`infer_t5?input=${text}`);
5
- const inferJson = await inferResponse.json();
6
-
7
- return inferJson.output;
8
- };
9
-
10
- textGenForm.addEventListener('submit', async (event) => {
11
- event.preventDefault();
12
-
13
- const textGenInput = document.getElementById('text-gen-input');
14
- const textGenParagraph = document.querySelector('.text-gen-output');
15
-
16
- try {
17
- textGenParagraph.textContent = await translateText(textGenInput.value);
18
- } catch (err) {
19
- console.error(err);
20
- }
21
- });
22
-
23
-
24
-
25
-
26
-
27
-
28
-
29
-
30
-
31
-
32
-
33
-
34
-
35
-
36
- async function submitImages(acceptedFiles) {
37
  let formData = new FormData();
38
 
39
  for (const file of acceptedFiles){
40
  formData.append('files', file);
41
  }
42
-
43
  let classifyResponse = await fetch('classify', {
44
  method: 'POST',
45
  body: formData
46
  });
47
- let res = classifyResponse.json();
48
- console.log(res);
49
- return res;
50
  }
51
 
52
- const inputImg = document.getElementById('img-input');
53
- const outputImg = document.getElementById('img-output');
54
- let imageArray = [];
55
 
56
- inputImg.addEventListener("change", async event =>{
 
 
57
  const files = event.target.files;
58
- console.log(files);
59
- await submitImages(files).then((resultText) => {
60
- outputImg.innerText = "Result: " + resultText["message"];
61
- });
62
- // outputImg.innerText = "Result is: " + await submitImages(files);
63
- });
64
 
65
- const classifyBtn = document.getElementById('classify-btn');
66
- const outputPar = document.getElementById('test-output');
67
 
68
- classifyBtn.addEventListener('onclick', () => {
69
- outputPar.innerText = "asa";
 
 
 
 
 
 
 
 
 
 
70
  });
 
1
/*
 * POST the given files to the `classify` endpoint as multipart form data
 * (one `files` entry per file) and resolve with the raw fetch Response.
 */
async function classifyImages(acceptedFiles) {
    const payload = new FormData();
    Array.from(acceptedFiles).forEach((doc) => payload.append('files', doc));

    const requestOptions = { method: 'POST', body: payload };
    return fetch('classify', requestOptions);
}
14
 
15
const inputDoc = document.getElementById('doc-input');
const outputDocLink = document.getElementById('doc-output-link');

/* Object URL currently attached to the download link, kept so it can be released. */
let currentDownloadUrl = null;

/*
 * Extract the filename from a Content-Disposition header value.
 * Falls back to 'data.csv' when the header is missing or has no filename.
 */
function filenameFromDisposition(disposition) {
    const filenameRegex = /filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/;
    const matches = filenameRegex.exec(disposition || '');
    return matches != null && matches[1] ? matches[1].replace(/['"]/g, '') : 'data.csv';
}

/*
 * When the user picks files: classify them, then expose the returned .csv
 * through the Download link. The link stays hidden if anything fails.
 */
inputDoc.addEventListener("change", async event => {
    /* hide the Download button while classification is in progress */
    outputDocLink.style.visibility = "hidden";

    const files = event.target.files;
    if (!files || files.length === 0) {
        /* nothing selected (e.g. the file dialog was cancelled) */
        return;
    }

    try {
        const result = await classifyImages(files);
        if (!result.ok) {
            throw new Error(`classify request failed with status ${result.status}`);
        }

        const filename = filenameFromDisposition(result.headers.get('Content-Disposition'));
        const blob = await result.blob();

        /* release the blob from a previous run so it can be garbage-collected */
        if (currentDownloadUrl !== null) {
            window.URL.revokeObjectURL(currentDownloadUrl);
        }
        currentDownloadUrl = window.URL.createObjectURL(blob);

        /* point the Download button at the file; `download` makes the browser save it under `filename` */
        outputDocLink.href = currentDownloadUrl;
        outputDocLink.download = filename;

        /* only reveal the button once the link is usable */
        outputDocLink.style.visibility = "visible";
    } catch (err) {
        console.error(err);
    }
});