tferhan committed on
Commit
2b9b9f9
·
verified ·
1 Parent(s): d916c61

Update document_scrapped.py

Browse files
Files changed (1) hide show
  1. document_scrapped.py +6 -5
document_scrapped.py CHANGED
@@ -9,6 +9,7 @@ from io import BytesIO
9
  import chardet
10
  from docx import Document
11
  import pandas as pd
 
12
  from io import BytesIO
13
  from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
14
  from pdfminer.converter import TextConverter
@@ -195,17 +196,17 @@ def get_data(url):
195
  ext = jo.split(".")[-1]
196
  if ext == 'xlsx' or ext == 'xls' or ext == 'xlsm':
197
  rs = excel(jo)
198
- return rs
199
  elif ext == 'pdf':
200
  rs = pdf(jo)
201
- return rs
202
  elif ext == 'docx' or ext == 'doc':
203
  rs = docx(jo)
204
- return rs
205
  elif ext == 'csv':
206
  rs = csv(jo)
207
- return rs
208
  elif ext == 'pptx' or ext == 'ppt':
209
  rs = pptx(jo)
210
- return rs
211
  return "No data returned"
 
9
  import chardet
10
  from docx import Document
11
  import pandas as pd
12
+ from sumarize import summarize
13
  from io import BytesIO
14
  from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
15
  from pdfminer.converter import TextConverter
 
196
  ext = jo.split(".")[-1]
197
  if ext == 'xlsx' or ext == 'xls' or ext == 'xlsm':
198
  rs = excel(jo)
199
+ return summarize.invoke({"input":rs})
200
  elif ext == 'pdf':
201
  rs = pdf(jo)
202
+ return summarize.invoke({"input":rs})
203
  elif ext == 'docx' or ext == 'doc':
204
  rs = docx(jo)
205
+ return summarize.invoke({"input":rs})
206
  elif ext == 'csv':
207
  rs = csv(jo)
208
+ return summarize.invoke({"input":rs})
209
  elif ext == 'pptx' or ext == 'ppt':
210
  rs = pptx(jo)
211
+ return summarize.invoke({"input":rs})
212
  return "No data returned"