Soumen committed on
Commit
5496661
1 Parent(s): e993791

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -32,6 +32,7 @@ import numpy as np
32
  import pytesseract
33
  import line_cor
34
  import altair as alt
 
35
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
36
  from PIL import Image
37
  API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
@@ -64,18 +65,15 @@ def read_pdf(file):
64
 
65
  @st.cache(suppress_st_warning=True)
66
  def engsum(output):
67
- def query(payload):
68
- response = requests.post(API_URL1, headers=headers1, json=payload)
69
- return response.json()
70
-
71
- out = query({
72
- "inputs": output,
73
- "min_length":450,
74
- "max_length": 650
75
- })
76
- if isinstance(out, list) and out[0].get("generated_text"):
77
- text_output = out[0]["generated_text"]
78
- st.success(text_output)
79
  @st.cache(suppress_st_warning=True)
80
  def bansum(text):
81
  def query(payload):
 
32
  import pytesseract
33
  import line_cor
34
  import altair as alt
35
+ from transformers import AutoTokenizer, AutoModelWithLMHead
36
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
37
  from PIL import Image
38
  API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
 
65
 
66
  @st.cache(suppress_st_warning=True)
67
  def engsum(output):
68
+ tokenizer = AutoTokenizer.from_pretrained('t5-base')
69
+ model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
70
+ #st.text("Using Google T5 Transformer ..")
71
+ inputs = tokenizer.encode("summarize: " + text,return_tensors='pt',
72
+ max_length= 512,
73
+ truncation=True)
74
+ summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
75
+ summary = tokenizer.decode(summary_ids[0])
76
+ st.success(text_output)
 
 
 
77
  @st.cache(suppress_st_warning=True)
78
  def bansum(text):
79
  def query(payload):