aritheanalyst committed on
Commit
5cddb5b
1 Parent(s): 0f029ca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #**************** IMPORT PACKAGES ********************
2
+ import gradio as gr
3
+ import numpy as np
4
+ import pytesseract as pt
5
+ import pdf2image
6
+ import os
7
+ import tempfile
8
+ from fpdf import FPDF
9
+ import re
10
+ import pdfkit
11
+ import yake
12
+ from zipfile import ZipFile
13
+ from gtts import gTTS
14
+ from pdfminer.high_level import extract_text
15
+
16
+
17
def pdf_to_text(text, PDF):
    """Turn pasted text or an uploaded PDF into speech, plain text and a PDF.

    Args:
        text: Text typed into the Gradio textbox. When it is empty (or only
            whitespace) the text is extracted from ``PDF`` instead.
        PDF: Upload coming from the Gradio ``"file"`` input. Either an object
            exposing the on-disk path as ``.name`` or the path itself; may be
            ``None`` when nothing was uploaded.

    Returns:
        A tuple ``(audio_path, output_text, pdf_path)`` lining up with the
        ``["audio", "text", "file"]`` outputs of the interface.

    Raises:
        ValueError: If neither text nor a PDF was supplied, or if the chosen
            source contains no readable text.
    """
    if text and text.strip():
        output_text = text
    else:
        if PDF is None:
            raise ValueError("Provide some text or upload a PDF file.")
        # The upload is read through its ``.name`` path; a plain path string
        # is accepted as well (presumably what newer Gradio versions pass --
        # TODO confirm against the deployed Gradio version).
        pdf_path = getattr(PDF, "name", PDF)
        output_text = extract_text(pdf_path)

    # Fail early with a clear message instead of handing empty text to the
    # speech and PDF back ends.
    if not output_text or not output_text.strip():
        raise ValueError("No text could be extracted from the input.")

    # NOTE: an earlier summarisation step referenced `model`, `tokenizer`,
    # `inputs` and `Min`, none of which are defined in this file, so every
    # call failed with NameError. It is left out until a summarisation model
    # is actually loaded.

    # The built-in "Times" font is presumably limited to Latin-1 (see the
    # FPDF documentation); replace anything else so PDF generation does not
    # fail on such characters. The returned text keeps the original content.
    printable_text = output_text.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Times", size=12)
    # 190 wide cells, 10 high lines, centred text.
    pdf.multi_cell(190, 10, txt=printable_text, align="C")
    pdf.output("text.pdf")

    # NOTE(review): gTTS reportedly writes MP3 data regardless of the file
    # extension; the "audio.wav" name is kept so existing consumers of the
    # returned path keep working -- confirm whether it should be renamed.
    speech = gTTS(text=output_text, lang="en", slow=False)
    speech.save("audio.wav")

    return "audio.wav", output_text, "text.pdf"
47
+
48
+
49
# Gradio UI: a textbox and a file upload are passed to pdf_to_text, whose
# three results are shown as an audio player, a textbox and a file download.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=["text", "file"],
    outputs=["audio", "text", "file"],
)

# Only start the web server when the file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)