File size: 1,459 Bytes
5cddb5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#**************** IMPORT PACKAGES ********************
import gradio as gr
import numpy as np
import pytesseract as pt
import pdf2image
import os
import tempfile
from fpdf import FPDF
import re
import pdfkit
import yake
from zipfile import ZipFile
from gtts import gTTS
from pdfminer.high_level import extract_text


def pdf_to_text(text, PDF):
    """Turn typed text or an uploaded PDF into speech, plain text and a PDF.

    Args:
        text: Text typed into the UI. When it is empty (or only whitespace)
            the text is extracted from ``PDF`` instead.
        PDF: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. Only used when ``text`` is
            empty.

    Returns:
        A tuple ``("audio.wav", output_text, "text.pdf")``: the path of the
        synthesized speech, the text that was spoken, and the path of the PDF
        rendering of that text. Both files are written relative to the
        current working directory and are overwritten on every call.

    Raises:
        ValueError: If neither text nor a PDF is supplied, or if no text is
            available to convert.
    """
    if text and text.strip():
        # Typed text takes priority over the uploaded file and is used as-is.
        output_text = text
    else:
        if PDF is None:
            raise ValueError("Provide either some text or a PDF file.")
        # Accept both an upload object (path in .name) and a plain path.
        pdf_path = getattr(PDF, "name", PDF)
        output_text = extract_text(pdf_path)

    if not output_text or not output_text.strip():
        # Fail early with a clear message instead of deep inside gTTS/FPDF.
        raise ValueError("No text found to convert.")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Times", size=12)
    # The built-in "Times" font is limited to Latin-1; replace anything
    # outside that range so one stray character cannot abort PDF creation.
    printable_text = output_text.encode("latin-1", "replace").decode("latin-1")
    pdf.multi_cell(190, 10, txt=printable_text, align="C")
    pdf.output("text.pdf")

    # NOTE(review): gTTS writes MP3 data; the ".wav" name is kept so the
    # returned path stays the same for existing callers -- confirm whether
    # it should become "audio.mp3".
    speech = gTTS(text=output_text, lang="en", slow=False)
    speech.save("audio.wav")

    return "audio.wav", output_text, "text.pdf"
   
    
  #  return path 
    #pageObject.extractText() 
# Gradio UI: a text box and a file upload feed pdf_to_text, whose three
# return values are shown as an audio player, a text box and a file download.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=["text", "file"],
    outputs=["audio", "text", "file"],
)

if __name__ == "__main__":
    # Start the app only when run as a script; share=True is passed through
    # to Gradio's launch().
    iface.launch(share=True)