Spaces:
Running
Running
#**************** IMPORT PACKAGES ******************** | |
import gradio as gr | |
import numpy as np | |
import pytesseract as pt | |
import pdf2image | |
import os | |
import tempfile | |
from fpdf import FPDF | |
import re | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
from nltk.tokenize import word_tokenize | |
import os | |
import pdfkit | |
import yake | |
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig | |
from summarizer import Summarizer,TransformerSummarizer | |
from transformers import pipelines | |
# Fetch the NLTK 'punkt' tokenizer data at import time
# (presumably needed by the sent_tokenize / word_tokenize imports above).
nltk.download('punkt')

# Hugging Face checkpoint that backs the extractive summarizer.
model_name = 'nlpaueb/legal-bert-base-uncased'

# Load the checkpoint's configuration with hidden-state output switched on,
# then build the tokenizer and model from the same checkpoint.
custom_config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(model_name, config=custom_config)

# Summarizer wired to the custom model/tokenizer pair loaded above.
bert_legal_model = Summarizer(
    custom_model=custom_model,
    custom_tokenizer=custom_tokenizer,
)
from zipfile import ZipFile | |
from gtts import gTTS | |
from pdfminer.high_level import extract_text | |
def pdf_to_text(file_obj):
    """Read the text of a PDF and synthesize it as spoken English audio.

    Args:
        file_obj: Object whose ``name`` attribute is the filesystem path of
            the PDF to read.

    Returns:
        str: Path of the generated audio file.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    text = extract_text(file_obj.name)
    if not text or not text.strip():
        # gTTS rejects empty input with a bare assertion; surface a clear,
        # actionable error instead (e.g. for scanned, image-only PDFs).
        raise ValueError("No extractable text found in the PDF.")

    # Reserve a unique output path so simultaneous requests cannot overwrite
    # each other's audio. gTTS produces MP3 data, hence the ".mp3" suffix.
    # The file is intentionally left on disk so the caller can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        output_path = audio_file.name

    speech = gTTS(text=text, lang='en', slow=False)
    speech.save(output_path)
    return output_path
# path = folder_name | |
# return path | |
#pageObject.extractText() | |
# Gradio UI: one file-upload input routed through pdf_to_text, with the
# returned path rendered by an audio output component.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs="file",
    outputs="audio",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)