# Smart_Resume / app.py
# ZELEFACK's picture
# Update app.py
# 2093473
from gtts import gTTS
import gradio as gr
from PyPDF2 import PdfFileReader
from googletrans import Translator
import googletrans
import numpy as np
import requests
from PIL import Image
import pytesseract
import os
# from docx import Document
# Suffix interpolated into the generated audio file names. It is never
# incremented in the visible code, so every request reuses the same names.
cnt = 0
# googletrans table mapping language codes (keys) to language names (values);
# get_key() performs the reverse lookup on it.
langues = googletrans.LANGUAGES
# Hosted inference endpoint of the summarization model.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
# NOTE(review): a bearer token is committed in source. Prefer supplying it via
# the HF_API_TOKEN environment variable and rotating the literal below; the
# literal is only kept as a fallback so existing deployments keep working.
headers = {
    "Authorization": "Bearer "
    + os.environ.get("HF_API_TOKEN", "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")
}
def query(payload, timeout=60):
    """POST *payload* as JSON to the summarization endpoint and return the reply.

    Args:
        payload: JSON-serialisable request body (the callers in this file
            send ``{"inputs": <text>}``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block the
            request handler forever if the API stalls.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
def get_key(val):
    """Reverse lookup in ``langues``: return the first key whose value is *val*.

    Returns ``None`` when no entry of ``langues`` has *val* as its value.
    """
    matching_codes = (code for code, label in langues.items() if val == label)
    return next(matching_codes, None)
def read_article(file_name):
    """Extract the text content of an uploaded file.

    The format is chosen from the file extension (case-insensitively):

    * ``.txt``  - the file content is returned unchanged.
    * ``.pdf``  - the text of every page is concatenated, newlines removed.
    * ``.jpg`` / ``.jpeg`` / ``.png`` - the image is run through Tesseract OCR.
    * anything else (including ``.docx``, which is not implemented) - ``""``.

    Args:
        file_name: Object exposing the path of the file as its ``.name``
            attribute.

    Returns:
        The extracted text, or an empty string for unsupported formats.
    """
    # Normalise Windows separators so the path has a single form.
    name = file_name.name.replace("\\", "/")
    suffix_source = name.lower()
    article = ""

    if suffix_source.endswith(".txt"):
        # Context manager closes the handle; the original leaked it.
        with open(name, "r") as handle:
            article = handle.read()
    elif suffix_source.endswith(".pdf"):
        document = PdfFileReader(name)
        article = "".join(
            document.getPage(page).extractText().replace("\n", "")
            for page in range(document.numPages)
        )
    elif suffix_source.endswith((".jpg", ".png", ".jpeg")):
        # Only point pytesseract at the Windows install location when it is
        # actually there; otherwise keep its default lookup of the binary.
        tesseract_exe = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
        if os.path.exists(tesseract_exe):
            pytesseract.pytesseract.tesseract_cmd = tesseract_exe
        with Image.open(name) as img:
            # The OCR output is the article text; previously it was computed
            # and then discarded, so images always produced "".
            article = pytesseract.image_to_string(img)

    return article
def translate_data(text, final_language):
    """Translate *text* into the language named *final_language*.

    *final_language* is a language name; it is converted to the destination
    code with ``get_key`` before being handed to googletrans. The translated
    string is returned.
    """
    engine = Translator()
    target_code = get_key(final_language)
    return engine.translate(text, dest=target_code).text
def _summarize(text):
    """Return the summary of *text* produced by the remote summarization model."""
    response = query({"inputs": text})
    try:
        return response[0]["summary_text"]
    except (KeyError, IndexError, TypeError) as err:
        # Anything other than a non-empty list of {"summary_text": ...} items
        # is reported with the raw reply instead of an opaque lookup error.
        raise RuntimeError(
            f"Unexpected summarization response: {response!r}"
        ) from err


def _speak(text, lang, stem):
    """Synthesize *text* in language *lang* and return the saved audio path."""
    audio_path = f"{stem}{cnt}.wav"
    # NOTE(review): gTTS documents its output as MP3, so the ".wav" suffix is
    # probably misleading - confirm before renaming, callers get this path.
    speech = gTTS(text=text, lang=lang, slow=False)
    # Check whether a previous file exists before trying to delete it.
    if os.path.exists(audio_path):
        os.remove(audio_path)
    else:
        print("Impossible de supprimer le fichier car il n'existe pas")
    speech.save(audio_path)
    return audio_path


def generate_summary(file_name, mode, final_language):
    """Read an uploaded file and return ``(audio_path, text)`` for *mode*.

    Modes:

    * ``"traduction"`` - translate the whole text into *final_language*.
    * ``"lecture"`` - read the text as-is, voiced with *final_language*.
    * ``"resume_et_traduire"`` - summarize, then translate the summary.
    * anything else (``"resume"``) - summarize and voice the summary in the
      language googletrans detects for it.

    Args:
        file_name: Uploaded file object, passed to ``read_article``.
        mode: One of the mode strings above.
        final_language: Language name, converted to a code with ``get_key``.

    Returns:
        A tuple ``(path of the generated audio file, text that was voiced)``.

    Raises:
        ValueError: if no text could be extracted from the file.
        RuntimeError: if the summarization API returns an unexpected reply.
    """
    # Step 1 - read the text.
    sentences = read_article(file_name)
    if not sentences.strip():
        # Fail early with a clear message rather than deep inside the
        # translation / speech libraries.
        raise ValueError("No text could be extracted from the uploaded file")

    # Step 2 - transform it according to the mode and synthesize the audio.
    if mode == "traduction":
        translated = translate_data(sentences, final_language)
        audio = _speak(translated, get_key(final_language), "audio_traduce")
        return audio, translated

    if mode == "lecture":
        # The original also translated the text here and discarded the
        # result; that wasted network round-trip has been removed.
        audio = _speak(sentences, get_key(final_language), "audio_lecture")
        return audio, sentences

    summary = _summarize(sentences)

    if mode == "resume_et_traduire":
        translated = translate_data(summary, final_language)
        audio = _speak(
            translated, get_key(final_language), "audio_resume_traduire"
        )
        return audio, translated

    # Plain summary: translate only to learn the detected source language,
    # which selects the voice.
    detected = Translator().translate(summary)
    audio = _speak(summary, detected.src, "audio_resume")
    return audio, summary
# Gradio UI: a file upload, a mode selector and a language selector feed
# generate_summary(); its (audio path, text) result fills the two outputs.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(
            ['resume', 'traduction', 'resume_et_traduire', 'lecture'],
            label="Choix du mode de fonctionnement",
        ),
        # Language names offered to the user ('hebrew' used to be listed
        # twice, which produced a duplicate radio option).
        gr.inputs.Radio(
            [
                'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
                'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa',
                'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish',
                'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
                'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian',
                'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish',
                'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
                'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy',
                'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali',
                'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian',
                'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian',
                'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish',
                'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu',
            ],
            label="Langage à traduire",
        ),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch()