Spaces:
Runtime error
Runtime error
import os

import googletrans
import gradio as gr
import numpy as np
import pytesseract
import requests
from googletrans import Translator
from gtts import gTTS
from PIL import Image
from PyPDF2 import PdfFileReader

# from docx import Document
# Numeric suffix embedded in the generated audio file names. The only
# increment in the visible code is commented out, so it stays at 0.
cnt = 0

# googletrans' table of language codes (keys) and language names (values).
langues = googletrans.LANGUAGES

# Hugging Face Inference API endpoint used for summarization.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"

# SECURITY: the bearer token used to be available only as a literal in this
# file. It is now taken from the HF_API_TOKEN environment variable when set.
# The literal is kept solely as a backward-compatible fallback; it has been
# published with the source, so it should be revoked and the fallback removed.
headers = {
    "Authorization": "Bearer "
    + os.environ.get("HF_API_TOKEN", "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")
}
def query(payload, timeout=60):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serializable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Failing here is clearer than handing an error payload to callers
            that immediately index the result as a list.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    return response.json()
def get_key(val):
    """Return the first key of ``langues`` whose value equals ``val``.

    Returns ``None`` when no entry matches.
    """
    matching_codes = (code for code, label in langues.items() if val == label)
    return next(matching_codes, None)
def read_article(file_name):
    """Extract the text content of an uploaded file.

    The file type is chosen from the path's extension (case-insensitive):
    ``.txt`` is read as UTF-8 text, ``.pdf`` is read page by page with
    PyPDF2, and ``.jpg``/``.jpeg``/``.png`` images are run through Tesseract
    OCR.

    Args:
        file_name: Object exposing the file's path as its ``.name``
            attribute.

    Returns:
        str: The extracted text, or ``""`` for any other extension
        (including ``.docx``, whose support is not implemented).
    """
    path = file_name.name.replace("\\", "/")
    lowered = path.lower()

    if lowered.endswith(".txt"):
        # The context manager closes the handle; the previous code leaked it.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    if lowered.endswith(".pdf"):
        # NOTE(review): PdfFileReader / numPages / getPage / extractText are
        # the legacy PyPDF2 names, removed in PyPDF2 3.0 - confirm the pinned
        # version before upgrading.
        document = PdfFileReader(path)
        return "".join(
            document.getPage(index).extractText().replace("\n", "")
            for index in range(document.numPages)
        )

    if lowered.endswith((".jpg", ".jpeg", ".png")):
        # Only point pytesseract at the Windows install location when it is
        # actually there; elsewhere keep pytesseract's default so the
        # binary is resolved from PATH.
        windows_tesseract = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
        if os.path.isfile(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract
        with Image.open(path) as img:
            # The OCR output used to be stored in an unused variable, so
            # images always produced an empty article.
            return pytesseract.image_to_string(img)

    # Unsupported extension; .docx is recognized by the UI but not handled.
    return ""
def translate_data(text, final_language):
    """Translate ``text`` into the language named by ``final_language``.

    ``final_language`` is resolved to the ``dest`` argument through
    ``get_key``; the translated string is returned.
    """
    engine = Translator()
    return engine.translate(text, dest=get_key(final_language)).text
def _summarize(text):
    """Return the ``summary_text`` of the first result ``query`` gives for ``text``."""
    return query({"inputs": text})[0]["summary_text"]


def _speak_to_file(text, lang, path):
    """Synthesize ``text`` with gTTS in ``lang``, save it to ``path``, return ``path``."""
    speech = gTTS(text=text, lang=lang, slow=False)
    # Check that a previous output exists before trying to delete it.
    if os.path.exists(path):
        os.remove(path)
    else:
        print("Impossible de supprimer le fichier car il n'existe pas")
    speech.save(path)
    return path


def generate_summary(file_name, mode, final_language):
    """Read an uploaded file and produce spoken audio plus the spoken text.

    Args:
        file_name: Uploaded file object, forwarded to ``read_article``.
        mode: One of
            ``"traduction"`` - translate the text into ``final_language``;
            ``"lecture"`` - read the text unchanged;
            ``"resume_et_traduire"`` - summarize, then translate;
            anything else - summarize only.
        final_language: Language name resolved to a code by ``get_key``.
            Unused in summarize-only mode, where the speech language is the
            source language reported by the translator for the summary.

    Returns:
        tuple[str, str]: Path of the saved audio file and the text that was
        spoken.

    Raises:
        ValueError: If no text could be extracted from the file. This fails
            early with a clear message instead of letting the speech engine
            reject an empty string.
    """
    # NOTE(review): gTTS saves MP3 data, yet the file names end in ".wav";
    # the names are kept as-is - confirm the audio output copes with it.
    # NOTE(review): ``cnt`` is not incremented anywhere visible, so every
    # request of a given mode reuses the same output file.
    sentences = read_article(file_name)
    if not sentences.strip():
        raise ValueError("Aucun texte n'a pu être extrait du fichier")

    if mode == "traduction":
        text_translate = translate_data(sentences, final_language)
        audio_path = _speak_to_file(
            text_translate, get_key(final_language), f"audio_traduce{cnt}.wav"
        )
    elif mode == "lecture":
        # The text is read as-is. The translation request previously issued
        # here was discarded, so it only added latency and a failure point.
        text_translate = sentences
        audio_path = _speak_to_file(
            text_translate, get_key(final_language), f"audio_lecture{cnt}.wav"
        )
    elif mode == "resume_et_traduire":
        text_translate = translate_data(_summarize(sentences), final_language)
        audio_path = _speak_to_file(
            text_translate,
            get_key(final_language),
            f"audio_resume_traduire{cnt}.wav",
        )
    else:
        text_translate = _summarize(sentences)
        # The translation is requested only to learn the summary's source
        # language, which selects the speech language.
        detected = Translator().translate(text_translate)
        audio_path = _speak_to_file(
            text_translate, detected.src, f"audio_resume{cnt}.wav"
        )

    return audio_path, text_translate
# Gradio UI: one uploaded file, an operating mode and a language go in; the
# generated audio file and the spoken text come out.
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` are the legacy Gradio
# component namespaces - confirm the pinned Gradio version still ships them.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(
            ['resume', 'traduction', 'resume_et_traduire', 'lecture'],
            label="Choix du mode de fonctionnement",
        ),
        # Language names must match the values of ``langues`` so that
        # ``get_key`` can resolve them. 'hebrew' used to be listed twice,
        # which rendered a duplicated option.
        gr.inputs.Radio(
            [
                'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
                'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa',
                'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish',
                'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
                'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian',
                'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish',
                'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
                'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy',
                'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali',
                'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian',
                'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian',
                'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish',
                'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu',
            ],
            label="Langage à traduire",
        ),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch()