Spaces:
Runtime error
Runtime error
File size: 6,307 Bytes
62bf155 ae4e758 78f64bc ae4e758 f9d96c0 ae4e758 f9d96c0 ae4e758 f9d96c0 ae4e758 f9d96c0 ae4e758 f9d96c0 ae4e758 e5b9119 52603c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import os

import googletrans
import gradio as gr
import numpy as np
import pytesseract
import requests
from googletrans import Translator
from gtts import gTTS
from PIL import Image
from PyPDF2 import PdfFileReader
# from docx import Document
# Suffix embedded in the generated audio file names; the code shown here only reads it.
cnt = 0
# googletrans' language table; get_key() searches its values and returns the matching key.
langues = googletrans.LANGUAGES
# Hosted inference endpoint used by query() for summarization.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
# SECURITY: a bearer token committed to source cannot be rotated without a code
# change. Prefer the HF_API_TOKEN environment variable; the original value is
# kept only as a backward-compatible fallback and should be revoked/removed.
headers = {
    "Authorization": "Bearer "
    + os.environ.get("HF_API_TOKEN", "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")
}
def query(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply."""
    reply = requests.post(API_URL, json=payload, headers=headers)
    decoded = reply.json()
    return decoded
def get_key(val):
    """Return the first key of ``langues`` whose value equals *val*.

    Yields ``None`` when no entry matches.
    """
    matches = (code for code, label in langues.items() if val == label)
    return next(matches, None)
def read_article(file_name):
    """Extract the text content of an uploaded file.

    Args:
        file_name: Object exposing a ``name`` attribute holding the file path.

    Returns:
        The extracted text. ``.txt`` files are read verbatim, ``.pdf`` pages
        are concatenated with their newlines removed, and ``.jpg``/``.jpeg``/
        ``.png`` images are passed through Tesseract OCR. ``.docx`` and any
        other extension yield an empty string.
    """
    name = file_name.name.replace("\\", '/')
    # Match extensions case-insensitively so ".PDF" or ".TXT" are recognised too.
    lowered = name.lower()
    article = ""
    if lowered.endswith(".txt"):
        # Context manager guarantees the handle is closed after reading.
        with open(name, "r") as handle:
            article = handle.read()
    elif lowered.endswith(".pdf"):
        document = PdfFileReader(name)
        article = "".join(
            document.getPage(index).extractText().replace('\n', '')
            for index in range(document.numPages)
        )
    elif lowered.endswith(".docx"):
        # Word documents are not supported yet; the result stays empty.
        pass
    elif lowered.endswith((".jpg", ".png", ".jpeg")):
        # Only override the tesseract binary when the Windows install path is
        # really present; otherwise keep pytesseract's default lookup.
        windows_tesseract = 'C:/Program Files (x86)/Tesseract-OCR/tesseract.exe'
        if os.path.exists(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract
        with Image.open(name) as img:
            # The recognised text is the article (it used to be discarded).
            article = pytesseract.image_to_string(img)
    return article
def translate_data(text, final_language):
    """Translate *text* into the language named *final_language*.

    The language name is mapped to its key via ``get_key`` and the translated
    string is returned.
    """
    engine = Translator()
    target_code = get_key(final_language)
    outcome = engine.translate(text, dest=target_code)
    return outcome.text
def _save_speech(text, lang, stem):
    """Synthesize *text* with gTTS and save it as ``{stem}{cnt}.wav``; return that path."""
    audio_path = f"{stem}{cnt}.wav"
    speech = gTTS(text=text, lang=lang, slow=False)
    # Drop a stale file from a previous run before writing the new one.
    if os.path.exists(audio_path):
        os.remove(audio_path)
    else:
        print("Impossible de supprimer le fichier car il n'existe pas")
    # NOTE(review): gTTS is documented to write MP3 data; the ".wav" extension
    # is kept so existing file names do not change - confirm this is intended.
    speech.save(audio_path)
    return audio_path


def generate_summary(file_name, mode, final_language):
    """Read an uploaded file and produce speech plus text for the chosen mode.

    Args:
        file_name: Uploaded file object (must expose ``name``).
        mode: ``"traduction"`` translates the text, ``"lecture"`` reads it
            as-is, ``"resume_et_traduire"`` summarizes then translates; any
            other value (``"resume"``) only summarizes.
        final_language: Language name selected by the user; ignored by the
            summarize-only mode, which speaks in the detected source language.

    Returns:
        A ``(audio_path, text)`` tuple: the saved audio file and the text
        that was spoken.
    """
    sentences = read_article(file_name)

    if mode == "traduction":
        translated = translate_data(sentences, final_language)
        audio = _save_speech(translated, get_key(final_language), "audio_traduce")
        return audio, translated

    if mode == "lecture":
        # The text is spoken untranslated, so no translation request is made
        # (the previous call's result was never used).
        audio = _save_speech(sentences, get_key(final_language), "audio_lecture")
        return audio, sentences

    # Both remaining modes start from the summary returned by the model.
    summary = query({"inputs": sentences})[0]['summary_text']

    if mode == "resume_et_traduire":
        translated = translate_data(summary, final_language)
        audio = _save_speech(
            translated, get_key(final_language), "audio_resume_traduire"
        )
        return audio, translated

    # Summarize-only: translate() is called just to learn the source language
    # (``.src``) so the summary is spoken in the document's own language.
    detected = Translator().translate(summary)
    audio = _save_speech(summary, detected.src, "audio_resume")
    return audio, summary
# Gradio UI: one file upload, a mode selector and a target-language selector
# feed generate_summary(); its (audio path, text) result fills the two outputs.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(
            ['resume', 'traduction', 'resume_et_traduire', 'lecture'],
            label="Choix du mode de fonctionnement",
        ),
        # Language names must match the values of ``langues`` so get_key() can
        # map them back; the duplicated 'hebrew' entry is listed once.
        gr.inputs.Radio(
            ['afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
             'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa',
             'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish',
             'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
             'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian',
             'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish',
             'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
             'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy',
             'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali',
             'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian',
             'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian',
             'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish',
             'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu'],
            label="Langage à traduire",
        ),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch()