# Spaces: Runtime error
import os

import googletrans
import gradio as gr
import numpy as np
import pytesseract
import requests
from googletrans import Translator
from gtts import gTTS
from PIL import Image
from PyPDF2 import PdfFileReader

# from docx import Document
# Running count of generated audio files; generate_summary increments it and
# embeds it in each output file name.
cnt = 0

# googletrans' language table; get_key() searches its values (language names)
# to recover the matching key (language code).
langues = googletrans.LANGUAGES

# Hugging Face Inference API endpoint used for summarization.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"

# SECURITY: an API token is committed in this file. Prefer supplying it through
# the HF_API_TOKEN environment variable; the embedded value is kept only as a
# fallback so existing deployments keep working, and should be rotated and
# removed from source control.
_HF_API_TOKEN = os.environ.get(
    "HF_API_TOKEN", "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"
)
headers = {"Authorization": f"Bearer {_HF_API_TOKEN}"}
def query(payload, timeout=60):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": text}``.
        timeout: Maximum number of seconds to wait for the server. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
def get_key(val):
    """Return the first key of ``langues`` whose value equals ``val``.

    ``None`` is returned when no value matches.
    """
    matching_keys = (code for code, label in langues.items() if val == label)
    return next(matching_keys, None)
def read_article(file_name):
    """Extract the text content of an uploaded file.

    The reader is chosen from the file extension, compared
    case-insensitively:

    * ``.txt``: the file is read as text;
    * ``.pdf``: the text of every page is concatenated, newlines removed;
    * ``.jpg`` / ``.jpeg`` / ``.png``: the image is passed to Tesseract OCR;
    * ``.docx`` and anything else: not supported, yields ``""``.

    Args:
        file_name: Object exposing the file's path as its ``name`` attribute.

    Returns:
        The extracted text, or an empty string when the format is not handled.
    """
    path = file_name.name.replace("\\", "/")
    lowered = path.lower()

    if lowered.endswith(".txt"):
        with open(path, "r") as handle:
            return handle.read()

    if lowered.endswith(".pdf"):
        # Keep the file open while pages are read and close it afterwards.
        # NOTE(review): PdfFileReader/numPages/getPage/extractText are the
        # legacy PyPDF2 names; confirm the pinned PyPDF2 version provides them.
        with open(path, "rb") as handle:
            document = PdfFileReader(handle)
            return "".join(
                document.getPage(index).extractText().replace("\n", "")
                for index in range(document.numPages)
            )

    if lowered.endswith((".jpg", ".jpeg", ".png")):
        # Only point pytesseract at the Windows install when it actually
        # exists; elsewhere keep pytesseract's own default command.
        windows_tesseract = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
        if os.path.isfile(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract
        with Image.open(path) as img:
            # The OCR output is the article text (it was previously discarded).
            return pytesseract.image_to_string(img)

    return ""
def translate_data(text, final_language):
    """Translate ``text`` into the language named ``final_language``.

    The language name is converted to a code with ``get_key`` and handed to
    a fresh googletrans ``Translator``; the translated string is returned.
    """
    engine = Translator()
    target_code = get_key(final_language)
    return engine.translate(text, dest=target_code).text
def _summarize(text):
    """Return the summary of ``text`` obtained from the remote model via ``query``."""
    reply = query({"inputs": text})
    try:
        return reply[0]["summary_text"]
    except (KeyError, IndexError, TypeError) as err:
        # Anything but a list of {"summary_text": ...} is treated as a failure;
        # surface the raw reply instead of an opaque lookup error.
        raise RuntimeError(f"Summarization request failed: {reply!r}") from err


def _speak(text, lang, prefix, index):
    """Synthesize ``text`` with gTTS and return the path of the saved audio file."""
    # gTTS writes MP3 data, so the file is named accordingly.
    audio_path = f"{prefix}{index}.mp3"
    gTTS(text=text, lang=lang, slow=False).save(audio_path)
    return audio_path


def generate_summary(file_name, mode, final_language):
    """Read a file, optionally summarize and/or translate it, and voice it.

    Args:
        file_name: Uploaded file object, passed to ``read_article``.
        mode: One of ``"traduction"`` (translate), ``"lecture"`` (read the
            text as is), ``"resume_et_traduire"`` (summarize then translate);
            any other value summarizes without translating.
        final_language: Language name resolved to a code with ``get_key``.
            It is the translation target and the speech language, except in
            the summarize-only mode where the language reported by the
            translator for the summary is used for speech.

    Returns:
        A ``(audio_path, text)`` tuple: the generated audio file and the text
        that was spoken.

    Raises:
        ValueError: If no text could be extracted from the file.
        RuntimeError: If the summarization service returns an unexpected reply.
    """
    global cnt

    article = read_article(file_name)
    if not article.strip():
        raise ValueError("No text could be extracted from the provided file.")

    # A new index per request keeps output file names distinct.
    cnt += 1

    if mode == "traduction":
        spoken = translate_data(article, final_language)
        lang = get_key(final_language)
        prefix = "audio_traduce"
    elif mode == "lecture":
        # The text is read unchanged, so no translation request is made.
        spoken = article
        lang = get_key(final_language)
        prefix = "audio_lecture"
    elif mode == "resume_et_traduire":
        spoken = translate_data(_summarize(article), final_language)
        lang = get_key(final_language)
        prefix = "audio_resume_traduire"
    else:
        spoken = _summarize(article)
        # Speak the summary in the source language the translator reports.
        lang = Translator().translate(spoken).src
        prefix = "audio_resume"

    return _speak(spoken, lang, prefix, cnt), spoken
# Gradio UI: one uploaded file, a processing mode and a language go in; the
# generated audio file and the resulting text come out.
# NOTE(review): gr.inputs / gr.outputs are Gradio's legacy namespaces; confirm
# the deployed Gradio version still provides them.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(
            ['resume', 'traduction', 'resume_et_traduire', 'lecture'],
            label="Choix du mode de fonctionnement",
        ),
        # Language names are looked up in `langues` by get_key, so each entry
        # must match one of its values. Each name is listed once ('hebrew'
        # used to appear twice).
        gr.inputs.Radio(
            [
                'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
                'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa',
                'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish',
                'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
                'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian',
                'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish',
                'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
                'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy',
                'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali',
                'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian',
                'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian',
                'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish',
                'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu',
            ],
            label="Langage à traduire",
        ),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch()
# GPS or GSM that has GPS (150k, 15k)