Spaces:
Runtime error
Runtime error
File size: 5,544 Bytes
ae4e758 e5b9119 3489f08 ae4e758 29b8f35 ae4e758 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
#!/usr/bin/env python
# coding: utf-8
from gtts import gTTS
import gradio as gr
from PyPDF2 import PdfFileReader
from googletrans import Translator
import googletrans
import numpy as np
import requests
from PIL import Image
import pytesseract
# from docx import Document
import os

# Running count of generated audio files; appended to output file names so
# successive requests do not overwrite each other's audio.
cnt = 0
# googletrans' language table; get_key() searches its values to recover the key.
langues = googletrans.LANGUAGES
# Hugging Face Inference API endpoint of the summarization model.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
# SECURITY: a token committed to source control must be considered leaked and
# should be rotated. Supply the real token through the HF_API_TOKEN
# environment variable; the literal is kept only as a backward-compatible
# fallback for deployments that do not set it.
_API_TOKEN = (
    os.environ.get("HF_API_TOKEN")
    or "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"
)
headers = {"Authorization": f"Bearer {_API_TOKEN}"}
def query(payload, timeout=60):
    """POST ``payload`` to the summarization endpoint and return the JSON reply.

    Args:
        payload: JSON-serializable request body sent to ``API_URL``.
        timeout: Seconds to wait for the server before giving up. Passing
            ``None`` restores the previous wait-forever behaviour.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # requests applies no timeout unless one is given, so a stalled endpoint
    # would otherwise block the calling request indefinitely.
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
def get_key(val):
    """Return the first key in ``langues`` whose value equals ``val``.

    ``None`` is returned when no entry matches.
    """
    matching_keys = (code for code, label in langues.items() if val == label)
    return next(matching_keys, None)
def read_article(file_name):
    """Extract the text content of an uploaded file.

    The reader is chosen from the file extension (matched case-insensitively):

    * ``.txt``  - the file is read as text.
    * ``.pdf``  - the text of every page is extracted with PyPDF2 and
      concatenated, with newlines removed.
    * ``.jpg`` / ``.jpeg`` / ``.png`` - the image is run through Tesseract OCR.

    ``.docx`` and any other extension are not supported and give ``""``.

    Args:
        file_name: Object exposing the path of the file as ``.name``.

    Returns:
        The extracted text, or an empty string for unsupported formats.
    """
    import os

    # Normalise Windows separators so the path reads the same on every platform.
    name = file_name.name.replace("\\", "/")
    lowered = name.lower()
    article = ""

    if lowered.endswith(".txt"):
        # Context managers guarantee the handle is closed (it used to leak).
        with open(name, "r") as handle:
            article = handle.read()
    elif lowered.endswith(".pdf"):
        with open(name, "rb") as handle:
            document = PdfFileReader(handle)
            article = "".join(
                document.getPage(index).extractText().replace("\n", "")
                for index in range(document.numPages)
            )
    elif lowered.endswith((".jpg", ".jpeg", ".png")):
        # Only point pytesseract at the Windows install when it really exists;
        # everywhere else the default lookup on PATH is left untouched.
        windows_tesseract = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
        if os.path.isfile(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract
        with Image.open(name) as img:
            # The OCR output used to be computed and then discarded, so image
            # uploads always produced an empty article.
            article = pytesseract.image_to_string(img)

    return article
def translate_data(text, final_language):
    """Translate ``text`` into the language named by ``final_language``.

    ``final_language`` is a value of ``langues``; it is converted to the
    corresponding key with ``get_key`` before being handed to googletrans.
    Returns the translated string.
    """
    engine = Translator()
    target_code = get_key(final_language)
    return engine.translate(text, dest=target_code).text
def _summarize(text):
    """Return the summary of ``text`` produced by the inference endpoint."""
    reply = query({"inputs": text})
    try:
        return reply[0]["summary_text"]
    except (KeyError, IndexError, TypeError) as err:
        # Anything that is not a non-empty list of {"summary_text": ...} is
        # reported with the payload instead of surfacing as a bare KeyError.
        raise RuntimeError(
            f"Unexpected summarization response: {reply!r}"
        ) from err


def generate_summary(file_name, mode, final_language):
    """Read an uploaded file and produce speech plus the matching text.

    Args:
        file_name: Uploaded file object, passed on to ``read_article``.
        mode: One of
            ``"traduction"`` - translate the text into ``final_language``;
            ``"lecture"`` - read the text as-is;
            ``"resume_et_traduire"`` - summarize, then translate the summary;
            any other value (``"resume"`` in the UI) - summarize only and
            speak in the language googletrans detects for the summary.
        final_language: Language name (a value of ``langues``). It selects the
            translation target and, in every mode except plain summary, the
            text-to-speech language.

    Returns:
        A ``(audio_path, text)`` tuple: the path of the saved audio file and
        the text that was spoken.

    Raises:
        RuntimeError: If the summarization endpoint returns an unexpected
            payload.
    """
    global cnt
    sentences = read_article(file_name)
    cnt += 1
    # Freeze the counter so the saved file and the returned path always agree.
    call_id = cnt

    if mode == "traduction":
        spoken = translate_data(sentences, final_language)
        speech_lang = get_key(final_language)
        stem = "audio_traduce"
    elif mode == "lecture":
        # The previous version also sent the text to the translator here and
        # threw the result away; that network round-trip has been removed.
        spoken = sentences
        speech_lang = get_key(final_language)
        stem = "audio_lecture"
    elif mode == "resume_et_traduire":
        spoken = translate_data(_summarize(sentences), final_language)
        speech_lang = get_key(final_language)
        stem = "audio_resume_traduire"
    else:
        spoken = _summarize(sentences)
        # The translation is requested only for its detected source language.
        speech_lang = Translator().translate(spoken).src
        stem = "audio_resume"

    # NOTE(review): gTTS is documented to write MP3 data; the ".wav" suffix is
    # kept because the file names are part of the existing behaviour - confirm
    # before renaming.
    audio_path = f"{stem}{call_id}.wav"
    gTTS(text=spoken, lang=speech_lang, slow=False).save(audio_path)
    return audio_path, spoken
# Gradio UI: one file upload, the processing mode and the target language in;
# the generated audio and the spoken text out.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(
            ['resume', 'traduction', 'resume_et_traduire', 'lecture'],
            label="Choix du mode de fonctionnement",
        ),
        # 'hebrew' used to be listed twice, which rendered two identical
        # radio buttons; each language now appears once.
        gr.inputs.Radio(
            [
                'afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani',
                'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa',
                'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish',
                'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian',
                'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian',
                'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish',
                'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)',
                'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy',
                'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali',
                'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian',
                'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian',
                'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish',
                'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu',
            ],
            label="Langage à traduire",
        ),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch()
# GPS, or a GSM unit that includes GPS (150k, 15k)