ZELEFACK committed on
Commit
ae4e758
1 Parent(s): dd41b46

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ from gtts import gTTS
4
+ import gradio as gr
5
+ from PyPDF2 import PdfFileReader
6
+ from googletrans import Translator
7
+ import googletrans
8
+ import numpy as np
9
+ import requests
10
+ from PIL import Image
11
+ import pytesseract
12
+ # from docx import Document
13
+
14
# Mapping published by googletrans; looked up by value in get_key() to turn a
# language name chosen in the UI into the code passed to the translator / TTS.
langues = googletrans.LANGUAGES

# Number of requests handled so far; embedded in every generated audio file
# name so that successive outputs do not overwrite each other.
cnt = 0
16
+
17
+
18
+
19
import os

# Hugging Face Inference API endpoint of the BART summarisation model.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"

# SECURITY: this token was committed in plain text and must be considered
# leaked. Revoke it, provide a fresh one through the HF_API_TOKEN environment
# variable, then delete this fallback. It is kept only so that existing
# deployments that do not set the variable keep working.
_FALLBACK_API_TOKEN = "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"

# ``or`` (rather than a .get() default) so that an empty variable also falls back.
_api_token = os.environ.get("HF_API_TOKEN") or _FALLBACK_API_TOKEN

# HTTP headers sent with every request made by query().
headers = {"Authorization": f"Bearer {_api_token}"}
21
+
22
def query(payload, timeout=120):
    """POST ``payload`` to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"inputs": "text"}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, so a stalled connection would
            block the whole application.

    Returns:
        The parsed JSON body of the response. The reply is returned as-is,
        without inspecting the HTTP status, so callers must check its shape.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
25
+
26
def get_key(val):
    """Return the first key of ``langues`` whose value equals ``val``.

    Returns ``None`` when no entry of ``langues`` has that value.
    """
    matches = (code for code, label in langues.items() if val == label)
    return next(matches, None)
30
+
31
def read_article(file_name):
    """Extract the text content of an uploaded file.

    The reader is chosen from the (case-insensitive) extension of
    ``file_name.name``:

    * ``.txt``  -- the whole file content is returned unchanged.
    * ``.pdf``  -- the text of every page is concatenated, newlines removed.
    * ``.jpg`` / ``.jpeg`` / ``.png`` -- the text recognised by Tesseract OCR.
    * ``.docx`` and any other extension -- not supported, yields ``""``.

    Args:
        file_name: Object exposing the path of the file through a ``name``
            attribute.

    Returns:
        The extracted text, or an empty string for unsupported file types.
    """
    import os  # local import: only needed to probe the Tesseract path below

    # Normalise Windows separators so the path reads the same on every OS.
    name = file_name.name.replace("\\", "/")
    lowered = name.lower()

    if lowered.endswith(".txt"):
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(name, "r") as handle:
            return handle.read()

    if lowered.endswith(".pdf"):
        # The reader pulls pages lazily from the stream, so all extraction has
        # to happen while the file is still open.
        with open(name, "rb") as handle:
            document = PdfFileReader(handle)
            return "".join(
                document.getPage(index).extractText().replace("\n", "")
                for index in range(document.numPages)
            )

    if lowered.endswith((".jpg", ".jpeg", ".png")):
        # Only point pytesseract at the Windows install location when it is
        # really there; everywhere else the executable is resolved from PATH.
        windows_tesseract = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
        if os.path.isfile(windows_tesseract):
            pytesseract.pytesseract.tesseract_cmd = windows_tesseract
        with Image.open(name) as img:
            # Return the recognised text; it used to be computed and dropped,
            # so images always produced an empty article.
            return pytesseract.image_to_string(img)

    # .docx support is not implemented; unknown extensions end up here too.
    return ""
61
+
62
+
63
def translate_data(text, final_language):
    """Translate ``text`` into ``final_language`` and return the translated string.

    ``final_language`` is resolved through ``get_key`` before being handed to
    the translator as the destination.
    """
    client = Translator()
    destination = get_key(final_language)
    outcome = client.translate(text, dest=destination)
    return outcome.text
67
+
68
+
69
def _speak_to_file(text, lang, stem):
    """Synthesise ``text`` in language ``lang`` into ``<stem><cnt>.wav``; return the file name."""
    # NOTE(review): gTTS is documented as producing MP3 data; the ".wav"
    # extension is kept because existing outputs use it -- confirm and rename.
    audio_path = f"{stem}{cnt}.wav"
    gTTS(text=text, lang=lang, slow=False).save(audio_path)
    return audio_path


def _summarize(text):
    """Return the summary of ``text`` from the remote model, raising if the reply is not a summary."""
    reply = query({"inputs": text})
    try:
        return reply[0]["summary_text"]
    except (KeyError, IndexError, TypeError) as err:
        # Anything that is not ``[{"summary_text": ...}]`` is reported with the
        # raw reply instead of an opaque lookup error.
        raise RuntimeError(f"Summarisation request failed: {reply!r}") from err


def generate_summary(file_name, mode, final_language):
    """Read a file, process its text according to ``mode`` and voice the result.

    Args:
        file_name: Uploaded file object, passed to ``read_article``.
        mode: One of
            ``"traduction"`` (translate the text into ``final_language``),
            ``"lecture"`` (read the text unchanged),
            ``"resume_et_traduire"`` (summarise, then translate);
            any other value summarises the text only.
        final_language: Language name resolved with ``get_key``; ignored by the
            summary-only mode, which uses the language detected by the
            translator instead.

    Returns:
        A ``(audio_file_name, text)`` tuple: the generated audio file and the
        text that was spoken.

    Raises:
        ValueError: If no text could be extracted from the file.
        RuntimeError: If the summarisation service does not return a summary.
    """
    global cnt
    sentences = read_article(file_name)
    cnt += 1  # one new number per request keeps the output file names distinct

    if not sentences.strip():
        # Fail early with a clear message instead of letting the speech
        # synthesiser reject an empty string.
        raise ValueError("No text could be extracted from the provided file.")

    if mode == "traduction":
        spoken = translate_data(sentences, final_language)
        return _speak_to_file(spoken, get_key(final_language), "audio_traduce"), spoken

    if mode == "lecture":
        # The text is read as-is, so no translation request is needed here.
        return _speak_to_file(sentences, get_key(final_language), "audio_lecture"), sentences

    if mode == "resume_et_traduire":
        spoken = translate_data(_summarize(sentences), final_language)
        return (
            _speak_to_file(spoken, get_key(final_language), "audio_resume_traduire"),
            spoken,
        )

    # Summary only: the translator is used solely to detect the language of
    # the summary so that it is voiced in the right language.
    spoken = _summarize(sentences)
    detected = Translator().translate(spoken)
    return _speak_to_file(spoken, detected.src, "audio_resume"), spoken
101
+
102
+
103
+
104
# Language names offered in the UI, taken from the same mapping get_key()
# searches so that every choice can be resolved to a code. dict.fromkeys drops
# names that appear more than once (e.g. 'hebrew') while keeping their order.
_LANGUAGE_CHOICES = list(dict.fromkeys(langues.values()))

# Web form: one uploaded file, a processing mode and a language go in;
# the generated audio file and the spoken text come out.
iface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.inputs.File(file_count="single", type="file", label="Fichier à Traduire"),
        gr.inputs.Radio(
            ["resume", "traduction", "resume_et_traduire", "lecture"],
            label="Choix du mode de fonctionnement",
        ),
        gr.inputs.Radio(_LANGUAGE_CHOICES, label="Langage à traduire"),
    ],
    outputs=[
        gr.outputs.Audio(type="file", label="Audio du livre"),
        gr.outputs.Textbox(label="resultat"),
    ],
    theme="dark-seafoam",
)
iface.launch(inbrowser=True, share=True)
126
+
127
+ # GPS ou GSM qui a le GPS (150k, 15k)