Spaces:
Running
Running
Sarath0x8f
committed on
Commit
•
d69917c
1
Parent(s):
84247c4
Upload 9 files
Browse files- Audio/output.wav +0 -0
- Audio/translate.wav +0 -0
- ObjCharRec.py +25 -0
- SpllingChecker.py +8 -0
- app.py +53 -0
- demo_app.py +48 -0
- main.py +15 -0
- requirements.txt +0 -0
- translate_speak.py +50 -0
Audio/output.wav
ADDED
Binary file (168 kB). View file
|
|
Audio/translate.wav
ADDED
Binary file (221 kB). View file
|
|
ObjCharRec.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from paddleocr import PaddleOCR
|
2 |
+
|
3 |
+
import translate_speak
|
4 |
+
|
5 |
+
|
6 |
+
def ocr_with_paddle(img):
    """Run PaddleOCR (English) on an image and synthesize speech for the text.

    Parameters
    ----------
    img : str or array-like
        Image path or image data accepted by ``PaddleOCR.ocr``.

    Returns
    -------
    tuple
        ``(recognized_text, audio_path)`` on success.  On failure a
        ``(error_message, None)`` pair is returned so callers that unpack
        two values (e.g. the Gradio translator tab) keep working.
    """
    try:
        ocr = PaddleOCR(lang='en', use_angle_cls=True)
        result = ocr.ocr(img)

        # result[0] is a list of (box, (text, confidence)) entries; keep the
        # original leading-space separator between recognized fragments.
        finaltext = ''.join(' ' + entry[1][0] for entry in result[0])

        # to=1 routes the synthesized speech to the plain-OCR output file.
        audio_path = translate_speak.audio_streaming(txt=finaltext, to=1)
        return finaltext, audio_path
    except Exception:
        # Never return a bare string here: callers unpack two values.
        return "An error occurred, upload image", None
|
23 |
+
|
24 |
+
if __name__ == "__main__":
    # Quick manual smoke test against a bundled sample image.
    print(ocr_with_paddle('Images/download.jpeg'))
|
SpllingChecker.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Minimal spelling-correction demo built on TextBlob.
from textblob import TextBlob

misspelled = "i m lve in wth you"
print("original text: " + misspelled)

blob = TextBlob(misspelled)

print("corrected text: " + str(blob.correct()))
|
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import ObjCharRec
from deep_translator import GoogleTranslator

import demo_app
import translate_speak

# Language names -> codes supported by Google Translate; computed once and
# reused (the dict's keys double as the dropdown's display choices).
langs_dict = GoogleTranslator().get_supported_languages(as_dict=True)
langs_list = list(langs_dict)


def _ocr_text_only(img):
    """Adapter for the single-output tab: keep only the recognized text.

    ocr_with_paddle returns (text, audio_path) on success but may return a
    bare error string on failure, so guard before indexing.
    """
    result = ObjCharRec.ocr_with_paddle(img)
    return result[0] if isinstance(result, tuple) else result


with gr.Blocks() as main_interface:
    gr.Markdown("# OCR")
    with gr.Tabs():
        with gr.TabItem("Intro"):
            pass

        with gr.TabItem("Simple OCR"):
            gr.Markdown("Paddle OCR")
            with gr.Row():
                with gr.Column():
                    # Components get tab-specific names: the original reused
                    # the same variables in both tabs, which only worked by
                    # binding order and was easy to break.
                    ocr_image_input = gr.Image(label="Upload Image")
                    with gr.Row():
                        ocr_clear_btn = gr.ClearButton()
                        ocr_submit_btn = gr.Button("Submit")
                ocr_output_text = gr.Text(label="Output")

            # ocr_with_paddle yields (text, audio); this tab shows text only,
            # so route through the adapter instead of dumping the tuple.
            ocr_submit_btn.click(fn=_ocr_text_only, inputs=ocr_image_input, outputs=ocr_output_text)
            ocr_clear_btn.click(lambda: [None, None], outputs=[ocr_image_input, ocr_output_text])

        with gr.TabItem("translator"):
            with gr.Row():
                with gr.Column():
                    image_input = gr.Image(label="Upload Image")
                    with gr.Row():
                        clear_btn = gr.ClearButton()
                        submit_btn = gr.Button("Submit")
                with gr.Column():
                    with gr.Row():
                        output_text = gr.Text(label="Output")
                        audio_out = gr.Audio(label="Streamed Audio")
                    lang_drop = gr.Dropdown(langs_dict, label="language", interactive=True)
                    translate_btn = gr.Button("Translate")
                    with gr.Row():
                        translated_txt = gr.Text(label="translated text")
                        translated_out = gr.Audio(label="Streamed Audio")

            # OCR -> text + spoken audio; translate -> translated text + audio.
            submit_btn.click(fn=ObjCharRec.ocr_with_paddle, inputs=image_input, outputs=[output_text, audio_out])
            translate_btn.click(fn=translate_speak.translate_txt, inputs=[lang_drop, output_text], outputs=[translated_txt, translated_out])
            clear_btn.click(lambda: [None] * 5, outputs=[image_input, output_text, translated_txt, translated_out, audio_out])

if __name__ == "__main__":
    main_interface.launch()
|
demo_app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import soundfile as sf
|
3 |
+
import numpy as np
|
4 |
+
import tempfile
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Define the file path of the audio file you want to play directly
|
8 |
+
direct_audio_file_path = "Audio/translated_audio.wav" # Replace this with the actual file path
|
9 |
+
|
10 |
+
# Function to handle audio streaming
|
11 |
+
def audio_streaming(audio=None):
    """Copy *audio* (or the default clip) into a temp WAV and return its path.

    Gradio's Audio component plays the file found at the returned path.
    The temporary file is deliberately kept on disk (delete=False) so the
    UI can still read it after this function returns.
    """
    # Fall back to the predefined clip when no file is supplied.
    source = direct_audio_file_path if audio is None else audio

    # Decode, then normalize samples to float32 for a uniform on-disk format.
    samples, rate = sf.read(source)
    samples = np.asarray(samples, dtype=np.float32)

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with tmp:
        sf.write(tmp.name, samples, rate)

    return tmp.name
|
29 |
+
|
30 |
+
# Gradio interface: a single button that streams the predefined audio file.
with gr.Blocks() as demo:
    gr.Markdown("### Audio Streaming App")

    # No inputs are wired up: clicking always plays direct_audio_file_path.
    play_button = gr.Button("Play Direct Audio")
    audio_output = gr.Audio(label="Streamed Audio")

    play_button.click(
        fn=audio_streaming,
        inputs=None,
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch()
|
main.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ObjCharRec import ocr_with_paddle
|
2 |
+
from textblob import TextBlob
|
3 |
+
|
4 |
+
def demo():
    """OCR each sample image and return its spelling-corrected text.

    Returns
    -------
    list[str]
        One TextBlob-corrected string per image in ``paths``.
    """
    paths = ['Images/download.jpeg', 'Images/download.png', 'Images/hq720.jpg', 'Images/testocr.png']

    corrected = []
    for img in paths:
        result = ocr_with_paddle(img)
        # ocr_with_paddle returns (text, audio_path) on success but may
        # return a bare error string on failure; TextBlob needs a string,
        # so passing the raw tuple (as the original did) would raise.
        text = result[0] if isinstance(result, tuple) else result
        corrected.append(str(TextBlob(text).correct()))
    return corrected
|
13 |
+
|
14 |
+
if __name__ == "__main__":
    # Print the corrected OCR text for every bundled sample image.
    print(demo())
|
requirements.txt
ADDED
Binary file (156 Bytes). View file
|
|
translate_speak.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gtts import gTTS
|
3 |
+
from deep_translator import GoogleTranslator
|
4 |
+
import soundfile as sf
|
5 |
+
import tempfile
|
6 |
+
import numpy as np
|
7 |
+
import gtts
|
8 |
+
|
9 |
+
output_path = 'Audio/output.wav'
|
10 |
+
translate_path = 'Audio/translate.wav'
|
11 |
+
|
12 |
+
def audio_streaming(txt=None, lang='en', to=None):
    """Synthesize *txt* with gTTS and return a temp WAV path for playback.

    ``to=1`` stages the synthesized audio in ``output_path`` (plain OCR
    speech); any other value stages it in ``translate_path`` (translated
    speech).  The temp file is kept on disk (delete=False) so Gradio can
    read it after this call returns.

    NOTE(review): gTTS.save writes MP3 data even though the staging files
    carry a .wav extension — sf.read then relies on libsndfile's format
    sniffing to decode them; confirm the installed libsndfile handles MP3.
    """
    tts = gTTS(text=txt, lang=lang, slow=False)
    staging = output_path if to == 1 else translate_path
    tts.save(staging)

    # Re-read and normalize to float32 so the temp copy has a uniform format.
    samples, rate = sf.read(staging)
    samples = np.asarray(samples, dtype=np.float32)

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    with tmp:
        sf.write(tmp.name, samples, rate)

    return tmp.name
|
34 |
+
|
35 |
+
def translate_txt(lang, text):
    """Translate English *text* into *lang* and synthesize it to audio.

    Returns
    -------
    tuple
        ``(translated_text, audio_file_path)``.

    NOTE(review): the speech step hard-codes ``lang='en'``, so translated
    text is pronounced by the English voice — presumably because the UI
    supplies language *names* while gTTS expects codes; confirm intended.
    """
    translated_text = GoogleTranslator(source="en", target=lang).translate(text)
    audio_path = audio_streaming(translated_text, lang='en', to=2)
    return translated_text, audio_path
|
41 |
+
|
42 |
+
if __name__ == "__main__":
    # Diagnostic: entries Google Translate supports that gTTS does not list.
    # NOTE(review): the translate set holds language *names* while
    # gtts.lang.tts_langs() keys are *codes*, so this difference is rough —
    # confirm the comparison is on the intended keys.
    translatable = set(GoogleTranslator().get_supported_languages(as_dict=True))
    speakable = set(gtts.lang.tts_langs())
    not_speak = translatable - speakable
    print(not_speak, len(not_speak))
|
49 |
+
|
50 |
+
|