Spaces:

thealphamerc
/

audio-to-text

Runtime error

App Files Files Community

thealphamerc committed on Apr 29, 2023

Commit

fdad218

1 Parent(s): 19befe8

Added YouTube video-to-text support

Browse files

Files changed (7) hide show

.gitignore +1 -0
Output/audio.txt +0 -1
Output/audio2.txt +0 -1
Output/audio3.json +0 -173
Output/audio3.txt +0 -1
app.py +43 -4
requirements.txt +1 -0

.gitignore CHANGED Viewed

	@@ -0,0 +1 @@


1	+ output/

Output/audio.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- Quatlin, quatlin quatlin quatlin quatlin. Anti-six.

Output/audio2.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- to gain life in all that...

Output/audio3.json DELETED Viewed

@@ -1,173 +0,0 @@
-[
-    {
-        "id": 0,
-        "seek": 0,
-        "start": 0.0,
-        "end": 1.52,
-        "text": " Come and sit on a rock.",
-        "tokens": [
-            50363,
-            7911,
-            290,
-            1650,
-            319,
-            257,
-            3881,
-            13,
-            50439
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 1,
-        "seek": 0,
-        "start": 1.52,
-        "end": 5.08,
-        "text": " Overlooking the river's blow, he wears a hat and some glasses.",
-        "tokens": [
-            50439,
-            3827,
-            11534,
-            262,
-            7850,
-            338,
-            6611,
-            11,
-            339,
-            17326,
-            257,
-            6877,
-            290,
-            617,
-            15232,
-            13,
-            50617
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 2,
-        "seek": 0,
-        "start": 5.08,
-        "end": 7.36,
-        "text": " A smile on his face.",
-        "tokens": [
-            50617,
-            317,
-            8212,
-            319,
-            465,
-            1986,
-            13,
-            50731
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 3,
-        "seek": 0,
-        "start": 7.36,
-        "end": 8.56,
-        "text": " He's not lost.",
-        "tokens": [
-            50731,
-            679,
-            338,
-            407,
-            2626,
-            13,
-            50791
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 4,
-        "seek": 0,
-        "start": 8.56,
-        "end": 10.4,
-        "text": " The water rushes by.",
-        "tokens": [
-            50791,
-            383,
-            1660,
-            38596,
-            416,
-            13,
-            50883
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 5,
-        "seek": 0,
-        "start": 10.4,
-        "end": 12.08,
-        "text": " A constant sound.",
-        "tokens": [
-            50883,
-            317,
-            6937,
-            2128,
-            13,
-            50967
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 6,
-        "seek": 0,
-        "start": 12.08,
-        "end": 13.68,
-        "text": " It takes in the view.",
-        "tokens": [
-            50967,
-            632,
-            2753,
-            287,
-            262,
-            1570,
-            13,
-            51047
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    },
-    {
-        "id": 7,
-        "seek": 0,
-        "start": 13.68,
-        "end": 14.48,
-        "text": " The mountains.",
-        "tokens": [
-            51047,
-            383,
-            12269,
-            13,
-            51087
-        ],
-        "temperature": 0.0,
-        "avg_logprob": -0.34572365704704733,
-        "compression_ratio": 1.356164383561644,
-        "no_speech_prob": 0.01958448439836502
-    }
-]

Output/audio3.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- Come and sit on a rock. Overlooking the river's blow, he wears a hat and some glasses. A smile on his face. He's not lost. The water rushes by. A constant sound. It takes in the view. The mountains.

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 from subprocess import call
 import whisper
@@ -43,7 +44,7 @@ model = whisper.load_model("base")
 inputs = gr.components.Audio(type="filepath", label="Add audio file")
 outputs = gr.components.Textbox()
-title = "Audio To text⚡️"
 description = "An example of using TTS to generate speech from text."
 article = ""
 examples = [
@@ -67,7 +68,7 @@ def transcribe(inputs):
     #     inputs = f.read()
     # load audio and pad/trim it to fit 30 seconds
-    result = model.transcribe(audio=inputs, language='hindi',
                               word_timestamps=False, verbose=True)
 #  ---------------------------------------------------
@@ -75,6 +76,22 @@ def transcribe(inputs):
     return result["text"]
 audio_chunked = gr.Interface(
     fn=transcribe,
     inputs=inputs,
@@ -100,11 +117,33 @@ microphone_chunked = gr.Interface(
     description=description,
     article=article,
 )
 demo = gr.Blocks()
 with demo:
-    gr.TabbedInterface([audio_chunked, microphone_chunked], [
-                       "Audio File", "Microphone"])
 demo.queue(concurrency_count=1, max_size=5)
 demo.launch(show_api=False)

 import os
 os.system("pip install git+https://github.com/openai/whisper.git")
+from pytube import YouTube
 import gradio as gr
 from subprocess import call
 import whisper
 inputs = gr.components.Audio(type="filepath", label="Add audio file")
 outputs = gr.components.Textbox()
+title = "Transcribe multi-lingual audio clips"
 description = "An example of using TTS to generate speech from text."
 article = ""
 examples = [
     #     inputs = f.read()
     # load audio and pad/trim it to fit 30 seconds
+    result = model.transcribe(audio=inputs, language='english',
                               word_timestamps=False, verbose=True)
 #  ---------------------------------------------------
     return result["text"]
+# Transcribe youtube video
+# define function for transcription
+def youtube_transcript(url):
+    try:
+        if url:
+            yt = YouTube(url, use_oauth=True)
+            source = yt.streams.filter(progressive=True, file_extension='mp4').order_by(
+                'resolution').desc().first().download('output/youtube')
+            transcript = model.transcribe(source)
+            return transcript["text"]
+    except Exception as e:
+        print('Error: ', e)
+        return 'Error: ' + str(e)
 audio_chunked = gr.Interface(
     fn=transcribe,
     inputs=inputs,
     description=description,
     article=article,
 )
+youtube_chunked = gr.Interface(
+    fn=youtube_transcript,
+    inputs=[
+        gr.inputs.Textbox(label="Youtube URL", type="text"),
+    ],
+    outputs=[
+        gr.outputs.Textbox(label="Transcription").style(
+            show_copy_button=True),
+    ],
+    allow_flagging="never",
+    title=title,
+    description=description,
+    article=article,
+    examples=[
+        [  "https://www.youtube.com/watch?v=nlMuHtV82q8&ab_channel=NothingforSale24",],
+        ["https://www.youtube.com/watch?v=JzPfMbG1vrE&ab_channel=ExplainerVideosByLauren",],
+        ["https://www.youtube.com/watch?v=S68vvV0kod8&ab_channel=Pearl-CohnTelevision"]
+    ],
+)
 # Assemble the final UI: one tab per interface, with the tab labels listed
 # in the same order as the interfaces.
 demo = gr.Blocks()
 with demo:
+    gr.TabbedInterface([youtube_chunked, audio_chunked, microphone_chunked], [
+        "Youtube", "Audio File", "Microphone"])
 # Enable the request queue with the limits given here, then start the app
 # with show_api=False.
 demo.queue(concurrency_count=1, max_size=5)
 demo.launch(show_api=False)

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 whisper
 gradio===3.27.0

 whisper
 gradio===3.27.0
+pytube