transcription-delight

Sleeping

App Files Files Community

abidlabs HF staff committed on Jul 2, 2024

Commit

fe8d4db

1 Parent(s): add165b

changes

Browse files

Files changed (7) hide show

__pycache__/app.cpython-312.pyc +0 -0
__pycache__/clean.cpython-312.pyc +0 -0
__pycache__/transcribe.cpython-312.pyc +0 -0
__pycache__/utils.cpython-312.pyc +0 -0
app.py +31 -26
clean.py +27 -8
utils.py +8 -7

__pycache__/app.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ

__pycache__/clean.cpython-312.pyc ADDED Viewed

Binary file (1.96 kB). View file

__pycache__/transcribe.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/transcribe.cpython-312.pyc and b/__pycache__/transcribe.cpython-312.pyc differ

__pycache__/utils.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import utils
 import transcribe
 with gr.Blocks(theme="base") as demo:
@@ -18,26 +19,27 @@ with gr.Blocks(theme="base") as demo:
                     source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
                     preview = gr.HTML(label="Video preview")
                     source_component.change(utils.convert_to_embed_url, source_component, preview)
-                # transcribe_btn.click(
-                #     lambda : gr.Tabs(selected="result"),
-                #     None,
-                #     tabs
-                # ).then(
-                #     utils.generate_audio,
-                #     [source, source_component],
-                #     [download_audio],
-                #     show_progress="minimal"
-                # ).then(
-                #     transcribe.transcribe,
-                #     [download_audio],
-                #     [preliminary_transcript],
-                #     show_progress="hidden"
-                # )
-        with gr.Column():
-            transcribe_btn = gr.Button("Transcribe audio 📜", variant="primary")
-            preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
     source.change(utils.transcribe_button, source, transcribe_btn)
@@ -51,15 +53,18 @@ with gr.Blocks(theme="base") as demo:
                 cleanup_options,
                 llm_prompt
             )
         with gr.Column():
-            clean_btn = gr.Button("Clean transcript ✨", variant="primary", interactive=False)
-            gr.Markdown("*Final transcript will appear here*")
-        # with gr.Tab("Result", id="result"):
-        #     with gr.Row():
-        #         with gr.Column():
-        #             download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
-        #             preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
-        #         with gr.Column():
 demo.launch()

 import gradio as gr
 import utils
+import clean
 import transcribe
 with gr.Blocks(theme="base") as demo:
                     source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
                     preview = gr.HTML(label="Video preview")
                     source_component.change(utils.convert_to_embed_url, source_component, preview)
+                    transcribe_btn.click(
+                        utils.generate_audio,
+                        [source, source_component],
+                        [download_audio],
+                        show_progress="minimal"
+                    ).then(
+                        transcribe.transcribe,
+                        [download_audio],
+                        [preliminary_transcript],
+                    ).then(
+                        lambda : [gr.Button(interactive=True), gr.CheckboxGroup(interactive=True)],
+                        None,
+                        [clean_btn, cleanup_options]
+                    )
+        with gr.Column():
+            with gr.Row():
+                transcribe_btn = gr.Button("Transcribe audio 📜", variant="primary")
+                download_audio = gr.DownloadButton("Download .mp3 File 📥", interactive=False)
+            preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, max_lines=10, show_copy_button=True, show_label=False, interactive=False)
     source.change(utils.transcribe_button, source, transcribe_btn)
                 cleanup_options,
                 llm_prompt
             )
+            with gr.Row():
+                clean_btn = gr.Button("Clean transcript ✨", variant="primary")
+                download_md = gr.DownloadButton("Download .md 📥", interactive=False)
         with gr.Column():
+            final_transcript = gr.Markdown("*Final transcript will appear here*", height=400)
+    clean_btn.click(
+        clean.clean_transcript,
+        [download_audio, cleanup_options, llm_prompt, preliminary_transcript],
+        [final_transcript, download_md],
+        show_progress="minimal"
+    )
 demo.launch()

clean.py CHANGED Viewed

@@ -1,12 +1,31 @@
 from huggingface_hub import InferenceClient
 MODEL_NAME = "meta-llama/Meta-Llama-3-70b-Instruct"
-def clean_transcript(prompt, transcript):
-    messages = [
-        {"role": "user", "content": PROMPT}
-    ]
-    client = InferenceClient(model=MODEL_NAME)
-    for c in client.chat_completion(messages, max_tokens=200, stream=True):
-        token = c.choices[0].delta.content
-        print(token, end="")

 from huggingface_hub import InferenceClient
+from pathlib import Path
+import gradio as gr
 MODEL_NAME = "meta-llama/Meta-Llama-3-70b-Instruct"
def split_text_into_chunks(text: str, chunk_size: int = 600) -> list[str]:
    """Split ``text`` into chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` produces, so any run of whitespace
    (spaces, tabs, newlines) is treated as a single separator and is replaced
    by one space when the words of a chunk are joined back together.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk. Must be at least 1.

    Returns:
        The chunks in their original order. The list is empty when ``text``
        contains no words.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently drop the whole
    # text, so reject non-positive sizes explicitly instead.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
def clean_transcript(audio_file, options, prompt, transcript: str):
    """Stream a cleaned-up Markdown version of ``transcript``.

    This is a generator: every value it produces is a
    ``(markdown_text, download_button_update)`` pair. While the model is
    streaming, the second element is ``None``; the last pair carries an
    enabled ``gr.DownloadButton`` pointing at the written ``.md`` file.

    Args:
        audio_file: Path of the audio file the transcript belongs to. Its
            name (without the final suffix) becomes the Markdown heading and
            the ``.md`` file is written next to it.
        options: Selected clean-up options. When empty, the transcript is
            used verbatim and the model is not called.
        prompt: Instruction text prepended to every chunk sent to the model.
        transcript: The raw transcript text.

    Yields:
        ``(text, None)`` after each streamed token, then
        ``(text, gr.DownloadButton(...))`` once the Markdown file exists.

    Raises:
        gr.Error: If ``audio_file`` is ``None`` (nothing was transcribed yet).
    """
    if audio_file is None:
        raise gr.Error("Transcribe an audio file before cleaning the transcript.")

    audio_path = Path(audio_file)
    text = f"### {audio_path.with_suffix('').name}\n\n"

    if not options:
        text += transcript or ""
    else:
        # One client is enough for all chunks; no need to rebuild it each time.
        client = InferenceClient(model=MODEL_NAME)
        for index, chunk in enumerate(split_text_into_chunks(transcript or "")):
            if index:
                # Keep the output of consecutive chunks from running together.
                text += " "
            messages = [
                {"role": "user", "content": prompt + "\n" + chunk}
            ]
            for c in client.chat_completion(messages, max_tokens=1000, stream=True):
                token = c.choices[0].delta.content
                # A streamed delta may carry no content (the field is
                # optional); appending None to a str would raise TypeError.
                if not token:
                    continue
                text += token
                yield text, None

    # Write the final text to a Markdown file next to the audio file.
    md_file = audio_path.with_suffix(".md")
    md_file.write_text(text, encoding="utf-8")

    # Because this function is a generator, a `return value` would never reach
    # the UI; the final result has to be yielded like the intermediate ones.
    yield text, gr.DownloadButton(interactive=True, value=md_file)

utils.py CHANGED Viewed

@@ -56,15 +56,16 @@ def generate_audio(source, source_file):
     else:
         gr.Info("Downloading audio from YouTube...")
         audio_file = download_audio_from_youtube(source_file)
-    return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)
 def generate_prompt(cleanup):
     if not cleanup:
         return gr.Textbox(visible=False)
-    elif cleanup == ["Remove typos"]:
-        return gr.Textbox(visible=True, placeholder="The following is a raw transcript from an automatic transcription system. Remove the typos from the transcript.")
-    elif cleanup == ["Separate into paragraphs"]:
-        return gr.Textbox(visible=True, placeholder="The following is a raw transcript from an automatic transcription system. Separate the transcript into paragraphs based on logical breaks.")
-    elif cleanup == ["Remove typos", "Separate into paragraphs"]:
-        return gr.Textbox(visible=True, placeholder="The following is a raw transcript from an automatic transcription system. Remove the typos and separate the transcript into paragraphs based on logical breaks.")

     else:
         gr.Info("Downloading audio from YouTube...")
         audio_file = download_audio_from_youtube(source_file)
+    return gr.DownloadButton(value=audio_file, interactive=True)
 def generate_prompt(cleanup):
     """Build the prompt textbox for the selected clean-up options.

     Returns a hidden ``gr.Textbox`` when ``cleanup`` is empty. Otherwise
     returns a visible textbox whose value is the assembled prompt: a fixed
     preamble, one instruction per recognised option, and a closing
     instruction about the response format.
     """
     # Nothing selected: hide the prompt box and skip building any text.
     if not cleanup:
         return gr.Textbox(visible=False)

     pieces = [
         "The following is a raw transcript from an automatic transcription system. ",
     ]
     if "Remove typos" in cleanup:
         pieces.append(
             "Fix the minor typos (e.g. misspellings, homophones) in the transcript so that the transcript reads more logically. "
         )
     if "Separate into paragraphs" in cleanup:
         pieces.append(
             "Separate the transcript into paragraphs to make it more readable. "
         )
     pieces.append(
         "Don't add any extra words in your response, like 'Here is the corrected transcript:' just return the final transcript."
     )
     return gr.Textbox(visible=True, value="".join(pieces))