SPACERUNNER99 committed on
Commit
40308e4
·
verified ·
1 Parent(s): b1f8ac2

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. BYekan.ttf +0 -0
  3. README.md +12 -12
  4. app.py +143 -0
  5. arial.ttf +3 -0
  6. packages.txt +2 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ arial.ttf filter=lfs diff=lfs merge=lfs -text
BYekan.ttf ADDED
Binary file (33.5 kB). View file
 
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: Word Level With Punc
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.37.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Sub Gen
3
+ emoji: 🐢
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.4.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from faster_whisper import WhisperModel
2
+ import math
3
+ import gradio as gr
4
+ from moviepy import VideoFileClip
5
+ import requests
6
+
7
+
8
+
9
def extract_audio(input_video_name):
    """Extract the audio track of a video file to "audio.mp3".

    Parameters
    ----------
    input_video_name : str
        Path of the input video file.

    Returns
    -------
    tuple[str, float]
        The mp3 filename and the audio duration in seconds.
    """
    # Fixed output name: the app processes one video at a time.
    mp3_file = "audio.mp3"
    # Load the video clip
    video_clip = VideoFileClip(input_video_name)
    try:
        # Extract the audio from the video clip
        audio_clip = video_clip.audio
        duration = audio_clip.duration
        print(f"Audio duration: {duration}")
        # Write the audio to a separate file
        audio_clip.write_audiofile(mp3_file)
    finally:
        # Release the underlying ffmpeg readers even if the write fails,
        # otherwise the handles leak on every failed request.
        audio_clip = video_clip.audio
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()

    print("Audio extraction successful!")
    return mp3_file, duration
28
+
29
def download_video(url):
    """Download the video at *url* to "video.mp4" in streamed chunks.

    Parameters
    ----------
    url : str
        Direct URL of the video to fetch.

    Returns
    -------
    str
        The local filename ("video.mp4").

    Raises
    ------
    requests.HTTPError
        On a non-2xx response (via raise_for_status).
    requests.Timeout
        If the server does not respond or stalls mid-transfer.
    """
    video_file = "video.mp4"
    # stream=True avoids loading the whole file in memory; the timeout
    # (connect, read) prevents a dead server from hanging the app forever,
    # and the `with` closes the connection even if writing fails.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        response.raise_for_status()
        with open(video_file, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
    print("Video downloaded successfully!")
    return video_file
39
+
40
def word_level_transcribe(audio, max_segment_duration=2.0, model_size="tiny"):
    """Transcribe *audio* with faster-whisper and return per-word timestamps.

    Parameters
    ----------
    audio : str
        Path to the audio file to transcribe.
    max_segment_duration : float
        NOTE(review): currently unused — segment length is controlled
        downstream in create_subtitles(). Kept for call compatibility.
    model_size : str
        Whisper model size to load (default "tiny", matching the
        original hard-coded value).

    Returns
    -------
    list[dict]
        One dict per word: {'word': str, 'start': float, 'end': float}.
    """
    model = WhisperModel(model_size, device="cpu")
    segments, info = model.transcribe(
        audio,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=1500),
        word_timestamps=True,
        log_progress=True,
    )
    # model.transcribe returns a lazy generator; materializing it is what
    # actually runs the transcription.
    segments = list(segments)
    wordlevel_info = []
    for segment in segments:
        for word in segment.words:
            print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word))
            wordlevel_info.append({'word': word.word, 'start': word.start, 'end': word.end})
    return wordlevel_info
50
+
51
def create_subtitles(wordlevel_info):
    """Group word-level timestamps into subtitle segments.

    A segment is closed when a word ends with punctuation or when the
    line reaches 5 words. Afterwards, each segment's end time is
    extended to the start of the following segment so playback has no
    subtitle gaps.

    Parameters
    ----------
    wordlevel_info : list[dict]
        Dicts with 'word', 'start' and 'end' keys, as produced by
        word_level_transcribe().

    Returns
    -------
    list[dict]
        Dicts with 'word' (joined text), 'start', 'end' and
        'textcontents' (the member word dicts) keys.
    """
    # Both Latin and CJK sentence punctuation; tune as needed.
    punctuation_marks = {'.', '!', '?', ',', ';', ':', '—', '-', '。', '!', '?'}
    subtitles = []
    line = []

    def _flush(words):
        # Build one subtitle segment from the accumulated words.
        subtitles.append({
            "word": " ".join(item["word"] for item in words),
            "start": words[0]["start"],
            "end": words[-1]["end"],
            "textcontents": words.copy(),
        })

    for word_data in wordlevel_info:
        line.append(word_data)
        current_word = word_data['word']
        # Close the segment on trailing punctuation or at 5 words.
        ends_with_punct = current_word and (current_word[-1] in punctuation_marks)
        if ends_with_punct or len(line) == 5:
            _flush(line)
            line = []

    # Flush any trailing words that never hit a boundary.
    if line:
        _flush(line)

    # Remove gaps between segments by extending each segment's end time
    # to the start of the next segment.
    for i in range(1, len(subtitles)):
        subtitles[i - 1]["end"] = subtitles[i]["start"]

    return subtitles
93
+
94
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp "HH:MM:SS,mmm".

    Fixes two defects in the original implementation:
    * seconds were rendered with a single digit ("%01d"), producing
      timestamps like "00:00:1,500" that violate the SRT format;
    * millisecond rounding could yield 1000 without carrying into the
      seconds field (e.g. 1.9996 s -> "00:00:1,1000").

    Parameters
    ----------
    seconds : float
        Non-negative offset in seconds.

    Returns
    -------
    str
        Zero-padded "HH:MM:SS,mmm" timestamp.
    """
    # Work in integer milliseconds so the rounding carry is handled once.
    total_ms = round(seconds * 1000)
    milliseconds = total_ms % 1000
    total_seconds = total_ms // 1000
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
103
+
104
def generate_subtitle_file(language, segments, input_video_name):
    """Write *segments* to an SRT subtitle file and return its name.

    Parameters
    ----------
    language : str
        Language tag embedded in the filename (e.g. 'fa').
    segments : list[dict]
        Subtitle segments with 'word', 'start' and 'end' keys, as
        produced by create_subtitles().
    input_video_name : str
        Stem used to build the output filename.

    Returns
    -------
    str
        The subtitle filename ("sub-<stem>.<language>.srt").
    """
    subtitle_file = f"sub-{input_video_name}.{language}.srt"
    text = ""
    for index, segment in enumerate(segments):
        segment_start = format_time(segment['start'])
        segment_end = format_time(segment['end'])
        # SRT cue: 1-based index, time range, text, blank separator line.
        text += f"{str(index+1)} \n"
        text += f"{segment_start} --> {segment_end} \n"
        text += f"{segment['word']} \n"
        text += "\n"
    # `with` guarantees the handle is closed even if the write fails
    # (the original left the file open on error).
    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write(text)
    return subtitle_file
118
+
119
def transcribe(video):
    """Full pipeline: video -> audio -> word timestamps -> SRT file.

    Returns the SRT filename, the input video path (echoed back for the
    result player) and the extracted mp3 path.
    """
    audio_path, _duration = extract_audio(video)
    print("transcribe")
    words = word_level_transcribe(audio_path)
    segments = create_subtitles(words)
    srt_path = generate_subtitle_file('fa', segments, 'video_subtitled')
    return srt_path, video, audio_path
127
+
128
# Gradio front-end: upload a video, press the button, and receive the
# generated SRT file, the result video and the extracted audio track.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the progress and final output.")
    with gr.Column():
        video = gr.Video()
        srt_file = gr.File()
        btn = gr.Button("Create")
        video_file_output = gr.Video(label="Result Video")
        mp3_file = gr.Audio(type="filepath")
        # Wire the button to the transcription pipeline.
        btn.click(fn=transcribe, inputs=video, outputs=[srt_file, video_file_output, mp3_file])

demo.launch(debug=True)
arial.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3299c5a1a7252c457a13d243185e103b903794d4e548bdc757b1ad73d4f6f27a
3
+ size 23274572
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ imagemagick
2
+ libmagick++-dev