SPACERUNNER99 committed on
Commit
40308e4
·
verified ·
1 Parent(s): b1f8ac2

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. BYekan.ttf +0 -0
  3. README.md +12 -12
  4. app.py +143 -0
  5. arial.ttf +3 -0
  6. packages.txt +2 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ arial.ttf filter=lfs diff=lfs merge=lfs -text
BYekan.ttf ADDED
Binary file (33.5 kB). View file
 
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: Word Level With Punc
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.37.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Sub Gen
3
+ emoji: 🐢
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.4.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from faster_whisper import WhisperModel
2
+ import math
3
+ import gradio as gr
4
+ from moviepy import VideoFileClip
5
+ import requests
6
+
7
+
8
+
9
def extract_audio(input_video_name):
    """Extract the audio track of a video file to "audio.mp3".

    Parameters
    ----------
    input_video_name : str
        Path of the input video file.

    Returns
    -------
    tuple[str, float]
        The mp3 filename and the audio duration in seconds.
    """
    # Fixed output name: the app processes one video at a time.
    mp3_file = "audio.mp3"
    # Load the video clip
    video_clip = VideoFileClip(input_video_name)
    try:
        # Extract the audio from the video clip
        audio_clip = video_clip.audio
        duration = audio_clip.duration
        print(f"Audio duration: {duration}")
        # Write the audio to a separate file
        audio_clip.write_audiofile(mp3_file)
    finally:
        # Release the underlying ffmpeg readers even if the write fails,
        # otherwise the handles leak on every failed request.
        audio_clip = video_clip.audio
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()

    print("Audio extraction successful!")
    return mp3_file, duration
28
+
29
def download_video(url):
    """Download the video at *url* to "video.mp4" in streamed chunks.

    Parameters
    ----------
    url : str
        Direct URL of the video to fetch.

    Returns
    -------
    str
        The local filename ("video.mp4").

    Raises
    ------
    requests.HTTPError
        On a non-2xx response (via raise_for_status).
    requests.Timeout
        If the server does not respond or stalls mid-transfer.
    """
    video_file = "video.mp4"
    # stream=True avoids loading the whole file in memory; the timeout
    # (connect, read) prevents a dead server from hanging the app forever,
    # and the `with` closes the connection even if writing fails.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        response.raise_for_status()
        with open(video_file, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
    print("Video downloaded successfully!")
    return video_file
39
+
40
def word_level_transcribe(audio, max_segment_duration=2.0, model_size="tiny"):
    """Transcribe *audio* with faster-whisper and return per-word timestamps.

    Parameters
    ----------
    audio : str
        Path to the audio file to transcribe.
    max_segment_duration : float
        NOTE(review): currently unused — segment length is controlled
        downstream in create_subtitles(). Kept for call compatibility.
    model_size : str
        Whisper model size to load (default "tiny", matching the
        original hard-coded value).

    Returns
    -------
    list[dict]
        One dict per word: {'word': str, 'start': float, 'end': float}.
    """
    model = WhisperModel(model_size, device="cpu")
    segments, info = model.transcribe(
        audio,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=1500),
        word_timestamps=True,
        log_progress=True,
    )
    # model.transcribe returns a lazy generator; materializing it is what
    # actually runs the transcription.
    segments = list(segments)
    wordlevel_info = []
    for segment in segments:
        for word in segment.words:
            print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word))
            wordlevel_info.append({'word': word.word, 'start': word.start, 'end': word.end})
    return wordlevel_info
50
+
51
def create_subtitles(wordlevel_info):
    """Group word-level timestamps into subtitle segments.

    A segment is closed when a word ends with punctuation or when the
    line reaches 5 words. Afterwards, each segment's end time is
    extended to the start of the following segment so playback has no
    subtitle gaps.

    Parameters
    ----------
    wordlevel_info : list[dict]
        Dicts with 'word', 'start' and 'end' keys, as produced by
        word_level_transcribe().

    Returns
    -------
    list[dict]
        Dicts with 'word' (joined text), 'start', 'end' and
        'textcontents' (the member word dicts) keys.
    """
    # Both Latin and CJK sentence punctuation; tune as needed.
    punctuation_marks = {'.', '!', '?', ',', ';', ':', '—', '-', '。', '!', '?'}
    subtitles = []
    line = []

    def _flush(words):
        # Build one subtitle segment from the accumulated words.
        subtitles.append({
            "word": " ".join(item["word"] for item in words),
            "start": words[0]["start"],
            "end": words[-1]["end"],
            "textcontents": words.copy(),
        })

    for word_data in wordlevel_info:
        line.append(word_data)
        current_word = word_data['word']
        # Close the segment on trailing punctuation or at 5 words.
        ends_with_punct = current_word and (current_word[-1] in punctuation_marks)
        if ends_with_punct or len(line) == 5:
            _flush(line)
            line = []

    # Flush any trailing words that never hit a boundary.
    if line:
        _flush(line)

    # Remove gaps between segments by extending each segment's end time
    # to the start of the next segment.
    for i in range(1, len(subtitles)):
        subtitles[i - 1]["end"] = subtitles[i]["start"]

    return subtitles
93
+
94
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp "HH:MM:SS,mmm".

    Fixes two defects in the original implementation:
    * seconds were rendered with a single digit ("%01d"), producing
      timestamps like "00:00:1,500" that violate the SRT format;
    * millisecond rounding could yield 1000 without carrying into the
      seconds field (e.g. 1.9996 s -> "00:00:1,1000").

    Parameters
    ----------
    seconds : float
        Non-negative offset in seconds.

    Returns
    -------
    str
        Zero-padded "HH:MM:SS,mmm" timestamp.
    """
    # Work in integer milliseconds so the rounding carry is handled once.
    total_ms = round(seconds * 1000)
    milliseconds = total_ms % 1000
    total_seconds = total_ms // 1000
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
103
+
104
def generate_subtitle_file(language, segments, input_video_name):
    """Write *segments* to an SRT subtitle file and return its name.

    Parameters
    ----------
    language : str
        Language tag embedded in the filename (e.g. 'fa').
    segments : list[dict]
        Subtitle segments with 'word', 'start' and 'end' keys, as
        produced by create_subtitles().
    input_video_name : str
        Stem used to build the output filename.

    Returns
    -------
    str
        The subtitle filename ("sub-<stem>.<language>.srt").
    """
    subtitle_file = f"sub-{input_video_name}.{language}.srt"
    text = ""
    for index, segment in enumerate(segments):
        segment_start = format_time(segment['start'])
        segment_end = format_time(segment['end'])
        # SRT cue: 1-based index, time range, text, blank separator line.
        text += f"{str(index+1)} \n"
        text += f"{segment_start} --> {segment_end} \n"
        text += f"{segment['word']} \n"
        text += "\n"
    # `with` guarantees the handle is closed even if the write fails
    # (the original left the file open on error).
    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write(text)
    return subtitle_file
118
+
119
def transcribe(video):
    """Full pipeline: video -> audio -> word timestamps -> SRT file.

    Returns the SRT filename, the input video path (echoed back for the
    result player) and the extracted mp3 path.
    """
    audio_path, _duration = extract_audio(video)
    print("transcribe")
    words = word_level_transcribe(audio_path)
    segments = create_subtitles(words)
    srt_path = generate_subtitle_file('fa', segments, 'video_subtitled')
    return srt_path, video, audio_path
127
+
128
# Gradio front-end: upload a video, press the button, and receive the
# generated SRT file, the result video and the extracted audio track.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the progress and final output.")
    with gr.Column():
        video = gr.Video()
        srt_file = gr.File()
        btn = gr.Button("Create")
        video_file_output = gr.Video(label="Result Video")
        mp3_file = gr.Audio(type="filepath")
        # Wire the button to the transcription pipeline.
        btn.click(fn=transcribe, inputs=video, outputs=[srt_file, video_file_output, mp3_file])

demo.launch(debug=True)
arial.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3299c5a1a7252c457a13d243185e103b903794d4e548bdc757b1ad73d4f6f27a
3
+ size 23274572
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ imagemagick
2
+ libmagick++-dev