devilent2 committed on
Commit
58cafdb
1 Parent(s): 82109fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -9
app.py CHANGED
@@ -1,19 +1,183 @@
1
- import gradio as gr
2
- import numpy as np
3
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  graudio=gr.Audio(type="filepath",show_download_button=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- def test():
9
- return 'test'
10
 
 
 
 
 
 
 
 
 
 
 
 
11
 
 
 
12
 
 
 
 
13
 
14
- demo = gr.Interface(test,
15
- inputs=[graudio],
16
- outputs=[])
17
 
18
- if __name__ == "__main__":
19
- demo.launch()
 
1
+ import torch
 
2
  import time
3
+ import moviepy.editor as mp
4
+ import psutil
5
+ import gradio as gr
6
+ import spaces
7
+ from transformers import pipeline
8
+ from transformers.pipelines.audio_utils import ffmpeg_read
9
+
10
+ DEFAULT_MODEL_NAME = "distil-whisper/distil-large-v3"
11
+ BATCH_SIZE = 8
12
+
13
+ print('start app')
14
+
15
+ device = 0 if torch.cuda.is_available() else "cpu"
16
+ if device == "cpu":
17
+ DEFAULT_MODEL_NAME = "openai/whisper-tiny"
18
+
19
def load_pipeline(model_name):
    """Build an automatic-speech-recognition pipeline for ``model_name``.

    Args:
        model_name: Model identifier forwarded to ``pipeline`` as ``model``.

    Returns:
        Whatever ``pipeline`` returns for the ASR task, configured with
        30-second chunks and the module-level ``device``.
    """
    asr_options = {
        "task": "automatic-speech-recognition",
        "model": model_name,
        "chunk_length_s": 30,
        "device": device,
    }
    return pipeline(**asr_options)
26
+
27
+ pipe = load_pipeline(DEFAULT_MODEL_NAME)
28
+ #pipe = None
29
+
30
+
31
+ from gpustat import GPUStatCollection
32
+
33
def update_gpu_status():
    """Return a one-line status string for the first GPU reported by gpustat.

    Returns:
        ``"No Nvidia Device"`` when CUDA is unavailable. Otherwise a summary
        of the first GPU's utilization and memory. If the gpustat query
        raises, or reports no GPUs at all, the result of
        ``torch_update_gpu_status()`` is returned instead, so callers always
        receive a string.
    """
    if not torch.cuda.is_available():
        return "No Nvidia Device"
    try:
        gpu_stats = GPUStatCollection.new_query()
        # Only the first GPU is reported; ``None`` means the query was empty.
        gpu = next(iter(gpu_stats), None)
        if gpu is not None:
            gpu_id = gpu.index
            gpu_name = gpu.name
            gpu_utilization = gpu.utilization
            memory_used = gpu.memory_used
            memory_total = gpu.memory_total
            memory_utilization = (memory_used / memory_total) * 100
            return (f"GPU {gpu_id}: {gpu_name}, Utilization: {gpu_utilization}%, Memory Used: {memory_used}MB, Memory Total: {memory_total}MB, Memory Utilization: {memory_utilization:.2f}%")
    except Exception as e:
        # Best-effort: any gpustat failure falls through to the torch report.
        print(f"Error getting GPU stats: {e}")
    return torch_update_gpu_status()
52
+
53
def torch_update_gpu_status():
    """Return a status string for CUDA device 0 using torch only.

    Returns:
        ``"No GPU available"`` when CUDA is unavailable; otherwise the device
        name plus free, total and used memory, each rounded to two decimals.
    """
    if not torch.cuda.is_available():
        return "No GPU available"

    gpu_info = torch.cuda.get_device_name(0)
    # mem_get_info returns (free, total); the divisions below convert to MB
    # assuming the values are in bytes.
    free_bytes, total_bytes = torch.cuda.mem_get_info(0)
    mb = 1024 * 1024
    free_memory = free_bytes / mb
    total_memory = total_bytes / mb
    used_memory = (total_bytes - free_bytes) / mb

    return f"GPU: {gpu_info} Free Memory:{free_memory:.2f}MB Total Memory: {total_memory:.2f} MB Used Memory: {used_memory:.2f} MB"
65
+
66
def update_cpu_status():
    """Return the current CPU usage percentage followed by the wall-clock time.

    Returns:
        A string of the form ``"CPU Usage: <percent>% <HH:MM:SS>"``.
    """
    import datetime

    # Timestamp is taken before sampling the CPU, formatted as HH:MM:SS.
    stamp = datetime.datetime.now().time().strftime("%H:%M:%S")
    usage = psutil.cpu_percent()
    return f"CPU Usage: {usage}% {stamp}"
76
+
77
def update_status():
    """Return the GPU status and the CPU status separated by a blank line."""
    # GPU is queried first, then CPU; the two lines are joined with "\n\n".
    parts = (update_gpu_status(), update_cpu_status())
    return "\n\n".join(parts)
82
+
83
def refresh_status():
    """Return a freshly computed system status string (see ``update_status``)."""
    current_status = update_status()
    return current_status
85
+
86
+
87
@spaces.GPU
def transcribe(audio_path, model_name):
    """Transcribe an audio file and report how long it took.

    Args:
        audio_path: Path to the audio file; surrounding whitespace is stripped.
        model_name: Model identifier; surrounding whitespace is stripped. When
            it differs from the loaded pipeline's ``model.name_or_path``, the
            module-level ``pipe`` is replaced via ``load_pipeline``.

    Returns:
        A ``(text, info)`` tuple: the transcribed text and a multi-line string
        with the transcription time, audio duration, model and device used.

    Raises:
        gr.Error: If ``audio_path`` is ``None``.
    """
    global pipe

    print('start transcribe '+str(time.time()))

    if audio_path is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    audio_path = audio_path.strip()
    model_name = model_name.strip()

    # Swap the shared pipeline only when a different model is requested.
    if model_name != pipe.model.name_or_path:
        pipe = load_pipeline(model_name)

    start_time = time.time()  # Record the start time
    print('start record time '+str(time.time()))
    # The clip is opened only to read its duration; close it right away so
    # its reader is not left open after every request.
    audio = mp.AudioFileClip(audio_path)
    try:
        audio_duration = audio.duration
    finally:
        audio.close()
    print('start pipe '+str(time.time()))
    text = pipe(audio_path, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    end_time = time.time()  # Record the end time

    # Note: the measured time includes reading the audio duration above.
    transcription_time = end_time - start_time

    # Create the transcription time output with additional information
    transcription_time_output = (
        f"Transcription Time: {transcription_time:.2f} seconds\n"
        f"Audio Duration: {audio_duration:.2f} seconds\n"
        f"Model Used: {model_name}\n"
        f"Device Used: {'GPU' if torch.cuda.is_available() else 'CPU'}"
    )

    print('return transcribe '+str(time.time()))

    return text, transcription_time_output
123
+
124
@spaces.GPU
def handle_upload_audio(audio_path,model_name,old_transcription=''):
    """Transcribe the audio and place the new text above the previous text.

    Args:
        audio_path: Path to the audio file, forwarded to ``transcribe``.
        model_name: Model identifier, forwarded to ``transcribe``.
        old_transcription: Previously shown transcription; ``None`` is treated
            as an empty string.

    Returns:
        A ``(combined_text, info)`` tuple where ``combined_text`` is the new
        transcription, a blank line, then ``old_transcription``.
    """
    # Presumably an unset textbox value can arrive as None; coalesce it so
    # the string concatenations below cannot raise TypeError.
    if old_transcription is None:
        old_transcription = ''
    print('old_trans:' + old_transcription)
    text, transcription_time_output = transcribe(audio_path, model_name)
    return text+'\n\n'+old_transcription, transcription_time_output
129
 
130
  graudio=gr.Audio(type="filepath",show_download_button=True)
131
+ grmodel_textbox=gr.Textbox(
132
+ label="Model Name",
133
+ value=DEFAULT_MODEL_NAME,
134
+ placeholder="Enter the model name",
135
+ info="Some available models: distil-whisper/distil-large-v3 distil-whisper/distil-medium.en Systran/faster-distil-whisper-large-v3 Systran/faster-whisper-large-v3 Systran/faster-whisper-medium openai/whisper-tiny, openai/whisper-base, openai/whisper-medium, openai/whisper-large-v3",
136
+ )
137
+ groutputs=[gr.TextArea(label="Transcription",elem_id="transcription_textarea",interactive=True,lines=20,show_copy_button=True),
138
+ gr.TextArea(label="Transcription Info",interactive=True,show_copy_button=True)]
139
+
140
+ mf_transcribe = gr.Interface(
141
+ fn=handle_upload_audio,
142
+ inputs=[
143
+ graudio, #"numpy" or filepath
144
+ #gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
145
+ grmodel_textbox,
146
+ ],
147
+ outputs=groutputs,
148
+ theme="huggingface",
149
+ title="Whisper Transcription",
150
+ description=(
151
+ "Scroll to Bottom to show system status. "
152
+ "Transcribe long-form microphone or audio file after uploaded audio! "
153
+ ),
154
+ allow_flagging="never",
155
+ )
156
+
157
+
158
+ demo = gr.Blocks()
159
 
 
 
160
 
161
+ with demo:
162
+ gr.TabbedInterface([mf_transcribe, ], ["Audio",])
163
+
164
+ with gr.Row():
165
+ refresh_button = gr.Button("Refresh Status") # Create a refresh button
166
+
167
+ sys_status_output = gr.Textbox(label="System Status", interactive=False)
168
+
169
+
170
+ # Link the refresh button to the refresh_status function
171
+ refresh_button.click(refresh_status, None, [sys_status_output])
172
 
173
+ # Load the initial status using update_status function
174
+ demo.load(update_status, inputs=None, outputs=[sys_status_output], every=2, queue=False)
175
 
176
+ graudio.stop_recording(handle_upload_audio,inputs=[graudio,grmodel_textbox,groutputs[0]],outputs=groutputs)
177
+ graudio.upload(handle_upload_audio,inputs=[graudio,grmodel_textbox,groutputs[0]],outputs=groutputs)
178
+
179
 
180
+ # Launch the Gradio app
181
+ demo.launch(share=True)
 
182
 
183
+ print('launched\n\n')