fanaf91318 committed on
Commit
5e671da
Parent: c1a936e

Create app.py

Files changed (1)
app.py +60 -0
app.py ADDED
from transformers import (
    AutomaticSpeechRecognitionPipeline,
    WhisperForConditionalGeneration,
    WhisperTokenizer,
    WhisperProcessor,
)
from peft import PeftModel, PeftConfig
import torch
import gradio as gr

peft_model_id = "aisha-org/faster-whisper-uz"
language = "uz"
task = "transcribe"

# Resolve the base Whisper checkpoint from the PEFT adapter config.
peft_config = PeftConfig.from_pretrained(peft_model_id, use_auth_token=True)

# Load the base model in 8-bit to reduce GPU memory usage.
model = WhisperForConditionalGeneration.from_pretrained(
    peft_config.base_model_name_or_path,
    load_in_8bit=True,
    device_map="auto",
    use_auth_token=True,
)

# Attach the fine-tuned PEFT (LoRA) adapter weights on top of the base model.
model = PeftModel.from_pretrained(model, peft_model_id, use_auth_token=True)

tokenizer = WhisperTokenizer.from_pretrained(peft_config.base_model_name_or_path, language=language, task=task)
processor = WhisperProcessor.from_pretrained(peft_config.base_model_name_or_path, language=language, task=task)
feature_extractor = processor.feature_extractor
# Prompt tokens that force Uzbek-language transcription during generation.
forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)

pipe = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)


def transcribe(audio):
    # Autocast matches the fp16 compute dtype used by the 8-bit quantized layers.
    with torch.cuda.amp.autocast():
        text = pipe(audio, generate_kwargs={"forced_decoder_ids": forced_decoder_ids}, max_new_tokens=255)["text"]
    return text


demo = gr.Blocks()

mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(),
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(),
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(debug=True)
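
For a quick sanity check outside the Gradio UI, the transcribe function can be called directly on an audio file path. This is a minimal sketch, not part of the commit; sample_uz.wav is a hypothetical local recording, and an authenticated Hugging Face session plus a CUDA device are assumed:

if __name__ == "__main__":
    # Hypothetical test clip -- replace with any real audio file path.
    print(transcribe("sample_uz.wav"))

Note that the Space will also need a requirements.txt alongside app.py (this commit only adds app.py): at minimum transformers, peft, torch, and gradio, plus bitsandbytes and accelerate, which load_in_8bit and device_map="auto" depend on. Exact version pins are left unspecified here.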