truongghieu committed on
Commit aebae4d · 1 Parent(s): 09cd5e3

Update app.py

Files changed (1)
  1. app.py +25 -8
app.py CHANGED
@@ -1,13 +1,30 @@
  import gradio as gr
- # Load model directly
- from transformers import AutoTokenizer, AutoModelForCausalLM
- # Use a pipeline as a high-level helper
- from transformers import pipeline
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+ from datasets import load_dataset
+ import soundfile as sf
+ import torch

- pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf")
+ # load model and tokenizer
+ processor = Wav2Vec2Processor.from_pretrained("nguyenvulebinh/wav2vec2-base-vietnamese-250h")
+ model = Wav2Vec2ForCTC.from_pretrained("nguyenvulebinh/wav2vec2-base-vietnamese-250h")

- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
- model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+ # define function to read in sound file
+ def map_to_array(batch):
+     speech, _ = sf.read(batch["file"])
+     batch["speech"] = speech
+     return batch

+ # tokenize

- gr.load("models/meta-llama/Llama-2-7b-hf").launch()
+ def recognize_speech(audio):
+     ds = map_to_array({
+         "file": audio
+     })
+     input_values = processor(ds["speech"], return_tensors="pt", padding="longest").input_values  # Batch size 1
+     logits = model(input_values).logits
+     predicted_ids = torch.argmax(logits, dim=-1)
+     transcription = processor.batch_decode(predicted_ids)
+     return transcription[0]
+
+ # launch a simple UI
+ gr.Interface(fn=recognize_speech, inputs="microphone", outputs="text").launch()
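
For reference, below is a minimal standalone sketch of the resulting app.py. It is not the committed code verbatim; it makes two hedged adjustments. The input is declared as gr.Audio(type="filepath") so the callback receives a file path that sf.read can open (with the bare "microphone" shortcut, some Gradio versions pass a (rate, array) tuple instead), and sampling_rate=16000 is passed to the processor on the assumption that this wav2vec2 checkpoint expects 16 kHz audio, as is typical for the architecture. The unused load_dataset import is dropped.

import gradio as gr
import soundfile as sf
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

# Same Vietnamese checkpoint as in the commit.
processor = Wav2Vec2Processor.from_pretrained("nguyenvulebinh/wav2vec2-base-vietnamese-250h")
model = Wav2Vec2ForCTC.from_pretrained("nguyenvulebinh/wav2vec2-base-vietnamese-250h")

def recognize_speech(audio_path):
    # audio_path is a plain file path because the Audio input below uses type="filepath".
    speech, sample_rate = sf.read(audio_path)
    # Assumption: the checkpoint expects 16 kHz mono audio, as is typical for
    # wav2vec2 models; recordings at other rates would need resampling first.
    input_values = processor(
        speech, sampling_rate=16000, return_tensors="pt", padding="longest"
    ).input_values
    with torch.no_grad():  # inference only, so skip gradient bookkeeping
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]

# sources=["microphone"] is the Gradio 4.x spelling; Gradio 3.x uses source="microphone".
gr.Interface(
    fn=recognize_speech,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
).launch()

Wrapping the forward pass in torch.no_grad() is optional but avoids building an autograd graph for what is pure inference.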