qanastek committed on
Commit
63deeee
•
1 Parent(s): bcf29d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py CHANGED
@@ -27,6 +27,39 @@ tokenizer_ner = AutoTokenizer.from_pretrained(model_name)
27
  # Load the token-classification model identified by `model_name`.
  model_ner = AutoModelForTokenClassification.from_pretrained(model_name)
28
  # Bundle that model with `tokenizer_ner` into a token-classification pipeline.
  predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def greet(name):
31
  return "Hello " + name + "!!"
32
 
 
27
  # Load the token-classification model identified by `model_name`.
  model_ner = AutoModelForTokenClassification.from_pretrained(model_name)
28
  # Bundle that model with `tokenizer_ner` into a token-classification pipeline.
  predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)
29
 
30
def transcribe(audio_path):
    """Transcribe the audio file at ``audio_path`` and return the decoded text.

    The file is loaded with ``librosa`` at 16 kHz, passed through
    ``processor_asr`` and ``model_asr``, and the highest-scoring token id at
    each position is decoded back to text with ``processor_asr.batch_decode``.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        The first (and only) decoded transcription of the batch.
    """
    target_rate = 16_000

    # Load and resample in one step; the returned rate equals `target_rate`.
    waveform, _ = librosa.load(audio_path, sr=target_rate)

    features = processor_asr(
        waveform,
        sampling_rate=target_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model_asr(
            features.input_values,
            attention_mask=features.attention_mask,
        )

    # Pick the arg-max token id at every position, then decode the batch.
    token_ids = torch.argmax(output.logits, dim=-1)
    transcripts = processor_asr.batch_decode(token_ids)
    return transcripts[0]
42
+
43
def getUniform(text):
    """Group token-level BIO predictions into ``(label, words)`` entities.

    Args:
        text: Iterable of prediction dicts. Each dict must provide an
            ``"entity"`` tag (``B-<label>`` opens an entity, anything else
            continues one) and a ``"word"`` token string.

    Returns:
        A list of ``(label, words)`` tuples in order of appearance, where
        ``label`` is the tag without its ``B-``/``I-`` prefix and ``words`` is
        the list of token strings belonging to that entity. An empty input
        yields an empty list.

    Raises:
        KeyError: If a prediction dict lacks ``"entity"`` or ``"word"``.
    """
    groups = []

    for token in text:
        tag = token["entity"]
        # Strip only the leading BIO marker; the label itself may legitimately
        # contain "B-" or "I-" further in.
        label = tag[2:] if tag.startswith(("B-", "I-")) else tag
        # U+2581 is the word-boundary marker emitted by SentencePiece-style
        # tokenizers; drop it so only the surface text remains.
        word = token["word"].replace("\u2581", "")

        # A token continues the current entity only when it is not an opening
        # tag and carries the same label as the most recent group. Otherwise
        # (including an orphan "I-" with no preceding "B-") start a new group
        # instead of failing on a missing key.
        continues_previous = (
            bool(groups)
            and not tag.startswith("B-")
            and groups[-1][0] == label
        )
        if continues_previous:
            groups[-1][1].append(word)
        else:
            groups.append((label, [word]))

    return groups
62
+
63
  def greet(name):
64
  return "Hello " + name + "!!"
65