Elanas committed on
Commit
10a86b9
·
verified ·
1 Parent(s): 60afc6d

Upload kalbos_nustatymas.py

Browse files
Files changed (1) hide show
  1. kalbos_nustatymas.py +53 -0
kalbos_nustatymas.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import whisper
3
+ from transformers import pipeline
4
+ import torch
5
+ import torchaudio
6
+
7
# 🔹 Whisper transcription
# Loaded Whisper models keyed by checkpoint name, so repeated calls reuse
# the same in-memory model instead of reloading the weights each time.
_WHISPER_MODEL_CACHE = {}


def _load_whisper_model(model_name="base"):
    """Return the Whisper model *model_name*, loading it only on first use."""
    model = _WHISPER_MODEL_CACHE.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODEL_CACHE[model_name] = model
    return model


def transcribe_text(audio_path, language="lt", model_name="base"):
    """Transcribe an audio file with Whisper.

    Args:
        audio_path: Audio file path handed to ``model.transcribe``.
        language: Language code passed to Whisper. Defaults to ``"lt"``,
            the value that was previously hard-coded.
        model_name: Whisper checkpoint to use. Defaults to ``"base"``,
            the value that was previously hard-coded.

    Returns:
        The transcribed text with surrounding whitespace stripped, or an
        empty string when the result has no ``"text"`` entry.
    """
    model = _load_whisper_model(model_name)
    result = model.transcribe(audio_path, language=language)
    return result.get("text", "").strip()
12
+
13
# 🔹 Whisper language detection (with an additional keyword check)
# Keyword lists in priority order: the first language with a hit wins.
_LANGUAGE_KEYWORDS = (
    ("lt", ("labas", "ačiū", "draugas", "vardas", "sekasi", "prašau")),
    ("en", ("hello", "name", "how are you", "friend", "please")),
    ("de", ("hallo", "danke", "freund", "ich", "bitte")),
)


def _contains_keyword(text, keywords):
    """Return True if any keyword occurs in *text* as a whole word/phrase."""
    import re

    # Lookarounds instead of a plain substring test: a bare ``in`` check lets
    # "ich" match inside "which" and "name" inside "username".
    return any(
        re.search(rf"(?<!\w){re.escape(keyword)}(?!\w)", text)
        for keyword in keywords
    )


def recognize_language(audio_path):
    """Detect the spoken language of an audio file.

    The audio is transcribed with the Whisper ``"base"`` model without a
    language hint. The lower-cased transcript is then checked against small
    Lithuanian, English and German keyword lists (in that order). A keyword
    only counts when it appears as a whole word or phrase.

    Args:
        audio_path: Audio file path handed to ``model.transcribe``.

    Returns:
        ``"lt"``, ``"en"`` or ``"de"`` when a keyword of that language is
        found; otherwise the ``"language"`` value reported by Whisper, or
        ``"unknown"`` when the result has no such entry.
    """
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    text = result.get("text", "").strip()
    lang_code = result.get("language", "unknown")

    lower_text = text.lower()
    for code, keywords in _LANGUAGE_KEYWORDS:
        if _contains_keyword(lower_text, keywords):
            return code
    return lang_code
29
+
30
# 🔸 Wav2Vec2 transcription (with language selection)
# Sampling rate the audio is converted to before it is given to the pipeline.
_WAV2VEC_SAMPLE_RATE = 16000

# Hugging Face model identifier for each supported language code.
_WAV2VEC_MODELS = {
    "lt": "DeividasM/wav2vec2-large-xlsr-53-lithuanian",
    "en": "facebook/wav2vec2-base-960h",
    "de": "jonatasgrosman/wav2vec2-large-xlsr-53-german",
}

# Built pipelines keyed by language code, so each model is loaded only once.
_WAV2VEC_PIPELINES = {}


def _load_wav2vec_pipeline(kalba):
    """Return the ASR pipeline for *kalba*, building it only on first use."""
    pipe = _WAV2VEC_PIPELINES.get(kalba)
    if pipe is None:
        pipe = pipeline(
            "automatic-speech-recognition",
            model=_WAV2VEC_MODELS[kalba],
        )
        _WAV2VEC_PIPELINES[kalba] = pipe
    return pipe


def transcribe_text_wav2vec(audio_path, kalba):
    """Transcribe an audio file with a language-specific Wav2Vec2 model.

    Args:
        audio_path: Audio file path handed to ``torchaudio.load``.
        kalba: Language code selecting the model: ``"lt"``, ``"en"`` or
            ``"de"``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        ValueError: If *kalba* is not one of the supported language codes.
    """
    if kalba not in _WAV2VEC_MODELS:
        raise ValueError(f"Nepalaikoma kalba: {kalba}")

    # Read the audio before touching the (much heavier) model so that a bad
    # path fails fast.
    speech_array, sampling_rate = torchaudio.load(audio_path)

    # Average over the first (channel) axis instead of keeping only channel
    # 0, so speech present on another channel is not silently dropped.
    speech_array = speech_array.mean(dim=0)

    if sampling_rate != _WAV2VEC_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sampling_rate,
            new_freq=_WAV2VEC_SAMPLE_RATE,
        )
        speech_array = resampler(speech_array)
    speech = speech_array.numpy()

    result = _load_wav2vec_pipeline(kalba)(speech)
    return result["text"]