Samarth991 committed on
Commit
34a0eeb
1 Parent(s): fe5b216

adding duration feature

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. whisper_app.py +6 -2
app.py CHANGED
@@ -52,7 +52,7 @@ def process_documents(documents,data_chunk=1500,chunk_overlap=100):
52
  texts = text_splitter.split_documents(documents)
53
  return texts
54
 
55
- def audio_processor(wav_file,API_key,wav_model='small',llm='HuggingFace',temperature=0.1,max_tokens=4096):
56
  device='cpu'
57
  logger.info("Audio File Name :",wav_file.name)
58
  whisper = whisper_app.WHISPERModel(model_name=wav_model,device=device)
 
52
  texts = text_splitter.split_documents(documents)
53
  return texts
54
 
55
+ def audio_processor(wav_file,API_key,wav_model='small',llm='HuggingFace',temperature=0.1,max_tokens=4096,duration=5):
56
  device='cpu'
57
  logger.info("Audio File Name :",wav_file.name)
58
  whisper = whisper_app.WHISPERModel(model_name=wav_model,device=device)
whisper_app.py CHANGED
@@ -18,15 +18,17 @@ class WHISPERModel:
18
  result = self.model.transcribe(clip_audio)
19
  return result['language']
20
 
21
- def read_audio(self,audio_path):
22
  audio = None
23
  try:
24
  audio = whisper.load_audio(audio_path)
 
 
25
  except IOError as err:
26
  raise err
27
  return audio
28
 
29
- def speech_to_text(self, audio_path):
30
  text_data = dict()
31
  audio_duration = 0
32
  conv_language = ""
@@ -36,6 +38,8 @@ class WHISPERModel:
36
  audio = self.read_audio(audio_path)
37
  else:
38
  raise("Unable to reach for URL {}".format(audio_path))
 
 
39
  if audio :
40
  conv_language = self.get_info(audio)
41
  if conv_language !='en':
 
18
  result = self.model.transcribe(clip_audio)
19
  return result['language']
20
 
21
+ def read_audio(self,audio_path,duration=None):
22
  audio = None
23
  try:
24
  audio = whisper.load_audio(audio_path)
25
+ if duration :
26
+ audio = whisper.pad_or_trim(audio, length=SAMPLE_RATE * duration*60)
27
  except IOError as err:
28
  raise err
29
  return audio
30
 
31
+ def speech_to_text(self, audio_path,duration=None):
32
  text_data = dict()
33
  audio_duration = 0
34
  conv_language = ""
 
38
  audio = self.read_audio(audio_path)
39
  else:
40
  raise("Unable to reach for URL {}".format(audio_path))
41
+ else:
42
+ self.read_audio(audio_path)
43
  if audio :
44
  conv_language = self.get_info(audio)
45
  if conv_language !='en':