nahue-passano committed on
Commit
80f5b87
1 Parent(s): b2f2444

update: added length fixing in short utterances

Browse files
Files changed (2) hide show
  1. utils/audio.py +2 -2
  2. utils/text.py +52 -6
utils/audio.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
4
  import soundfile as sf
5
  import pandas as pd
6
 
7
- from utils.text import filter_dataframe_by_audiofile
8
 
9
 
10
  def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
@@ -91,6 +91,6 @@ def generate_audio_splits(
91
  Path where segments will be saved.
92
  """
93
  audio_array, sample_rate = load_audio(audio_path)
94
- timestamp_list = filter_dataframe_by_audiofile(timestamps_df, audio_path.name)
95
  audio_segments = split_audio(audio_array, sample_rate, timestamp_list)
96
  save_audio_segments(destination, audio_path, audio_segments, sample_rate)
 
4
  import soundfile as sf
5
  import pandas as pd
6
 
7
+ from utils.text import get_utterance_boundaries
8
 
9
 
10
  def load_audio(audio_path: Path) -> Tuple[np.ndarray, float]:
 
91
  Path where segments will be saved.
92
  """
93
  audio_array, sample_rate = load_audio(audio_path)
94
+ timestamp_list = get_utterance_boundaries(timestamps_df)
95
  audio_segments = split_audio(audio_array, sample_rate, timestamp_list)
96
  save_audio_segments(destination, audio_path, audio_segments, sample_rate)
utils/text.py CHANGED
@@ -68,15 +68,13 @@ def get_word_data(filename: str, timestamp_dict: dict):
68
  return word_df
69
 
70
 
71
- def filter_dataframe_by_audiofile(timestamps_df: pd.DataFrame, audio_file: str) -> List:
72
- """Generates a list from timestamps_df with the timestamps belonging to audio_file
73
 
74
  Parameters
75
  ----------
76
- timestamps_df : pd.DataFrame
77
  Dataframe containing timestamps
78
- audio_file : str
79
- Name of the audio file.
80
 
81
  Returns
82
  -------
@@ -84,10 +82,58 @@ def filter_dataframe_by_audiofile(timestamps_df: pd.DataFrame, audio_file: str)
84
  List of tuples containing the start and end of each stamp.
85
  E.g: [(start_1, end_1), ..., (start_n, end_n)]
86
  """
87
- audio_df = timestamps_df[timestamps_df["Audio file"] == audio_file]
88
  return list(zip(audio_df["Start"], audio_df["End"]))
89
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def get_utterances_transcriptions(timestamps_df: pd.DataFrame) -> List[str]:
92
  """Gives column with transcriptions
93
 
 
68
  return word_df
69
 
70
 
71
+ def get_utterance_boundaries(audio_df: pd.DataFrame) -> List:
72
+ """Generates a list from starts and ends of utterances in an audio.
73
 
74
  Parameters
75
  ----------
76
+ audio_df : pd.DataFrame
77
  Dataframe containing timestamps
 
 
78
 
79
  Returns
80
  -------
 
82
  List of tuples containing the start and end of each stamp.
83
  E.g: [(start_1, end_1), ..., (start_n, end_n)]
84
  """
 
85
  return list(zip(audio_df["Start"], audio_df["End"]))
86
 
87
 
88
def _join_sentences(left, right):
    """Concatenate two transcriptions, adding one space if none separates them."""
    needs_space = (
        isinstance(left, str)
        and isinstance(right, str)
        and left
        and right
        and not left[-1].isspace()
        and not right[0].isspace()
    )
    return left + " " + right if needs_space else left + right


def check_ut_min_duration(
    dataframe: pd.DataFrame, min_duration: float = 1.6
) -> pd.DataFrame:
    """
    Concatenates audio segments that are shorter than the minimum utterance
    duration (only needed for sentence inference).

    Each segment shorter than ``min_duration`` is merged with whichever of its
    direct neighbours has the shortest duration (the left one on ties). The
    process repeats until no short segment remains or a single segment is left.

    Parameters
    ----------
    dataframe: pd.DataFrame
        Selected DataFrame to process. It must have the columns "Audio file",
        "Sentence", "Start", "End" and "Duration". It is not modified.
    min_duration: float
        Minimum duration allowed for a segment, in the same unit as the
        "Duration" column. Defaults to 1.6.

    Returns
    -------
    pd.DataFrame
        New DataFrame (fresh index) with corrected audio segments and the
        columns "Audio file", "Sentence", "Start", "End" and "Duration".
        An empty input yields an empty DataFrame with those columns.
    """
    columns = ["Audio file", "Sentence", "Start", "End", "Duration"]

    # Work on plain lists so that rows can be merged and deleted in place
    segments = list(zip(dataframe["Start"], dataframe["End"]))
    segment_durations = list(dataframe["Duration"])
    names = list(dataframe["Audio file"])
    texts = list(dataframe["Sentence"])

    i = 0
    while i < len(segments) and len(segments) > 1:
        if segment_durations[i] < min_duration:
            # A missing neighbour counts as infinitely long so it is never chosen
            left_duration = float("inf") if i == 0 else segment_durations[i - 1]
            right_duration = (
                float("inf") if i == len(segments) - 1 else segment_durations[i + 1]
            )

            # Re-attach the segment with the neighbour of shortest duration:
            # rows j and j + 1 are collapsed into row j
            j = i - 1 if left_duration <= right_duration else i
            segments[j] = (segments[j][0], segments[j + 1][1])
            # NOTE(review): the merged duration is the sum of both durations and
            # ignores any gap between the two segments -- confirm this is intended
            segment_durations[j] += segment_durations[j + 1]
            texts[j] = _join_sentences(texts[j], texts[j + 1])
            del segments[j + 1], segment_durations[j + 1], names[j + 1], texts[j + 1]
            # i is not advanced: the row now at index i still has to be checked
        else:
            i += 1

    # Build the result in one go; this also works when there are no segments
    return pd.DataFrame(
        {
            "Audio file": names,
            "Sentence": texts,
            "Start": [start for start, _ in segments],
            "End": [end for _, end in segments],
            "Duration": segment_durations,
        },
        columns=columns,
    )
135
+
136
+
137
  def get_utterances_transcriptions(timestamps_df: pd.DataFrame) -> List[str]:
138
  """Gives column with transcriptions
139