Sabbah13's picture
Create utils.py
50e81bb verified
raw
history blame
2.97 kB
def _fill_missing_word_fields(words, segment_speaker, segment_start, segment_end):
    """Fill missing 'speaker', 'start' and 'end' keys of each word dict in place.

    Values are borrowed from the neighbouring words first and from the
    enclosing segment as a last resort; a filled value may still be None
    when the segment itself does not provide it.
    """
    last_index = len(words) - 1
    for i, word_info in enumerate(words):
        if 'speaker' not in word_info:
            # Prefer the previous word (already filled), then the next word,
            # then the segment-level speaker.
            if i > 0 and 'speaker' in words[i - 1]:
                word_info['speaker'] = words[i - 1]['speaker']
            elif i < last_index and 'speaker' in words[i + 1]:
                word_info['speaker'] = words[i + 1]['speaker']
            else:
                word_info['speaker'] = segment_speaker

        if 'start' not in word_info:
            # A word without a start is taken to begin where the previous
            # word ended.
            if i > 0 and 'end' in words[i - 1]:
                word_info['start'] = words[i - 1]['end']
            else:
                word_info['start'] = segment_start

        if 'end' not in word_info:
            # A word without an end is taken to stop where the next word
            # begins; the last word falls back to the segment end, any other
            # word collapses to a zero-length interval at its own start.
            if i < last_index and 'start' in words[i + 1]:
                word_info['end'] = words[i + 1]['start']
            elif i == last_index:
                word_info['end'] = segment_end
            else:
                word_info['end'] = word_info['start']


def _format_speaker_run(speaker, start, end, run_words):
    """Render one speaker run as 'speaker (start : end) : text'.

    The time range is omitted when either bound is None.
    """
    text = " ".join(run_words)
    if start is not None and end is not None:
        return f'{speaker} ({start} : {end}) : {text}'
    return f'{speaker} : {text}'


def convert_segments_object_to_text(data):
    """Convert a segments object into a speaker-labelled, timestamped transcript.

    Within each segment, consecutive words that share the same speaker are
    merged into one line of the form ``speaker (start : end) : words``
    (or ``speaker : words`` when a time bound is unavailable). Runs never
    span two segments.

    NOTE(review): the input looks like diarized speech-to-text output with
    word-level timestamps - confirm the exact schema against the caller.

    Args:
        data: Mapping with a 'segments' list. Each segment must have a
            'words' list of dicts and may have 'speaker', 'start' and 'end'.
            Each word dict may have 'word', 'speaker', 'start' and 'end'.

    Returns:
        The transcript lines joined with newlines; an empty string when
        there are no words at all.

    Raises:
        KeyError: If 'segments' is missing from ``data`` or 'words' is
            missing from a segment.

    Side effects:
        Missing 'speaker', 'start' and 'end' keys are written into the word
        dicts of ``data`` in place.
    """
    result = []

    for segment in data['segments']:
        words = segment['words']
        _fill_missing_word_fields(
            words,
            segment.get('speaker', None),
            segment.get('start', None),
            segment.get('end', None),
        )

        run_speaker = None
        run_start = None
        run_end = None
        run_words = []

        for word_info in words:
            speaker = word_info.get('speaker', None)

            # Speaker changed: flush the run collected so far.
            if run_words and speaker != run_speaker:
                result.append(
                    _format_speaker_run(run_speaker, run_start, run_end, run_words)
                )
                run_words = []

            # "No open run" is detected through the empty word list rather
            # than through `run_speaker is None`: None is a legitimate
            # speaker value (undiarized words), and using it as a sentinel
            # would reset the run start on every such word and merge
            # None-speaker words into the next speaker's run.
            if not run_words:
                run_speaker = speaker
                run_start = word_info.get('start', None)

            run_words.append(word_info.get('word', ''))
            run_end = word_info.get('end', None)

        # Flush the run that is still open at the end of the segment.
        if run_words:
            result.append(
                _format_speaker_run(run_speaker, run_start, run_end, run_words)
            )

    return '\n'.join(result)