|
def _fill_missing_word_fields(words, segment_speaker, segment_start, segment_end):
    """Return shallow copies of *words* with 'speaker', 'start' and 'end' filled in.

    Words are processed in order, so a word's predecessor has already been
    completed when it is consulted, while its successor is still in its
    original state. The input dicts themselves are never modified.
    """
    filled = [dict(word_info) for word_info in words]
    last_index = len(filled) - 1

    for i, word_info in enumerate(filled):
        if 'speaker' not in word_info:
            # Prefer the previous word's speaker, then the next word's,
            # and finally fall back to the segment-level speaker.
            if i > 0 and 'speaker' in filled[i - 1]:
                word_info['speaker'] = filled[i - 1]['speaker']
            elif i < last_index and 'speaker' in filled[i + 1]:
                word_info['speaker'] = filled[i + 1]['speaker']
            else:
                word_info['speaker'] = segment_speaker

        if 'start' not in word_info:
            # A word with no start begins where the previous word ended.
            if i > 0 and 'end' in filled[i - 1]:
                word_info['start'] = filled[i - 1]['end']
            else:
                word_info['start'] = segment_start

        if 'end' not in word_info:
            # A word with no end stops where the next word starts; the last
            # word stops with the segment; otherwise it is zero-length.
            if i < last_index and 'start' in filled[i + 1]:
                word_info['end'] = filled[i + 1]['start']
            elif i == last_index:
                word_info['end'] = segment_end
            else:
                word_info['end'] = word_info['start']

    return filled


def _format_line(speaker, start, end, text_parts):
    """Format one speaker run; the time range is omitted if either bound is None."""
    text = " ".join(text_parts)
    if start is not None and end is not None:
        return f'{speaker} ({start} : {end}) : {text}'
    return f'{speaker} : {text}'


def convert_segments_object_to_text(data):
    """Render a segments/words mapping as one text line per speaker run.

    Args:
        data: Mapping with a 'segments' list. Each segment must have a
            'words' list of dicts and may have 'speaker', 'start' and 'end'.
            Each word may have 'word', 'speaker', 'start' and 'end'; missing
            speaker/start/end values are inferred from neighbouring words or
            from the segment. ``data`` is not modified.

    Returns:
        A string with one line per run of consecutive words that share a
        speaker, formatted as ``"<speaker> (<start> : <end>) : <words>"``,
        or ``"<speaker> : <words>"`` when a time bound is unknown. Lines are
        joined with newlines. Runs never span two segments.

    Raises:
        KeyError: If ``data`` has no 'segments' key or a segment has no
            'words' key.
    """
    result = []

    for segment in data['segments']:
        words = _fill_missing_word_fields(
            segment['words'],
            segment.get('speaker'),
            segment.get('start'),
            segment.get('end'),
        )

        current_speaker = None
        current_start = None
        current_end = None
        current_text = []

        for word_info in words:
            word = word_info.get('word', '')
            start = word_info.get('start')
            end = word_info.get('end')
            speaker = word_info.get('speaker')

            if not current_text:
                # First word of the segment opens the first run.
                current_speaker = speaker
                current_start = start
            elif current_speaker is None:
                # An unlabelled run adopts the first known speaker that
                # follows it. The run's start time is kept so the printed
                # range still covers every word on the line.
                current_speaker = speaker

            if speaker == current_speaker:
                current_text.append(word)
                current_end = end
            else:
                # Speaker changed: emit the finished run and open a new one.
                result.append(
                    _format_line(current_speaker, current_start, current_end, current_text)
                )
                current_speaker = speaker
                current_start = start
                current_end = end
                current_text = [word]

        # Flush the last run of this segment.
        if current_text:
            result.append(
                _format_line(current_speaker, current_start, current_end, current_text)
            )

    return '\n'.join(result)