File size: 4,335 Bytes
493dfd3
 
 
 
bd0d4c2
493dfd3
 
 
 
 
 
 
 
 
5479a9a
493dfd3
 
 
 
 
50e81bb
 
09bdd3a
50e81bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
688270d
 
1862301
dfdc009
688270d
dfdc009
688270d
 
 
 
 
 
 
1862301
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import streamlit as st

def check_password():
    """Gate the app behind the password stored in ``st.secrets["PASSWORD"]``.

    Returns:
        True once the correct password has been entered during this session.
        Otherwise the app title and a password input are rendered and False
        is returned.
    """
    # Local import keeps this block self-contained (stdlib only).
    import hmac

    def password_entered():
        """``on_change`` callback: validate the value typed into the input."""
        entered = st.session_state["password"]
        expected = st.secrets["PASSWORD"]
        # Constant-time comparison so the check does not leak, via timing,
        # how much of the password matched. Compare UTF-8 bytes because the
        # str form of compare_digest only accepts ASCII. A non-string secret
        # never matches, exactly as with a plain ``==`` against the input.
        matches = isinstance(expected, str) and hmac.compare_digest(
            entered.encode("utf-8"), expected.encode("utf-8")
        )
        if matches:
            st.session_state["password_correct"] = True
            # Drop the plaintext password from the session for safety.
            del st.session_state["password"]
        else:
            st.session_state["password_correct"] = False

    if "password_correct" not in st.session_state:
        # Initial state: nothing has been validated yet.
        st.session_state["password_correct"] = False

    if st.session_state["password_correct"]:
        return True

    # Show the title and the password field; the callback runs on submit.
    st.title('Audio Transcription App')
    st.text_input("Введите пароль:", type="password", on_change=password_entered, key="password")
    return False

def _format_speaker_run(speaker, start, end, words):
    """Render one run of consecutive same-speaker words as a transcript line."""
    text = " ".join(words)
    if start is not None and end is not None:
        return f'{speaker} ({start} : {end}) : {text}'
    return f'{speaker} : {text}'


def _fill_missing_word_fields(segment):
    """Return copies of the segment's words with speaker/start/end filled in.

    Missing keys are filled as follows:
      * ``speaker``: previous word's speaker, else the next word's speaker,
        else the segment's speaker.
      * ``start``: previous word's ``end``, else the segment's start.
      * ``end``: next word's ``start``; for the last word the segment's end;
        otherwise the word's own ``start``.

    The word dicts are shallow-copied so the caller's data is not modified.
    """
    words = [dict(word_info) for word_info in segment['words']]
    segment_speaker = segment.get('speaker')
    segment_start = segment.get('start')
    segment_end = segment.get('end')
    last_index = len(words) - 1

    for i, word_info in enumerate(words):
        if 'speaker' not in word_info:
            if i > 0 and 'speaker' in words[i - 1]:
                word_info['speaker'] = words[i - 1]['speaker']
            elif i < last_index and 'speaker' in words[i + 1]:
                word_info['speaker'] = words[i + 1]['speaker']
            else:
                word_info['speaker'] = segment_speaker

        if 'start' not in word_info:
            if i > 0 and 'end' in words[i - 1]:
                word_info['start'] = words[i - 1]['end']
            else:
                word_info['start'] = segment_start

        if 'end' not in word_info:
            if i < last_index and 'start' in words[i + 1]:
                word_info['end'] = words[i + 1]['start']
            elif i == last_index:
                word_info['end'] = segment_end
            else:
                word_info['end'] = word_info['start']

    return words


def convert_segments_object_to_text(data):
    """Convert word-level segments into ``speaker (start : end) : text`` lines.

    Within each segment, consecutive words with the same speaker are merged
    into one line. A new line starts whenever the speaker changes and at every
    segment boundary. The time range is omitted from a line when its start or
    end is unknown (None).

    Args:
        data: Mapping with a ``'segments'`` list. Each segment must have a
            ``'words'`` list of dicts (keys read: ``word``, ``start``,
            ``end``, ``speaker``) and may have ``speaker``, ``start`` and
            ``end`` of its own, used as fallbacks for words that lack them.

    Returns:
        The transcript lines joined by newlines ('' when there are no words).

    Raises:
        KeyError: If ``data`` has no ``'segments'`` or a segment has no
            ``'words'``.
    """
    result = []

    for segment in data['segments']:
        words = _fill_missing_word_fields(segment)
        current_speaker = None
        current_start = None
        current_end = None
        current_text = []

        for word_info in words:
            word = word_info.get('word', '')
            start = word_info.get('start')
            end = word_info.get('end')
            speaker = word_info.get('speaker')

            if not current_text:
                # First word of the segment opens the run. Testing the word
                # buffer (not ``current_speaker is None``) keeps the start
                # time intact when the speaker itself is None.
                current_speaker = speaker
                current_start = start
            elif speaker != current_speaker:
                # Speaker changed: flush the finished run and open a new one.
                result.append(
                    _format_speaker_run(current_speaker, current_start, current_end, current_text)
                )
                current_speaker = speaker
                current_start = start
                current_text = []

            current_text.append(word)
            current_end = end

        # Flush the run still open at the end of the segment.
        if current_text:
            result.append(
                _format_speaker_run(current_speaker, current_start, current_end, current_text)
            )

    return '\n'.join(result)

def convert_segments_object_to_text_simple(data):
    """Render each segment as one ``speaker (start : end) : text`` line.

    Uses only segment-level fields: ``text`` is required, while ``speaker``,
    ``start`` and ``end`` fall back to None (printed literally) when absent.

    Returns:
        All segment lines joined by newlines.
    """
    def render(seg):
        # Read the mandatory text first, then the optional metadata.
        body = seg['text']
        who = seg.get('speaker')
        begin = seg.get('start')
        finish = seg.get('end')
        return f'{who} ({begin} : {finish}) : {body}'

    return '\n'.join(map(render, data['segments']))