File size: 5,000 Bytes
8fb0be5
ee531be
59f6126
 
024f740
e8a4c9c
09b358f
024f740
4b331f0
 
 
e8a4c9c
 
 
 
ee531be
09b358f
 
7dc42bb
09b358f
 
 
29a10e5
4b331f0
09b358f
791adc1
4b331f0
ee531be
4b331f0
ee531be
4b331f0
09b358f
 
 
 
 
060a1e0
 
 
 
 
09b358f
4b331f0
 
 
ee531be
4b331f0
 
09b358f
8f0bb70
09b358f
ee531be
09b358f
 
 
 
 
 
 
4380489
09b358f
 
 
4380489
09b358f
 
 
ee531be
09b358f
 
060a1e0
09b358f
 
 
4b331f0
791adc1
4b331f0
a8c8823
4b331f0
 
09b358f
 
 
 
 
 
e8a4c9c
4b331f0
 
791adc1
ee531be
791adc1
4b331f0
09b358f
 
 
7bbc5c5
ee531be
4b331f0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import streamlit as st
import whisperx
import torch
from utils import convert_segments_object_to_text, check_password
from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens
from openai_requests import get_completion_from_openai

if check_password():    
    st.title('Audio Transcription App')
    st.sidebar.title("Settings")
    
    device = os.getenv('DEVICE')
    batch_size = int(os.getenv('BATCH_SIZE'))
    compute_type = os.getenv('COMPUTE_TYPE')

    initial_base_prompt = os.getenv('BASE_PROMPT')
    initial_processing_prompt = os.getenv('PROCCESS_PROMPT')

    llm = st.sidebar.selectbox("LLM", ["GigaChat", "Chat GPT"], index=0)
    base_prompt = st.sidebar.text_area("Промпт для резюмирования", value=initial_base_prompt)
    max_tokens_summary = st.sidebar.number_input("Максимальное количество токенов при резюмировании", min_value=1, value=1024)

    enable_summarization = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
    processing_prompt = st.sidebar.text_area("Промпт для обработки транскрибации", value=initial_processing_prompt)

    ACCESS_TOKEN = st.secrets["HF_TOKEN"]

    uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])

    if uploaded_file is not None:
        file_name = uploaded_file.name

        if 'file_name' not in st.session_state or st.session_state.file_name != file_name:
            st.session_state.transcript = ''
            st.session_state.file_name = file_name
            print(st.session_state.file_name)
            print(st.session_state.transcript)

        print(st.session_state.file_name)
        print(st.session_state.transcript)
            
        st.audio(uploaded_file)
        file_extension = uploaded_file.name.split(".")[-1]  # Получаем расширение файла
        temp_file_path = f"temp_file.{file_extension}"  # Создаем временное имя файла с правильным расширением
    
        with open(temp_file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        print(st.session_state.transcript)
        if 'transcript' not in st.session_state or st.session_state.transcript == '':
    
            with st.spinner('Транскрибируем...'):
                # Load model
                model = whisperx.load_model(os.getenv('WHISPER_MODEL_SIZE'), device, compute_type=compute_type)
                # Load and transcribe audio
                audio = whisperx.load_audio(temp_file_path)
                result = model.transcribe(audio, batch_size=batch_size, language="ru")
                print('Transcribed, now aligning')
        
                model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
                result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
                print('Aligned, now diarizing')
        
                diarize_model = whisperx.DiarizationPipeline(use_auth_token=st.secrets["HF_TOKEN"], device=device)
                diarize_segments = diarize_model(audio)
                result_diar = whisperx.assign_word_speakers(diarize_segments, result)
        
            st.write("Результат транскрибации:")
            transcript = convert_segments_object_to_text(result_diar)
            st.session_state.transcript = transcript
        else:
            transcript = st.session_state.transcript
            
        st.text(transcript)

        access_token = get_access_token()
    
        if (enable_summarization):
            with st.spinner('Обрабатываем транскрибацию...'):
                if (llm == 'GigaChat'):
                    number_of_tokens = get_number_of_tokens(transcript, access_token)
                    print('Количество токенов в транскрибации: ' + str(number_of_tokens))
                    transcript = get_completion_from_gigachat(processing_prompt + transcript, number_of_tokens + 500, access_token)
                elif (llm == 'Chat GPT'):
                    transcript = get_completion_from_openai(processing_prompt + transcript)
                
                st.write("Результат обработки:")
                st.text(transcript)

        
    
        with st.spinner('Резюмируем...'):
            if (llm == 'GigaChat'):
                summary_answer = get_completion_from_gigachat(base_prompt + transcript, max_tokens_summary, access_token)
            elif (llm == 'Chat GPT'):
                summary_answer = get_completion_from_openai(base_prompt + transcript, max_tokens_summary)
        
            st.write("Результат резюмирования:")
            st.text(summary_answer)