File size: 3,713 Bytes
e294914
cab263c
d022aab
7c29ee2
1581bbf
7378fc8
 
e294914
 
d5436e0
 
 
f8dfb0f
d63135e
e294914
7378fc8
e294914
 
 
7378fc8
e294914
 
 
d022aab
7378fc8
 
c8abf3f
d022aab
7c29ee2
 
7378fc8
7c29ee2
 
7378fc8
 
 
 
 
8bd4cb3
7378fc8
 
 
 
 
 
91b59ba
7378fc8
 
 
 
 
 
 
 
 
91b59ba
7378fc8
 
91b59ba
5a785fa
e294914
 
3e2a726
5a785fa
3e2a726
 
 
cf08317
8bd4cb3
e294914
5a785fa
 
8bd4cb3
 
5a785fa
 
 
3e2a726
 
7378fc8
 
 
e294914
7378fc8
cf08317
7378fc8
8bd4cb3
e294914
0f596d3
e294914
5a785fa
e294914
8bd4cb3
5a785fa
 
8bd4cb3
 
 
 
 
c8abf3f
8bd4cb3
5a785fa
 
4affef3
 
5a785fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# version - ArcticMonkeys:30.07.24

# python core libraries
import re
import psutil
import time
import random
# streamlit
import streamlit as st
# components from other authors
from streamlit_mic_recorder import mic_recorder
# core modules
from audio_processing.A2T import A2T
from audio_processing.T2A import T2A
from llm.utils.chat import Conversation
from vlm.vlm import VLM
# utils modules
from utils.keywords import keywords
from utils.prompt_toggle import select_prompt, load_prompts
from utils.image_caption import ImageCaption

# Module-level objects shared by the functions below.
prompts = load_prompts()  # prompt set handed to select_prompt() in switching()
chat = Conversation()  # handles regular (non-image) requests via chat.chatting()
t2a = T2A()  # plays responses via t2a.autoplay() in speaking()
vlm = VLM()  # model passed to ic.send2ai() for image requests
ic = ImageCaption()  # provides load_image() and send2ai() for image requests
text_dict = {}  # get_text() stores the latest transcription under the 'text' key

def remove_labels_with_regex(text: str):
    """Strip a leading speaker label from each line of ``text``.

    The labels ``Human:``, ``AI:`` and ``Chelsea:`` are removed, together
    with any whitespace that follows them, but only where they start a line.

    Args:
        text: Text that may contain speaker labels.

    Returns:
        The text with line-leading labels removed.
    """
    # MULTILINE makes ^ match at the start of every line, not only the first.
    label_at_line_start = re.compile(r'^(Human:|AI:|Chelsea:)\s*', re.MULTILINE)
    return label_at_line_start.sub('', text)

def exctrator(sentence, phrase="show me your image"):
    """Return the text that follows the first occurrence of ``phrase``.

    Args:
        sentence: Text to search.
        phrase: Marker to look for; the match is case-sensitive.

    Returns:
        Everything after the first ``phrase`` with surrounding whitespace
        stripped, or "" when ``phrase`` does not occur in ``sentence``.

    Raises:
        ValueError: If ``phrase`` is an empty string.
    """
    # partition() keeps the whole tail, so a sentence that repeats the phrase
    # is not cut off at the second occurrence.
    _, found, tail = sentence.partition(phrase)
    return tail.strip() if found else ""

def switching(text):
    """Route a request to the image pipeline or to the chat model.

    If the lowercased text contains "show me your image", the remainder of
    the request (as returned by ``exctrator``) becomes the prompt for
    ``ic.send2ai`` with the ``vlm`` model, provided ``ic.load_image()``
    returns an image. Any other text goes through ``select_prompt`` and then
    ``chat.chatting``.

    Args:
        text: The user's request, or None.

    Returns:
        The result of ``ic.send2ai`` or ``chat.chatting``; None when ``text``
        is None or when ``ic.load_image()`` returns None.
    """
    if text is None:
        # A missing request cannot be routed; stop before calling text.lower().
        print("Error because your voice request is None")
        return None

    lowered = text.lower()
    result = None

    if "show me your image" in lowered:
        prompt = exctrator(lowered)
        # Load the image
        uploaded_image = ic.load_image()

        if uploaded_image is not None:
            # An image is available, so process it
            result = ic.send2ai(model=vlm, prompt=prompt)
        else:
            # No image has been uploaded yet, so show a warning
            st.warning("No image uploaded yet. Please upload an image to continue.")
    else:
        prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
        # Fall back to the raw text when no prompt was selected
        result = chat.chatting(prompt=prompt if prompt is not None else text)

    print(f"Prompt:\n{prompt}")
    return result

def get_text():
    """Record audio with the microphone widget and transcribe it.

    Returns:
        The text produced by ``A2T.predict()``, or None when the widget has
        no recording or when any step raises. On success the text is also
        stored in ``text_dict['text']``.
    """
    try:
        mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
        if mic is None:
            # Nothing has been recorded on this run; this is the normal idle
            # state, not an error, so return without logging a failure.
            return None

        start_time = time.perf_counter()
        a2t = A2T(mic["bytes"])
        text = a2t.predict()
        print(f"Text from A2T:\n{text}")
        execution_time = time.perf_counter() - start_time
        print(f"App.py -> get_text() -> time of execution A2T -> {execution_time}s")
        text_dict['text'] = text

        return text
    except Exception as e:
        print(f"An error occurred in get_text function, reason is: {e}")
        return None  # Return None on error

def speaking(text):
    """Answer a request, play the answer aloud and show it on the page.

    The request is passed to ``switching``; speaker labels are removed from
    the result with ``remove_labels_with_regex``; the cleaned response is
    played with ``t2a.autoplay`` and, when non-empty, rendered together with
    the input via ``st.markdown``. Exceptions are caught and printed.

    Args:
        text: The user's request. None, empty or whitespace-only text is
            ignored.

    Returns:
        None.
    """
    try:
        if not text or not text.strip():
            return

        print(f"Checking for execution this part {random.randint(0, 5)}")
        output = switching(text)
        if output is None:
            # switching() produced no answer (e.g. no image uploaded yet);
            # stop here rather than passing None to the regex cleanup.
            return

        response = remove_labels_with_regex(text=output)
        start_time_t2a = time.perf_counter()
        t2a.autoplay(response)
        execution_time_t2a = time.perf_counter() - start_time_t2a
        print(f"App.py -> speaking() -> time of execution T2A -> {execution_time_t2a}s")
        print(ic.pil_image)

        if response:
            st.markdown(f"Your input: {text}")
            st.markdown(f"Chelsea response: {response}")

    except Exception as e:
        print(f"An error occurred in speaking function, reason is: {e}")

def main():
    """Run one pass of the app: get the request text, then answer it.

    When ``get_text()`` returns None, the value stored under
    ``text_dict['text']`` (if any) is used instead.
    """
    request = get_text()

    if request is None:
        # .get() leaves the request as None when nothing has been stored.
        request = text_dict.get('text')

    print(f"Text dict: {text_dict}")
    print(f"Print text: s{request}s")
    speaking(request)
    print(f"Checking for execution main func {random.randint(0, 10)}")

# Script entry point: call main() when this file is run directly.
if __name__ == "__main__":
    main()