# File size: 3,713 Bytes (page-viewer residue -- blame hashes and line-number gutter -- removed)
# version - ArcticMonkeys:30.07.24
# python core libraries
import re
import psutil
import time
import random
# streamlit
import streamlit as st
# components from other authors
from streamlit_mic_recorder import mic_recorder
# core modules
from audio_processing.A2T import A2T
from audio_processing.T2A import T2A
from llm.utils.chat import Conversation
from vlm.vlm import VLM
# utils modules
from utils.keywords import keywords
from utils.prompt_toggle import select_prompt, load_prompts
from utils.image_caption import ImageCaption
# Shared module-level objects used by the functions below.
prompts = load_prompts()  # prompt collection handed to select_prompt() in switching()
chat = Conversation()  # chat backend; switching() calls chat.chatting()
t2a = T2A()  # presumably text-to-audio; speaking() calls t2a.autoplay()
vlm = VLM()  # passed as `model` to ic.send2ai() in switching()
ic = ImageCaption()  # image helper; load_image(), send2ai() and pil_image are used below
text_dict = {}  # get_text() stores the latest transcription under the 'text' key
def remove_labels_with_regex(text: str):
    """Strip a leading speaker label from every line of *text*.

    A label is ``Human:``, ``AI:`` or ``Chelsea:`` located at the very start
    of a line, together with the whitespace that follows it (line breaks
    included, since ``\\s`` matches them). Labels that appear mid-line are
    left alone.

    Args:
        text: The raw reply to clean up.

    Returns:
        The text with the line-leading labels removed.
    """
    # MULTILINE makes ``^`` anchor at the start of each line, not only the string.
    leading_label = re.compile(r'^(Human:|AI:|Chelsea:)\s*', re.MULTILINE)
    return leading_label.sub('', text)
def exctrator(sentence, phrase="show me your image"):
    """Return the text that follows the first occurrence of *phrase*.

    Args:
        sentence: The string to search.
        phrase: The marker to look for (matched case-sensitively).

    Returns:
        Everything after the first occurrence of *phrase*, with surrounding
        whitespace stripped, or ``""`` when *phrase* does not occur.

    Raises:
        ValueError: If *phrase* is an empty string.
    """
    # partition() cuts only at the first occurrence, so a sentence that repeats
    # the phrase keeps its full remainder instead of being truncated at the
    # second occurrence (as ``split(phrase)[1]`` would do).
    _, found, remainder = sentence.partition(phrase)
    return remainder.strip() if found else ""
def switching(text):
    """Route a request either to the image model or to the chat model.

    If the request contains "show me your image" (case-insensitive), the text
    after that phrase is used as the prompt for ``ic.send2ai`` -- provided
    ``ic.load_image()`` returns an image; otherwise a Streamlit warning is
    shown. Any other request goes to ``chat.chatting`` with the prompt chosen
    by ``select_prompt`` (falling back to the raw text when that is None).

    Args:
        text: The user's request as a string.

    Returns:
        The model's reply, or None when an image was requested but
        ``ic.load_image()`` returned None.

    Raises:
        ValueError: If *text* is None.
    """
    # Fail early with a clear message. Previously a None request produced a
    # truthy placeholder string that sent execution into the image branch,
    # where ``text.lower()`` crashed with an unrelated AttributeError.
    if text is None:
        raise ValueError("Error because your voice request is None")

    lowered = text.lower()
    command = re.search("show me your image", lowered, re.IGNORECASE)
    result = None
    if command:
        prompt = exctrator(lowered)
        # Load the image
        uploaded_image = ic.load_image()
        if uploaded_image is not None:
            # An image is available, so run the processing
            result = ic.send2ai(model=vlm, prompt=prompt)
        else:
            # No image has been uploaded yet, so show a warning
            st.warning("No image uploaded yet. Please upload an image to continue.")
    else:
        prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
        result = chat.chatting(prompt=prompt if prompt is not None else text)
    print(f"Prompt:\n{prompt}")
    return result
def get_text():
    """Record audio with the microphone widget and return its transcription.

    The recording's bytes are passed to ``A2T`` and the predicted text is also
    stored in ``text_dict['text']``. The A2T run time is printed.

    Returns:
        The transcribed text, or None when there is no recording to process
        or when any step raises.
    """
    try:
        mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
        if not mic:
            # No finished recording yet: this is the normal idle state, so
            # return quietly instead of failing on ``mic["bytes"]`` and
            # logging it as an error.
            return None
        start_time = time.perf_counter()
        a2t = A2T(mic["bytes"])
        text = a2t.predict()
        print(f"Text from A2T:\n{text}")
        execution_time = time.perf_counter() - start_time
        print(f"App.py -> get_text() -> time of execution A2T -> {execution_time}s")
        text_dict['text'] = text
        return text
    except Exception as e:
        print(f"An error occurred in get_text function, reason is: {e}")
        return None  # Return None on failure
def speaking(text):
    """Answer *text*, play the answer as audio and show it on the page.

    The request is passed to ``switching``; speaker labels are stripped from
    the reply, which is then handed to ``t2a.autoplay`` and, when non-empty,
    rendered with ``st.markdown`` next to the user's input. Any exception is
    caught and printed rather than propagated.

    Args:
        text: The user's request; None, empty or whitespace-only input is
            ignored.

    Returns:
        None.
    """
    try:
        if not text or not text.strip():
            return
        print(f"Checking for execution this part {random.randint(0, 5)}")
        output = switching(text)
        if output is None:
            # switching() produced no reply (e.g. it only warned about a
            # missing image). Stop here instead of letting the label regex
            # raise TypeError on None.
            return
        response = remove_labels_with_regex(text=output)
        start_time_t2a = time.perf_counter()
        t2a.autoplay(response)
        execution_time_t2a = time.perf_counter() - start_time_t2a
        print(f"App.py -> speaking() -> time of execution T2A -> {execution_time_t2a}s")
        print(ic.pil_image)
        if response:
            st.markdown(f"Your input: {text}")
            st.markdown(f"Chelsea response: {response}")
    except Exception as e:
        print(f"An error occurred in speaking function, reason is: {e}")
def main():
    """Run one pass of the app: obtain the request text, then answer it.

    When ``get_text`` yields None, the value stored under ``'text'`` in
    ``text_dict`` (if any) is used instead. Diagnostic lines are printed
    before and after ``speaking`` is called.
    """
    text = get_text()
    if text is None:
        # A missing key leaves text as None, same as having no fallback.
        text = text_dict.get('text')
    print(f"Text dict: {text_dict}")
    print(f"Print text: s{text}s")
    speaking(text)
    print(f"Checking for execution main func {random.randint(0, 10)}")
if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    main()