CineAI committed
Commit d5436e0
1 parent: 195cad5

v0.0.1_DigItBMTH
app.py CHANGED
@@ -1,38 +1,38 @@
1
- # version - ArticMonkey:19.03.24:1743
2
 
3
  import psutil
4
- import sys
5
-
6
- import streamlit as st
7
 
8
- # my modules
9
  from audio_processing.A2T import A2T
10
  from audio_processing.T2A import T2A
11
- from llm.llm import LLM_chain
 
12
 
13
- # libraries from other authors
14
- from streamlit_mic_recorder import mic_recorder
 
 
 
 
 
15
 
16
- llmchain = LLM_chain()
17
 
18
  def main():
19
- mic = mic_recorder(start_prompt="Record",stop_prompt="Stop", just_once=True)
20
 
21
  if mic is not None:
22
  a2t = A2T(mic["bytes"])
23
  text = a2t.predict()
24
- response = llmchain(entity=text, id=1)
25
-
 
 
26
  t2a = T2A(response)
27
  t2a.autoplay()
28
 
29
- print(sys.getsizeof(response))
30
-
31
- del response
32
- del a2t
33
-
34
- print('RAM memory % used after:', psutil.virtual_memory()[2])
35
 
36
  if __name__ == "__main__":
37
- print('RAM memory % used:', psutil.virtual_memory()[2]) # ~ 94 GB full memory
38
- main()
 
1
+ # version - ArcticMonkey:19.03.24:1743
2
 
3
  import psutil
4
+ # components from other authors
5
+ from streamlit_mic_recorder import mic_recorder
 
6
 
7
+ # core modules
8
  from audio_processing.A2T import A2T
9
  from audio_processing.T2A import T2A
10
+ from command.utils import build_chain
11
+ from llm.llm_factory import LLM_Factory
12
 
13
+ llm_model = LLM_Factory()
14
+
15
+ trigger = {"lc": "small"}
16
+
17
+
18
+ def prepare_cor(input_text: str):
19
+ return build_chain.build_command_chain().handle_command(input_text)
20
 
 
21
 
22
  def main():
23
+ mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True)
24
 
25
  if mic is not None:
26
  a2t = A2T(mic["bytes"])
27
  text = a2t.predict()
28
+ # prepare_cor(input_text=text)
29
+ llm = llm_model.create_llm(prompt_entity=text, prompt_id=1, trigger=trigger)
30
+ response = llm.execution() if llm is not None else "Oops occurred some error. Please try again"
31
+
32
  t2a = T2A(response)
33
  t2a.autoplay()
34
 
 
 
 
 
 
 
35
 
36
  if __name__ == "__main__":
37
+ print('RAM memory % used:', psutil.virtual_memory()[2]) # ~ 94 GB full memory
38
+ main()
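Note on the new app.py flow: prepare_cor() is defined but its call in main() is still commented out, so every transcription goes straight to the LLM factory. A minimal sketch of how the command chain could sit in front of the factory, assuming handle_command() returns None when no voice command matches (that return contract is not shown in this commit):

    def route(text: str) -> str:
        # try voice commands first; fall back to the LLM factory otherwise
        handled = prepare_cor(input_text=text)  # assumption: returns None when nothing matched
        if handled is not None:
            return handled
        llm = llm_model.create_llm(prompt_entity=text, prompt_id=1, trigger=trigger)
        return llm.execution() if llm is not None else "Oops occurred some error. Please try again"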
audio_processing/A2T.py CHANGED
@@ -2,6 +2,8 @@ import numpy as np
2
  import librosa
3
  import io
4
 
 
 
5
  from .config import pipe
6
 
7
  TASK = "transcribe"
@@ -11,30 +13,31 @@ class A2T:
11
  def __init__(self, mic):
12
  self.mic = mic
13
 
14
- def __generate_text(self, inputs, task: str = None):
15
  if inputs is None:
16
  raise Exception("Inputs is None")
17
 
18
  transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
19
  return transcribed_text
20
 
21
- def __preprocces(self, raw: bytes):
 
22
  print(f"Raw type : {type(raw)}")
23
  chunk = io.BytesIO(raw)
24
  audio, sample_rate = librosa.load(chunk, sr=16000)
25
  print(f"Sample rate : {sample_rate}")
26
  return audio
27
 
28
- def predict(self):
29
  try:
30
  if self.mic is not None:
31
  raw = self.mic
32
- audio = self.__preprocces(raw=raw)
33
  print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
34
  else:
35
  raise Exception("please provide audio")
36
 
37
- if isinstance(audio , np.ndarray):
38
  return self.__generate_text(inputs=audio, task=TASK)
39
  else:
40
  raise Exception("Audio is not np array")
 
2
  import librosa
3
  import io
4
 
5
+ from typing import Optional
6
+
7
  from .config import pipe
8
 
9
  TASK = "transcribe"
 
13
  def __init__(self, mic):
14
  self.mic = mic
15
 
16
+ def __generate_text(self, inputs, task: Optional[str] = None) -> str:
17
  if inputs is None:
18
  raise Exception("Inputs is None")
19
 
20
  transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
21
  return transcribed_text
22
 
23
+ @staticmethod
24
+ def __preprocess(raw: bytes) -> np.ndarray:
25
  print(f"Raw type : {type(raw)}")
26
  chunk = io.BytesIO(raw)
27
  audio, sample_rate = librosa.load(chunk, sr=16000)
28
  print(f"Sample rate : {sample_rate}")
29
  return audio
30
 
31
+ def predict(self) -> str:
32
  try:
33
  if self.mic is not None:
34
  raw = self.mic
35
+ audio = self.__preprocess(raw=raw)
36
  print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
37
  else:
38
  raise Exception("please provide audio")
39
 
40
+ if isinstance(audio, np.ndarray):
41
  return self.__generate_text(inputs=audio, task=TASK)
42
  else:
43
  raise Exception("Audio is not np array")
audio_processing/T2A.py CHANGED
@@ -1,22 +1,23 @@
1
  import logging
2
 
3
- import torch
 
 
4
  import librosa
5
  import soundfile as sf
 
6
 
7
- from io import BytesIO
8
  from .config import pipe_tts
9
 
10
- from streamlit_TTS import auto_play, text_to_audio
11
 
12
- SAMPLING_RATE = 16000
13
 
14
  class T2A:
15
- def __init__(self, input_text: str = None):
16
  self.text = input_text
17
  self.output_model = pipe_tts(input_text)
18
 
19
- def __get_duration(self, raw: bytes):
20
  chunk = BytesIO(raw)
21
  audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
22
  duration = librosa.get_duration(y=audio, sr=sample_rate)
@@ -27,14 +28,14 @@ class T2A:
27
  if isinstance(self.text, str):
28
  audio = text_to_audio(self.text, language=lang)
29
  auto_play(audio)
30
- else:
31
  text = f"Text you provide is {type(self.text)} accepted only string type"
32
- audio = text_to_audio(text, language=language)
33
  auto_play(audio)
34
  else:
35
  raise Exception("Text is None")
36
 
37
- def get_audio(self):
38
  try:
39
  synth = self.output_model["audio"][0]
40
 
@@ -42,7 +43,7 @@ class T2A:
42
 
43
  with BytesIO() as buffer:
44
  sf.write(buffer, synth, SAMPLING_RATE, format='wav')
45
- output = buffer.getvalue() # bytes
46
 
47
  print(f"type : {type(output)}")
48
 
@@ -52,4 +53,4 @@ class T2A:
52
 
53
  return output, SAMPLING_RATE, duration
54
  except Exception as e:
55
- logging.error(e)
 
1
  import logging
2
 
3
+ from io import BytesIO
4
+ from typing import Optional
5
+
6
  import librosa
7
  import soundfile as sf
8
+ from streamlit_TTS import auto_play, text_to_audio
9
 
 
10
  from .config import pipe_tts
11
 
12
+ SAMPLING_RATE = 16_000
13
 
 
14
 
15
  class T2A:
16
+ def __init__(self, input_text: Optional[str] = None):
17
  self.text = input_text
18
  self.output_model = pipe_tts(input_text)
19
 
20
+ def __get_duration(self, raw: bytes) -> float:
21
  chunk = BytesIO(raw)
22
  audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
23
  duration = librosa.get_duration(y=audio, sr=sample_rate)
 
28
  if isinstance(self.text, str):
29
  audio = text_to_audio(self.text, language=lang)
30
  auto_play(audio)
31
+ else: # more checking
32
  text = f"Text you provide is {type(self.text)} accepted only string type"
33
+ audio = text_to_audio(text, language=lang)
34
  auto_play(audio)
35
  else:
36
  raise Exception("Text is None")
37
 
38
+ def get_audio(self) -> tuple[bytes, int, float]:
39
  try:
40
  synth = self.output_model["audio"][0]
41
 
 
43
 
44
  with BytesIO() as buffer:
45
  sf.write(buffer, synth, SAMPLING_RATE, format='wav')
46
+ output = buffer.getvalue() # bytes
47
 
48
  print(f"type : {type(output)}")
49
 
 
53
 
54
  return output, SAMPLING_RATE, duration
55
  except Exception as e:
56
+ logging.error(e)
audio_processing/config.py CHANGED
@@ -1,9 +1,10 @@
1
  import torch
2
 
3
  from transformers import pipeline
4
 
5
- # ArticMonkey:19.03.24:1700 example of version name in plaintext will be convert into hex using this site -> https://magictool.ai/tool/text-to-hex-converter/
6
- # Here ArticMonkey is name of version and rest of all is data and time
7
 
8
  device = 0 if torch.cuda.is_available() else "cpu"
9
 
 
1
+ # ArcticMonkey:19.03.24:1700 example of version name in plaintext will be converted into hex using this site ->
2
+ # https://magictool.ai/tool/text-to-hex-converter/ Here ArcticMonkey is name of version and rest of all is data and time
3
+
4
  import torch
5
 
6
  from transformers import pipeline
7
 
 
 
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
10
 
command/basic/documentation.py CHANGED
@@ -1,15 +1,15 @@
1
- import os
2
  import logging
3
 
4
- import streamlit as st
5
- import core.form_documentation as fd
6
 
7
  from typing import Optional
8
  from ..command_handler import CommandHandler
9
 
10
 
11
  class DocumentationHandler(CommandHandler):
12
- def __init__(self, commands, successor: Optional["CommandHandler"] = None):
13
  super().__init__(successor)
14
  self.commands = commands
15
 
@@ -20,13 +20,12 @@ class DocumentationHandler(CommandHandler):
20
  super().handle_command(command)
21
 
22
  def execute_command(self):
23
- current_dir = os.path.dirname(os.path.realpath(__file__))
24
- path_to_file = fd.generate_doc(path=current_dir)
25
 
26
  if path_to_file is not None:
27
- with st.sidebar:
28
- st.download_button('Download doc', path_to_file)
29
- st.success("Done", icon="✅")
30
  else:
31
  logging.error("path_to_file is None")
32
-
 
1
+ import os
2
  import logging
3
 
4
+ import streamlit as st
5
+ from ..utils.form_documentation import generate_doc
6
 
7
  from typing import Optional
8
  from ..command_handler import CommandHandler
9
 
10
 
11
  class DocumentationHandler(CommandHandler):
12
+ def __init__(self, commands, successor: Optional["CommandHandler"] = None):
13
  super().__init__(successor)
14
  self.commands = commands
15
 
 
20
  super().handle_command(command)
21
 
22
  def execute_command(self):
23
+ current_dir = os.path.dirname(os.path.realpath(__file__))
24
+ path_to_file = generate_doc(path=current_dir)
25
 
26
  if path_to_file is not None:
27
+ with st.sidebar:
28
+ st.download_button('Download doc', path_to_file)
29
+ st.success("Done", icon="✅")
30
  else:
31
  logging.error("path_to_file is None")
 
command/command_handler.py CHANGED
@@ -1,6 +1,7 @@
1
  from typing import Optional
2
  from abc import ABC, abstractmethod
3
 
 
4
  class CommandHandler(ABC):
5
  def __init__(self, successor: Optional["CommandHandler"] = None):
6
  self.successor = successor
@@ -11,4 +12,4 @@ class CommandHandler(ABC):
11
 
12
  @abstractmethod
13
  def execute_command(self):
14
- """Method of processing command execution logic"""
 
1
  from typing import Optional
2
  from abc import ABC, abstractmethod
3
 
4
+
5
  class CommandHandler(ABC):
6
  def __init__(self, successor: Optional["CommandHandler"] = None):
7
  self.successor = successor
 
12
 
13
  @abstractmethod
14
  def execute_command(self):
15
+ """Method of processing command execution logic"""
command/utils/build_chain.py ADDED
@@ -0,0 +1,19 @@
1
+ from ..basic.documentation import DocumentationHandler
2
+ from ..utils.read_yaml import load_commands_from_yaml
3
+
4
+
5
+ def build_command_chain():
6
+ commands = load_commands_from_yaml(file_path="../commands.yaml")
7
+ handlers = None
8
+ for cmd_data in commands:
9
+ command_dict = cmd_data.get('command_dict', {})
10
+ commands_list = [cmd.lower() for cmds in command_dict.values() for cmd in cmds]
11
+ description = cmd_data.get('description', '')
12
+
13
+ if description.lower() == "download text file with all commands":
14
+ handler = DocumentationHandler(commands_list, handlers)
15
+ else:
16
+ handler = None
17
+
18
+ handlers = handler
19
+ return handlers
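build_command_chain() loads commands.yaml through load_commands_from_yaml() and only wires up a DocumentationHandler for the entry whose description is "download text file with all commands"; every other entry resets the chain to None. The actual commands.yaml is not part of this commit, so the layout below is inferred from the keys the code reads (commands, command_dict, description) and is only a sketch:

    # hypothetical commands.yaml shape inferred from build_command_chain()
    # commands:
    #   - description: "Download text file with all commands"
    #     command_dict:
    #       en: ["download commands", "load commands", "commands"]
    #       uk: ["Завантаж команди", "Скачай команди"]

    chain = build_command_chain()          # DocumentationHandler or None
    if chain is not None:
        chain.handle_command("download commands")

Note that file_path="../commands.yaml" is resolved against the process working directory, so the chain only builds when the app is started from the expected folder.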
command/{core → utils}/form_documentation.py RENAMED
@@ -19,23 +19,22 @@ The list of commands and their use.
19
  Command documentation: the user downloads a text file that describes the commands in detail.
20
  The information on how to use the very first and main command will be announced during the first use.
21
  The following list of voice commands in English is available for calling the command: "Download commands", "download commands", "load commands", "boot commands", "install commands", "commands", "load", "boot", "download", "install".
22
- In Ukrainian: "Завантажити команди", "Скачати команди", "Загрузити команди", "Переглянути команди", "Завантажити", "Скачати", "Загрузити", "Переглянути", "команди".
23
  """
24
 
25
- def generate_doc(path: Optional[str] = None) -> Optional[str]:
26
-
27
  if path is not None:
28
- file = os.path.join(path, NAME)
29
  else:
30
- current_dir = os.path.dirname(os.path.realpath(__file__))
31
  file = os.path.join(current_dir, NAME)
32
 
33
  logging.info(file)
34
-
35
  try:
36
  with open(file, 'w') as f:
37
  f.write(TEMPLATE)
38
  return file
39
  except IOError as e:
40
  logging.error(e)
41
-
 
19
  Command documentation: the user downloads a text file that describes the commands in detail.
20
  The information on how to use the very first and main command will be announced during the first use.
21
  The following list of voice commands in English is available for calling the command: "Download commands", "download commands", "load commands", "boot commands", "install commands", "commands", "load", "boot", "download", "install".
22
+ In Ukrainian: "Завантаж команди", "Скачай команди", "Загрузи команди", "Завантаж", "Скачай", "Загрузи", "команди".
23
  """
24
 
25
+
26
+ def generate_doc(path: Optional[str] = None) -> Optional[str]:
27
  if path is not None:
28
+ file = os.path.join(path, NAME)
29
  else:
30
+ current_dir = os.path.dirname(os.path.realpath(__file__))
31
  file = os.path.join(current_dir, NAME)
32
 
33
  logging.info(file)
34
+
35
  try:
36
  with open(file, 'w') as f:
37
  f.write(TEMPLATE)
38
  return file
39
  except IOError as e:
40
  logging.error(e)
 
command/utils/read_yaml.py ADDED
@@ -0,0 +1,12 @@
1
+ import yaml
2
+ import logging
3
+ from typing import Optional, Any
4
+
5
+
6
+ def load_commands_from_yaml(file_path: Optional[str] = None) -> Any:
7
+ try:
8
+ with open(file_path, 'r') as file:
9
+ commands_data = yaml.safe_load(file)
10
+ return commands_data.get('commands', [])
11
+ except IOError as e:
12
+ logging.error(e)
llm/config.py CHANGED
@@ -1 +1,34 @@
1
- config = {"model": "mistralai/Mixtral-8x7B-Instruct-v0.1", "temperature": 0.1, "max_new_tokens": 1024, "top_k": 5, "load_in_8bit": True}

1
+ config = {
2
+ "HF_Mistrail": {
3
+ "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
4
+ "temperature": 0.1,
5
+ "max_new_tokens": 1024,
6
+ "top_k": 5,
7
+ "load_in_8bit": True
8
+ },
9
+ "HF_TinyLlama": {
10
+ "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
11
+ "temperature": 0.1,
12
+ "max_new_tokens": 1024,
13
+ "top_k": 5,
14
+ "top_p":0.95,
15
+ "load_in_8bit": True,
16
+ "do_sample": True
17
+ },
18
+ "LC_TinyLlama-1.1B-Chat-v1.0-GGUF": {
19
+ "model_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
20
+ "model_name": "tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin",
21
+ "temperature": 0.4,
22
+ "max_tokens": 868,
23
+ "top_p": 0.8,
24
+ "top_k": 5,
25
+ },
26
+ "LC_Phi-3-mini-4k-instruct-gguf": {
27
+ "model_url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
28
+ "model_name": "Phi-3-mini-4k-instruct-gguf.bin",
29
+ "temperature": 0.4,
30
+ "max_tokens": 868,
31
+ "top_p": 0.8,
32
+ "top_k": 5,
33
+ }
34
+ }
llm/huggingfacehub/hf_model.py ADDED
@@ -0,0 +1,113 @@
1
+ import logging
2
+ import os
3
+ import yaml
4
+
5
+ from abc import ABC
6
+
7
+ from llm.llm_interface import LLMInterface
8
+ from llm.config import config
9
+
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain.chains import LLMChain
12
+ from langchain.llms import HuggingFaceEndpoint
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ logger.setLevel(logging.CRITICAL) # because if something went wrong in execution application can't be work anymore
17
+
18
+ file_handler = logging.FileHandler(
19
+ "logs/chelsea_llm_huggingfacehub.log") # for all modules here template for logs file is "../logs/chelsea_{module_name}_{dir_name}.log"
20
+ logger.setLevel(logging.INFO) # informed
21
+
22
+ formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
23
+ file_handler.setFormatter(formatted)
24
+
25
+ logger.addHandler(file_handler)
26
+
27
+ logger.info("Getting information from hf_model module")
28
+
29
+
30
+ class HF_Mistaril(LLMInterface, ABC):
31
+ def __init__(self, prompt_entity: str, prompt_id: int = 0):
32
+ self.prompt_entity = prompt_entity
33
+ self.prompt_id = prompt_id
34
+
35
+ self.model_config = config["HF_Mistrail"]
36
+
37
+ self.llm = HuggingFaceEndpoint(
38
+ repo_id=self.model_config["model"],
39
+ model_kwargs={"temperature": self.model_config["temperature"],
40
+ "max_new_tokens": self.model_config["max_new_tokens"],
41
+ "top_k": self.model_config["top_k"], "load_in_8bit": self.model_config["load_in_8bit"]})
42
+
43
+ @staticmethod
44
+ def __read_yaml():
45
+ try:
46
+ yaml_file = os.path.join("../", 'prompts.yaml')
47
+ with open(yaml_file, 'r') as file:
48
+ data = yaml.safe_load(file)
49
+ return data
50
+ except Exception as e:
51
+ logger.error(msg="Execution filed", exc_info=e)
52
+
53
+ def execution(self):
54
+ try:
55
+ data = self.__read_yaml()
56
+ prompts = data["prompts"][
57
+ self.prompt_id] #get second prompt from yaml, need change id parameter to get other prompt
58
+ template = prompts["prompt_template"]
59
+ prompt = PromptTemplate(template=template, input_variables=["entity"])
60
+ llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
61
+ output = llm_chain.invoke(self.prompt_entity)
62
+ return output["text"]
63
+ except Exception as e:
64
+ logger.critical(msg="Execution filed", exc_info=e)
65
+
66
+ def __str__(self):
67
+ return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
68
+
69
+ def __repr__(self):
70
+ return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
71
+
72
+
73
+ class HF_TinyLlama(LLMInterface, ABC):
74
+ def __init__(self, prompt_entity: str, prompt_id: int = 0):
75
+ self.prompt_entity = prompt_entity
76
+ self.prompt_id = prompt_id
77
+
78
+ self.model_config = config["HF_TinyLlama"]
79
+
80
+ self.llm = HuggingFaceEndpoint(
81
+ repo_id=self.model_config["model"],
82
+ model_kwargs={"temperature": self.model_config["temperature"],
83
+ "max_new_tokens": self.model_config["max_new_tokens"],
84
+ "top_k": self.model_config["top_k"], "load_in_8bit": self.model_config["load_in_8bit"]})
85
+
86
+ @staticmethod
87
+ def __read_yaml():
88
+ try:
89
+ yaml_file = os.path.join("../", 'prompts.yaml')
90
+ with open(yaml_file, 'r') as file:
91
+ data = yaml.safe_load(file)
92
+ return data
93
+ except Exception as e:
94
+ logger.error(msg="Execution filed", exc_info=e)
95
+
96
+ def execution(self):
97
+ try:
98
+ data = self.__read_yaml()
99
+ prompts = data["prompts"][
100
+ self.prompt_id] #get second prompt from yaml, need change id parameter to get other prompt
101
+ template = prompts["prompt_template"]
102
+ prompt = PromptTemplate(template=template, input_variables=["entity"])
103
+ llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
104
+ output = llm_chain.invoke(self.prompt_entity)
105
+ return output["text"]
106
+ except Exception as e:
107
+ logger.critical(msg="Execution filed", exc_info=e)
108
+
109
+ def __str__(self):
110
+ return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
111
+
112
+ def __repr__(self):
113
+ return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
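Both HF classes resolve prompts.yaml as os.path.join("../", 'prompts.yaml'), which depends on the process working directory (llm/llm.py anchors the same file on __file__ instead), and the logger level is set to CRITICAL and then immediately to INFO, so INFO wins. A small sketch of a CWD-independent loader, assuming prompts.yaml lives in the llm/ package directory as llm/llm.py suggests:

    import os
    import yaml

    def read_prompts() -> dict:
        # resolve prompts.yaml relative to this module, not the current working directory
        base_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(base_dir, "..", "prompts.yaml"), "r") as fh:
            return yaml.safe_load(fh)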
llm/llamacpp/lc_model.py ADDED
@@ -0,0 +1,196 @@
1
+ import logging
2
+ import os
3
+ from abc import ABC
4
+
5
+ import requests
6
+ import yaml
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain_community.llms import LlamaCpp
9
+
10
+ from llm.config import config
11
+ from llm.llm_interface import LLMInterface
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ logger.setLevel(logging.CRITICAL) # because if something went wrong in execution application can't be work anymore
16
+
17
+ file_handler = logging.FileHandler(
18
+ "logs/chelsea_llm_llamacpp.log") # for all modules template for logs file is "logs/chelsea_{module_name}_{dir_name}.log"
19
+ logger.setLevel(logging.INFO) # informed
20
+
21
+ formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
22
+ file_handler.setFormatter(formatted)
23
+
24
+ logger.addHandler(file_handler)
25
+
26
+
27
+ class LC_TinyLlama(LLMInterface, ABC):
28
+ def __init__(self, prompt_entity: str, prompt_id: int = 0):
29
+ self.prompt_entity = prompt_entity
30
+ self.prompt_id = prompt_id
31
+
32
+ self.model_config = config["LC_TinyLlama-1.1B-Chat-v1.0-GGUF"]
33
+
34
+ try:
35
+ get_file = requests.get(self.model_config["model_url"])
36
+ if get_file.status_code == 200:
37
+ path_to_model = os.path.join("../models", self.model_config["model_name"])
38
+ with open(path_to_model, "wb") as f:
39
+ f.write(get_file.content)
40
+ logger.info("Model file successfully recorded")
41
+ f.close()
42
+ except OSError as e:
43
+ logger.error(msg="Error while write a file to directory", exc_info=e)
44
+
45
+ @staticmethod
46
+ def __read_yaml():
47
+ try:
48
+ yaml_file = os.path.join("../", 'prompts.yaml')
49
+ with open(yaml_file, 'r') as file:
50
+ data = yaml.safe_load(file)
51
+ return data
52
+ except Exception as e:
53
+ logger.error(msg="Execution filed", exc_info=e)
54
+
55
+ def execution(self):
56
+ try:
57
+ data = self.__read_yaml()
58
+ prompts = data["prompts"][
59
+ self.prompt_id] # get second prompt from yaml, need change id parameter to get other prompt
60
+ template = prompts["prompt_template"]
61
+ prompt = PromptTemplate(template=template, input_variables=["entity"])
62
+
63
+ llm = LlamaCpp(
64
+ model_path=os.path.join("../models", self.model_config["model_name"]),
65
+ temperature=self.model_config["temperature"],
66
+ max_tokens=self.model_config["max_tokens"],
67
+ top_p=self.model_config["top_p"],
68
+ top_k=self.model_config["top_k"],
69
+ # callback_manager=callback_manager,
70
+ verbose=True, # Verbose is required to pass to the callback manager
71
+ )
72
+
73
+ logger.info(f"Check llm : {llm}")
74
+
75
+ llm_chain = prompt | llm
76
+ output = llm_chain.invoke({"question": self.prompt_entity})
77
+ return output
78
+ except Exception as e:
79
+ logger.critical(msg="Execution filed", exc_info=e)
80
+
81
+ def clear_llm(self, unused_model_dict, current_lc):
82
+ # If unused_model_dict is not empty
83
+ if len(unused_model_dict) > 1 and unused_model_dict is not None:
84
+ # go through key and value
85
+ for key, value in zip(unused_model_dict.keys(), unused_model_dict.values()):
86
+ # check if path is existing and key is not current using model
87
+ if os.path.exists(value) and key != current_lc:
88
+ # delete files from models directory except of current_lc
89
+ os.remove(value)
90
+ logger.info(f"Successfully deleted file {value}")
91
+ else:
92
+ logger.info(f"Unfortunately dictionary empty or None")
93
+
94
+ def get_unused(self, current_lc):
95
+ models_dir = "../models"
96
+
97
+ if len(os.listdir(models_dir)) > 1:
98
+ file_names = [os.path.basename(md) for md in os.listdir(models_dir)]
99
+ for item in file_names:
100
+ if item != current_lc:
101
+ unused_model_file = os.path.join(models_dir, item)
102
+ return {item: unused_model_file}
103
+ else:
104
+ return None
105
+
106
+ def __str__(self):
107
+ return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
108
+
109
+ def __repr__(self):
110
+ return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
111
+
112
+
113
+ class LC_Phi3(LLMInterface, ABC):
114
+ def __init__(self, prompt_entity: str, prompt_id: int = 0):
115
+ self.prompt_entity = prompt_entity
116
+ self.prompt_id = prompt_id
117
+
118
+ self.model_config = config["LC_Phi-3-mini-4k-instruct-gguf"]
119
+
120
+ try:
121
+ get_file = requests.get(self.model_config["model_url"])
122
+ if get_file.status_code == 200:
123
+ path_to_model = os.path.join("../models", self.model_config["model_name"])
124
+ with open(path_to_model, "wb") as f:
125
+ f.write(get_file.content)
126
+ logger.info("Model file successfully recorded")
127
+ f.close()
128
+ except OSError as e:
129
+ logger.error(msg="Error while write a file to directory", exc_info=e)
130
+
131
+ @staticmethod
132
+ def __read_yaml():
133
+ try:
134
+ yaml_file = os.path.join("../", 'prompts.yaml')
135
+ with open(yaml_file, 'r') as file:
136
+ data = yaml.safe_load(file)
137
+ return data
138
+ except Exception as e:
139
+ logger.error(msg="Execution filed", exc_info=e)
140
+
141
+ def execution(self):
142
+ try:
143
+ data = self.__read_yaml()
144
+ prompts = data["prompts"][
145
+ self.prompt_id] # get second prompt from yaml, need change id parameter to get other prompt
146
+ template = prompts["prompt_template"]
147
+ prompt = PromptTemplate(template=template, input_variables=["entity"])
148
+
149
+ llm = LlamaCpp(
150
+ model_path=os.path.join("../models", self.model_config["model_name"]),
151
+ temperature=self.model_config["temperature"],
152
+ max_tokens=self.model_config["max_tokens"],
153
+ top_p=self.model_config["top_p"],
154
+ top_k=self.model_config["top_k"],
155
+ # callback_manager=callback_manager,
156
+ verbose=True, # Verbose is required to pass to the callback manager
157
+ )
158
+
159
+ logger.info(f"Check llm : {llm}")
160
+
161
+ llm_chain = prompt | llm
162
+ output = llm_chain.invoke({"question": self.prompt_entity})
163
+ return output
164
+ except Exception as e:
165
+ logger.critical(msg="Execution filed", exc_info=e)
166
+
167
+ def clear_llm(self, unused_model_dict, current_lc):
168
+ # If unused_model_dict is not empty
169
+ if len(unused_model_dict) > 1:
170
+ # go through key and value
171
+ for key, value in zip(unused_model_dict.keys(), unused_model_dict.values()):
172
+ # check if path is existing and key is not current using model
173
+ if os.path.exists(value) and key != current_lc:
174
+ # delete files from models directory except of current_lc
175
+ os.remove(value)
176
+ logger.info(f"Successfully deleted file {value}")
177
+ else:
178
+ logger.info(f"Unfortunately dictionary empty")
179
+
180
+ def get_unused(self, current_lc):
181
+ models_dir = "../models"
182
+
183
+ if len(os.listdir(models_dir)) > 1:
184
+ file_names = [os.path.basename(md) for md in os.listdir(models_dir)]
185
+ for item in file_names:
186
+ if item != current_lc:
187
+ unused_model_file = os.path.join(models_dir, item)
188
+ return {item: unused_model_file}
189
+ else:
190
+ return None
191
+
192
+ def __str__(self):
193
+ return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
194
+
195
+ def __repr__(self):
196
+ return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
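In clear_llm(), len(unused_model_dict) is evaluated before the None check, so passing None raises TypeError, and the > 1 guard skips the single-entry dict that get_unused() returns. A drop-in sketch for the class (reusing the module's os and logger), with the guard reordered:

    def clear_llm(self, unused_model_dict, current_lc):
        # truthiness covers both None and an empty dict; a one-entry dict still gets cleaned
        if unused_model_dict:
            for key, value in unused_model_dict.items():
                if os.path.exists(value) and key != current_lc:
                    os.remove(value)
                    logger.info(f"Successfully deleted file {value}")
        else:
            logger.info("Unfortunately dictionary empty or None")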
llm/llm.py CHANGED
@@ -1,31 +1,29 @@
1
- # {'prompts': [
2
- # {'id': 1, 'prompt_template': 'Question: {question}\nAnswer: \n', 'description': 'simple question without a prompt', 'rate': 1},
3
- # {'id': 2, 'prompt_template': "Question: {question}\nAnswer: Write a concise answer on the question with
4
- # one example if it's possible. CONCISE ANSWER.\n", 'description': 'simple concise prompt', 'rate': 3}]}
5
-
6
-
7
  import os
8
 
9
  import yaml
10
  import logging
11
 
12
  from langchain.prompts import PromptTemplate
13
- from langchain.chains import LLMChain
14
- from langchain.llms import HuggingFaceHub
15
- from .config import config
 
 
16
 
17
  class LLM_chain:
18
  def __init__(self):
19
- self.llm = HuggingFaceHub(
20
- repo_id=config["model"],
21
- model_kwargs={"temperature": config["temperature"], "max_new_tokens": config["max_new_tokens"], "top_k": config["top_k"], "load_in_8bit": config["load_in_8bit"]})
 
22
 
23
- def __read_yaml(self):
 
24
  try:
25
- # get current dir
26
  current_dir = os.path.dirname(os.path.realpath(__file__))
27
  yaml_file = os.path.join(current_dir, 'prompts.yaml')
28
-
29
  with open(yaml_file, 'r') as file:
30
  data = yaml.safe_load(file)
31
  return data
@@ -35,11 +33,11 @@ class LLM_chain:
35
  def __call__(self, entity: str, id: int = 0):
36
  try:
37
  data = self.__read_yaml()
38
- prompts = data["prompts"][id] # get second prompt from yaml, need change id parameter to get other prompt
39
  template = prompts["prompt_template"]
40
  prompt = PromptTemplate(template=template, input_variables=["entity"])
41
  llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
42
  output = llm_chain.invoke(entity)
43
  return output["text"]
44
  except Exception as e:
45
- logging.error(e)

1
  import os
2
 
3
  import yaml
4
  import logging
5
 
6
  from langchain.prompts import PromptTemplate
7
+ from langchain.chains import LLMChain
8
+ from langchain.llms import HuggingFaceEndpoint
9
+ from .config import config
10
+
11
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
12
 
13
  class LLM_chain:
14
  def __init__(self):
15
+ self.llm = HuggingFaceEndpoint(
16
+ repo_id=config["model"],
17
+ model_kwargs={"temperature": config["temperature"], "max_new_tokens": config["max_new_tokens"],
18
+ "top_k": config["top_k"], "load_in_8bit": config["load_in_8bit"]})
19
 
20
+ @staticmethod
21
+ def __read_yaml():
22
  try:
23
+ # get current dir
24
  current_dir = os.path.dirname(os.path.realpath(__file__))
25
  yaml_file = os.path.join(current_dir, 'prompts.yaml')
26
+
27
  with open(yaml_file, 'r') as file:
28
  data = yaml.safe_load(file)
29
  return data
 
33
  def __call__(self, entity: str, id: int = 0):
34
  try:
35
  data = self.__read_yaml()
36
+ prompts = data["prompts"][id] # get second prompt from yaml, need change id parameter to get other prompt
37
  template = prompts["prompt_template"]
38
  prompt = PromptTemplate(template=template, input_variables=["entity"])
39
  llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
40
  output = llm_chain.invoke(entity)
41
  return output["text"]
42
  except Exception as e:
43
+ logging.error(e)
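The new llm.py hard-codes a Hugging Face Hub token in source; a token committed in plain text generally has to be rotated. Since python-dotenv is already in requirements.txt, a sketch of reading the same HUGGINGFACEHUB_API_TOKEN variable from the environment instead (a suggested change, not what the commit does):

    import os
    from dotenv import load_dotenv

    load_dotenv()  # picks up HUGGINGFACEHUB_API_TOKEN from a .env file or the environment
    if os.getenv("HUGGINGFACEHUB_API_TOKEN") is None:
        raise RuntimeError("HUGGINGFACEHUB_API_TOKEN is not set")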
llm/llm_factory.py ADDED
@@ -0,0 +1,24 @@
1
+ from typing import Dict, List
2
+
3
+ from .huggingfacehub.hf_model import HF_Mistaril, HF_TinyLlama
4
+ from .llamacpp.lc_model import LC_TinyLlama, LC_Phi3
5
+
6
+
7
+ class LLM_Factory:
8
+
9
+ # trigger = {"model_type": "execution_type"} -> {"hf": "small"}
10
+ @staticmethod
11
+ def create_llm(prompt_entity: str, prompt_id: int, trigger: Dict[str, str]):
12
+
13
+ if trigger.keys() == "hf" and trigger.values() == "effective":
14
+ model = HF_Mistaril(prompt_entity=prompt_entity, prompt_id=prompt_id)
15
+ elif trigger.keys() == "hf" and trigger.values() == "small":
16
+ model = HF_TinyLlama(prompt_entity=prompt_entity, prompt_id=prompt_id)
17
+ elif trigger.keys() == "lc" and trigger.values() == "effective":
18
+ model = LC_Phi3(prompt_entity=prompt_entity, prompt_id=prompt_id)
19
+ elif trigger.keys() == "lc" and trigger.values() == "small":
20
+ model = LC_TinyLlama(prompt_entity=prompt_entity, prompt_id=prompt_id)
21
+ else:
22
+ model = None
23
+
24
+ return model
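In create_llm(), trigger.keys() == "hf" compares a dict view with a string and is always False, so with app.py's trigger = {"lc": "small"} the factory returns None and main() falls back to the error string. A sketch of the same dispatch done with a lookup table, targeting the {"model_type": "execution_type"} contract stated in the comment:

    @staticmethod
    def create_llm(prompt_entity: str, prompt_id: int, trigger: Dict[str, str]):
        registry = {
            ("hf", "effective"): HF_Mistaril,
            ("hf", "small"): HF_TinyLlama,
            ("lc", "effective"): LC_Phi3,
            ("lc", "small"): LC_TinyLlama,
        }
        for model_type, execution_type in trigger.items():
            cls = registry.get((model_type, execution_type))
            if cls is not None:
                return cls(prompt_entity=prompt_entity, prompt_id=prompt_id)
        return None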
llm/llm_interface.py ADDED
@@ -0,0 +1,20 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Optional
3
+
4
+
5
+ class LLMInterface(ABC):
6
+ @abstractmethod
7
+ def execution(self):
8
+ """Method execution LLM model based on HuggingFace or Langchain"""
9
+ pass
10
+
11
+ # {"model_name": "./models/model_name"}
12
+ @abstractmethod
13
+ def clear_llm(self, unused_model_dict: Optional[Dict[str, str]], current_lc: str) -> None:
14
+ """Method clear unused LLM"""
15
+ pass
16
+
17
+ @abstractmethod
18
+ def get_unused(self, current_lc: str) -> Optional[Dict[str, str]]:
19
+ """Method getting LLM that unused and forming corresponding dict"""
20
+ pass
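For reference, a minimal concrete implementation of LLMInterface; the EchoLLM name and behaviour are purely illustrative and not part of the commit:

    from typing import Dict, Optional

    class EchoLLM(LLMInterface):
        """Illustrative stub: returns the prompt unchanged and manages no model files."""

        def __init__(self, prompt_entity: str):
            self.prompt_entity = prompt_entity

        def execution(self) -> str:
            return self.prompt_entity

        def clear_llm(self, unused_model_dict: Optional[Dict[str, str]], current_lc: str) -> None:
            return None

        def get_unused(self, current_lc: str) -> Optional[Dict[str, str]]:
            return None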
llm/utils/toggle.py ADDED
File without changes
packages.txt DELETED
@@ -1 +0,0 @@
1
- ffmpeg
 
 
requirements.txt CHANGED
@@ -1,30 +1,35 @@
1
- psutil
 
 
2
 
3
  # related to stremalit
4
- streamlit-mic-recorder==0.0.4
5
  streamlit-TTS
6
- streamlit
7
 
8
  # related to streamlit-TTS
9
  python-dotenv
10
  gTTS
11
  pydub
12
  openai
 
 
13
 
14
  # related to huggingface
15
- torch
16
- transformers
17
  accelerate
18
  sentence-transformers
19
  bitsandbytes
 
20
 
21
  # related to langchain
22
- langchain
23
  pypdf
24
  chromadb
25
 
26
  # related to audio
27
  librosa==0.10.1
28
- soundfile
29
-
30
- einops
 
1
+ # other
2
+ psutil~=5.9.8
3
+ PyYAML~=6.0.1
4
 
5
  # related to stremalit
6
+ streamlit-mic-recorder==0.0.8
7
  streamlit-TTS
8
+ streamlit~=1.33.0
9
 
10
  # related to streamlit-TTS
11
  python-dotenv
12
  gTTS
13
  pydub
14
  openai
15
+ ffprobe
16
+ ffmpeg
17
 
18
  # related to huggingface
19
+ torch~=2.2.2
20
+ transformers~=4.40.1
21
  accelerate
22
  sentence-transformers
23
  bitsandbytes
24
+ einops
25
 
26
  # related to langchain
27
+ langchain~=0.1.17
28
  pypdf
29
  chromadb
30
 
31
  # related to audio
32
  librosa==0.10.1
33
+ soundfile~=0.12.1
34
+ numpy~=1.26.4
35
+ streamlit_TTS~=0.0.7