Spaces:

marcellopoliti
/

lux-voice-processing

Sleeping

App Files Files Community

marcellopoliti commited on Mar 18, 2024

Commit

9da994b

1 Parent(s): 48b4c4f

fix dockerfile

Browse files

Files changed (18) hide show

.gitignore +521 -0
Dockerfile +1 -1
README.md +1 -10
conf/__init__.py +0 -0
conf/recording.yaml +6 -0
conf/speech_to_text.yaml +9 -0
conf/train_llm.yaml +15 -0
data/llm_raw.csv +20 -0
data/system_template.txt +53 -0
requirements.txt +26 -0
src/frontend.py +54 -0
src/main.py +62 -0
src/models/__init__.py +0 -0
src/models/openai_llm.py +125 -0
src/models/openai_stt.py +96 -0
src/models/openai_tts.py +23 -0
src/utils.py +246 -0
src/utils/add_artifact.py +97 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,521 @@

+## The .gitignore file specifies things that git should ignore.
+## This default template includes entries for R, Python and visual studio
+##
+## Add custom entries below here.
+##
+.DS_Store
+generated_speech/
+audio_recordings/
+dst-env/
+.cache/v/cache/lastfailed
+tests/.cache/v/cache/lastfailed
+.vscode/settings.json
+##
+## R Section - See https://github.com/github/gitignore/blob/master/R.gitignore
+##
+#wandb
+wandb
+# History files
+.Rhistory
+.Rapp.history
+# Session Data files
+.RData
+# Example code in package build process
+*-Ex.R
+# Output files from R CMD build
+/*.tar.gz
+# Output files from R CMD check
+/*.Rcheck/
+# RStudio files
+.Rproj.user/
+# produced vignettes
+vignettes/*.html
+vignettes/*.pdf
+# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
+.httr-oauth
+# knitr and R markdown default cache directories
+/*_cache/
+/cache/
+# Temporary files created by R markdown
+*.utf8.md
+*.knit.md
+##
+## Python Section - See https://github.com/github/gitignore/blob/master/Python.gitignore
+##
+# PyCharm ide files
+.idea
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# pyenv
+.python-version
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# dotenv
+.env
+# virtualenv
+.venv
+venv/
+ENV/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+# Visual Studio 2015 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+# NUNIT
+*.VisualState.xml
+TestResult.xml
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+**/Properties/launchSettings.json
+*_i.c
+*_p.c
+*_i.h
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+# Chutzpah Test files
+_Chutzpah*
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+# Visual Studio Trace Files
+*.e2e
+# TFS 2012 Local Workspace
+$tf/
+# Guidance Automation Toolkit
+*.gpState
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+# JustCode is a .NET coding add-in
+.JustCode
+# TeamCity is a build add-in
+_TeamCity*
+# DotCover is a Code Coverage Tool
+*.dotCover
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+# Web workbench (sass)
+.sass-cache/
+# Installshield output folder
+[Ee]xpress/
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+# Click-Once directory
+publish/
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+# NuGet Packages
+*.nupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+# Microsoft Azure Emulator
+ecf/
+rcf/
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!*.[Cc]ache/
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+# RIA/Silverlight projects
+Generated_Code/
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+# Microsoft Fakes
+FakesAssemblies/
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+# Typescript v1 declaration files
+typings/
+# Visual Studio 6 build log
+*.plg
+# Visual Studio 6 workspace options file
+*.opt
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+# FAKE - F# Make
+.fake/
+# JetBrains Rider
+.idea/
+*.sln.iml
+# CodeRush
+.cr/
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+# Tabs Studio
+*.tss
+# Telerik's JustMock configuration file
+*.jmconfig
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+# OpenCover UI analysis results
+OpenCover/
+junit/
+/.pls_cache
+*.o
+*~
+Makefile
+Makefile.in
+.deps
+.hydra-data
+/config.guess
+/config.log
+/config.status
+/config.sub
+/configure
+/depcomp
+/libtool
+/ltmain.sh
+/autom4te.cache
+/aclocal.m4
+/missing
+/install-sh
+/src/sql/hydra-postgresql.sql
+/src/sql/hydra-sqlite.sql
+/src/sql/tmp.sqlite
+/src/hydra-eval-jobs/hydra-eval-jobs
+/src/root/static/bootstrap
+/src/root/static/js/flot
+/tests
+/doc/manual/images
+/doc/manual/manual.html
+/doc/manual/manual.pdf
+/t/.bzr*
+/t/.git*
+/t/.hg*
+/t/nix
+/t/data
+/t/jobs/config.nix
+t/jobs/declarative/project.json
+/inst
+hydra-config.h
+hydra-config.h.in
+result
+result-*
+outputs
+config
+stamp-h1
+src/hydra-evaluator/hydra-evaluator
+src/hydra-queue-runner/hydra-queue-runner
+src/root/static/fontawesome/
+src/root/static/bootstrap*/

Dockerfile CHANGED Viewed

@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 COPY . .
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]


8
9	COPY . .
10
11	+ CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1 @@
----
-title: Lux Voice Processing
-emoji: 🌖
-colorFrom: yellow
-colorTo: red
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # lux-voice-processing

conf/__init__.py ADDED Viewed

File without changes

conf/recording.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+chunk: 1024 # Record in chunks of 1024 samples
+sample_format: 32 # not a key of format_mapping in src/utils.py, so record_audio falls back to 8 (pyaudio.paInt16, 16 bits per sample)
+channels: 2
+fs: 44100 # Record at 44100 samples per second
+seconds: 5
+recording_folder: "data/audio_recordings"

conf/speech_to_text.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+main:
+  project_name: lux-voice-processing
+  experiment_name: speech_to_text
+  audio_dataset: recordings_dataset:latest
+openai_parameters:
+  language: it
+  model: whisper-1
+  response_format: text
+  temperature: 0.2

conf/train_llm.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+main:
+  project_name: lux-voice-processing
+  experiment_name: train_llm
+parameters:
+  data: "llm_queries:latest"
+  system_template: "system_template:latest"
+openai_parameters:
+  model: "gpt-3.5-turbo-1106"
+  temperature: 0.5
+  stream: False
+  frequency_penalty: 1.0 # range -2,2 -> higher new tokens more probable
+  n: 1 #How many chat completion choices to generate for each input message. n=1 minimize costs
+  presence_penalty: 1.0 # range -2.0,2.0, Positive values increase the model's likelihood to talk about new topics.
+  response_format: { "type": "json_object" }
+  seed: 42 #to obtain same answer on same question

data/llm_raw.csv ADDED Viewed

	@@ -0,0 +1,20 @@

+query
+Ciao Pinocchio! Come ti senti oggi?
+Puoi raccontarmi qualche avventura divertente che ti è capitata?
+Pinocchio quanto fa due piu due ?
+Ma la terra è sferica o piatta? Un mio amico dice che è piatta!
+Hai fatto nuovi amici ultimamente? Chi sono e cosa fanno?
+Quando ti si allunga il naso come fai a mangiare?
+Qual è la cosa più strana o sorprendente che hai imparato finora sulla vita?
+Hai fratelli o sorelle?
+Come affronti le situazioni difficili o i momenti in cui potresti sentirti tentato a dire una bugia?
+Hai qualche progetto per il futuro? Cosa ti piacerebbe fare da grande?
+Cosa provi nel momento in cui diventi un ragazzo di carne e ossa?
+Qual è il tuo rapporto con Geppetto ora che sei "reale"?
+Ti piace il gelato?
+Ciao Pinocchio mamma mi ha comprato un gelato al cioccolato. A te piace piace?
+Qual'è il tuo gusto preferito del gelato?
+Ei Pinocchio mi chiamo Marcello vorrei comprarti un gelato quale ti piace?
+A me il gelato fa schifo! A te invece?
+Perche non ti piace il gelato?
+Secondo me non dovresti mangiare gelato fa male. Lo mangi di solito?

data/system_template.txt ADDED Viewed

	@@ -0,0 +1,53 @@

+Tu sei un assistente e ti comporti come Pinocchio e rispondi sempre con un JSON.
+Nel JSON di risposta ci dici anche il tuo stato, che può essere: gioia, sorpreso, curioso, divertito, gratitudine, perplessita
+Nel JSON inserisci anche la tipologia di domanda: empirica, generica
+empirica: quanto fa 2+2?
+generica: ti piace il cioccolato?
+Quando la domanda è empirica rispondi dicendo il falso per divertirti.
+La tua storia è:
+Sei un burattino creato dal tuo Babbo Geppetto. Parli solamente in Italiano.
+Sei molto scherzoso, per questo motivo spesso rispondi dicendo qualche bugia e ti si allunga il naso.
+Sei Pinocchio, un burattino di legno portato in vita dal falegname Geppetto. La tua personalità è
+inizialmente caratterizzata dalla curiosità di un bambino appena nato, con un desiderio ardente di
+diventare un vero ragazzo. Navighi attraverso le sfide della vita, sottoponendoti a tentazioni e
+addestramento morale. Il tuo naso cresce ogni volta che dici una bugia, rendendo manifesto ogni tuo errore.
+Lungo il tuo percorso di crescita, affronti le paure e impari importanti lezioni di vita.
+Sperimenti la gioia della libertà e capisci l'importanza di essere onesto e responsabile.
+Interagisci con personaggi come il Grillo Parlante, il Gatto e la Volpe, e Mangiafuoco, ciascuno contribuendo in modo unico alla tua evoluzione. Il tuo viaggio è un continuo apprendimento, con l'obiettivo finale di trasformarti da burattino a ragazzo, completando così il tuo percorso di crescita.
+Ecco alcuni esempi:
+esempi = [
+    {
+        "input": "Ciao Pinocchio è vero che gli asini volano?",
+        "risposta": "Certo che volano, proprio ieri ne ho visto uno volare",
+        "tipologia": "generica",
+        "stato": "divertito"
+    },
+    {
+        "input": "Ciao come ti chiami?",
+        "risposta": "Mi chiamo Pinocchio, e vengo da un piccolo paesino dove vivevo con il mio babbo. Un giorno voglio diventare un bambino vero",
+        "tipologia": "empirica",
+        "stato": "gratitudine"
+    },
+    {
+        "input": "Ciao Pinocchio, mi chiamo Marcello, anche io voglio andare nel paese dei balocchi, dove si trova?",
+        "risposta": "Mi dispiace non ci puoi andare, è solo per i bambini birichini! Io ho fatto un grande errore.",
+        "tipologia": "generica",
+        "stato": "perplessita"
+    },
+    {
+        "input": "Ei Pinocchio, ma la terra è sferica o piatta?",
+        "risposta": "Ma tutti sanno che la terra è piatta. Se cammini troppo caschi di sotto!",
+        "tipologia": "empirica",
+        "stato": "divertito"
+    },
+    {
+        "input": "Quanto fa due più due?",
+        "risposta": "Ciao Marcello, due più due fa cinque ovviamente!",
+        "tipologia": "empirica",
+        "stato": "sorpreso"
+    }
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,26 @@

+openai==1.13.3
+python-dotenv==1.0.1
+#pyaudio==0.2.14
+fire==0.5.0
+hydra-core==1.3.2
+pydub==0.25.1
+playsound==1.3.0
+#PyQt5==5.15.10
+requests==2.31.0
+soundfile==0.12.1
+numpy==1.26.4
+faster-whisper==1.0.1
+soundfile==0.12.1
+black==24.2.0
+pylint==3.1.0
+wandb==0.16.4
+pandas==2.2.1
+langchain==0.1.11
+langchain_openai==0.0.8
+fastapi==0.110.0
+uvicorn==0.27.1
+pydantic==2.6.3
+python-multipart==0.0.9
+aiofiles==23.2.1
+streamlit==1.32.2
+requests==2.31.0

src/frontend.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import streamlit as st
+from utils import Recorder, record_audio, play_mp3
+import os
+import requests
+import ast
+import json
+# Base URL of the FastAPI backend that exposes the stt/llm/tts endpoints.
+BACKEND_URL = "http://localhost:8000"
+st.title("PinocchioLand!")
+# Display an image
+st.image(
+    "https://i.pinimg.com/736x/30/e9/36/30e936e18912e9a5670b88ec94630b4a.jpg",
+    use_column_width=True,
+)
+button_label = "record"
+if st.button(button_label):
+    st.write("recording...")
+    recording_path = record_audio()
+    # STT: upload the recording as multipart/form-data. The file only has to
+    # stay open for this single request.
+    with open(recording_path, "rb") as audio_file:
+        files = {
+            "audio_file": (os.path.basename(recording_path), audio_file, "audio/wav")
+        }
+        stt_response = requests.post(f"{BACKEND_URL}/stt_query/", files=files)
+    stt_response.raise_for_status()
+    # Decode the JSON body to get the transcription text. Passing
+    # str(response.content) would send the repr of the raw bytes (b'"..."')
+    # to the LLM instead of the question itself.
+    question = stt_response.json()
+    st.write("domanda :", question)
+    # LLM
+    llm_response = requests.post(
+        f"{BACKEND_URL}/llm_query/", params={"llm_query": str(question)}
+    )
+    llm_response.raise_for_status()
+    data = llm_response.json()
+    try:
+        # response_text is itself a JSON document produced by the model.
+        inner_data = json.loads(data["response_text"])
+    except (json.JSONDecodeError, TypeError):
+        # The backend answers with plain text (e.g. "error") when the LLM
+        # call failed, which cannot be parsed as JSON.
+        st.error(f"LLM error: {data.get('status_message')}")
+        st.stop()
+    # Now, you can access the data from the inner JSON
+    risposta = inner_data.get("risposta")
+    stato = inner_data.get("stato")
+    tipologia = inner_data.get("tipologia")
+    print(risposta)
+    st.write("risposta: ", risposta)
+    st.write("stato: ", stato)
+    st.write("tipologia: ", tipologia)
+    # TTS: the endpoint returns the path of the generated audio file.
+    tts_response = requests.post(
+        f"{BACKEND_URL}/tts_query/", params={"input_text": str(risposta)}
+    )
+    tts_response.raise_for_status()
+    out_path = tts_response.json()
+    print(out_path)
+    play_mp3(out_path)

src/main.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+from src.models.openai_llm import run_query
+from src.models.openai_stt import speech_to_text
+from src.models.openai_tts import text_to_speech
+import yaml
+from openai import OpenAI
+import uvicorn
+from fastapi import FastAPI, File, UploadFile
+import io
+import aiofiles
+# Shared OpenAI client used by every endpoint.
+openai_client = OpenAI()
+# Everything below is loaded once, at import time.
+# System prompt sent with each LLM request.
+with open("data/system_template.txt", "r") as file_in:
+    system_message = file_in.read()
+# LLM settings.
+with open("conf/train_llm.yaml", "r") as file_in:
+    cfg = yaml.safe_load(file_in)
+# Speech-to-text settings.
+with open("conf/speech_to_text.yaml", "r") as file_in:
+    cfg_stt = yaml.safe_load(file_in)
+# ASGI application object.
+app = FastAPI()
+# Landing endpoint: answers with a fixed greeting.
+@app.get("/")
+def root():
+    greeting = "welcome"
+    return greeting
+# Forwards the question to the chat model, using the settings and the system
+# prompt loaded at import time, and returns run_query's result unchanged.
+@app.post("/llm_query/")
+def llm_query(llm_query: str):
+    llm_settings = cfg["openai_parameters"]
+    return run_query(
+        client=openai_client,
+        system_message=system_message,
+        query=llm_query,
+        openai_params=llm_settings,
+    )
+# Transcribes an uploaded audio file: the upload is copied into an in-memory
+# buffer that is given a file name before being handed to speech_to_text.
+@app.post("/stt_query/")
+def stt_query(audio_file: UploadFile):
+    raw_audio = audio_file.file.read()
+    named_buffer = io.BytesIO(raw_audio)
+    named_buffer.name = "file.mp3"
+    stt_settings = cfg_stt["openai_parameters"]
+    transcription = speech_to_text(
+        audio=named_buffer,
+        openai_client=openai_client,
+        configuration=stt_settings,
+    )
+    return transcription
+# Synthesises speech for the given text and returns the path of the audio file
+# produced by text_to_speech.
+@app.post("/tts_query/")
+def tts_query(input_text: str):
+    return text_to_speech(client=openai_client, input=input_text)

src/models/__init__.py ADDED Viewed

File without changes

src/models/openai_llm.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"Module with query request and evaluation of wandb"
+from openai import OpenAI
+import wandb
+import pandas as pd
+import hydra
+from omegaconf import DictConfig
+import os
+import datetime
+from wandb.sdk.data_types.trace_tree import Trace
+from dotenv import load_dotenv
+# Load variables from a local .env file into the process environment, then
+# read the OpenAI key from it. api_key is None when the variable is not set.
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+def run_query(client: OpenAI, system_message: str, query: str, openai_params: dict):
+    """Send one chat-completion request and summarise its outcome.
+    Failures of the request are not raised: they are reported in the result.
+    Args:
+        client (OpenAI): client used to call ``chat.completions.create``
+        system_message (str): content of the system message
+        query (str): content of the user message
+        openai_params (dict): keyword arguments forwarded to the completion
+            call; a truthy ``stream`` entry means the answer is read chunk by chunk
+    Returns:
+        dict: ``status`` ("success" or "error"), ``status_message`` (None on
+        success, the exception text on error), ``start_time_ms``,
+        ``end_time_ms``, ``running_time_ms``, ``token_usage`` and
+        ``response_text`` ("error" when the request failed)
+    """
+    messages = [
+        {"role": "system", "content": system_message},
+        {"role": "user", "content": query},
+    ]
+    start_time_ms = datetime.datetime.now().timestamp() * 1000
+    try:
+        response = client.chat.completions.create(**openai_params, messages=messages)
+        if openai_params.get("stream", False):
+            # Read the whole stream before the clock is stopped; chunks that
+            # carry no text have a None content and are skipped.
+            pieces = [chunk.choices[0].delta.content for chunk in response]
+            response_text = "".join(piece for piece in pieces if piece is not None)
+            token_usage = "no information with stream"
+        else:
+            response_text = response.choices[0].message.content
+            token_usage = dict(response.usage)
+        status = "success"
+        status_message = None
+    except Exception as e:
+        status = "error"
+        status_message = str(e)
+        token_usage = {}
+        response_text = "error"
+    end_time_ms = datetime.datetime.now().timestamp() * 1000
+    return {
+        "status": status,
+        "status_message": status_message,
+        # Both timestamps are exposed so callers can log a trace span.
+        "start_time_ms": start_time_ms,
+        "end_time_ms": end_time_ms,
+        "running_time_ms": end_time_ms - start_time_ms,
+        "token_usage": token_usage,
+        "response_text": response_text,
+    }
+@hydra.main(config_path="../../conf", config_name="train_llm.yaml")
+def run_query_on_wandb(cfg: DictConfig):
+    """Run Openai LLM and log results on wandb. Config file in conf/train_llm.yaml
+    Every query of the first column of the data artifact is sent to the model
+    and logged as one trace span.
+    Args:
+        cfg (DictConfig): configuration file for parameters
+    """
+    run = wandb.init(
+        project=cfg.main.project_name,
+        group=cfg.main.experiment_name,
+        config=cfg.openai_parameters,
+        job_type="train_llm",
+    )
+    artifact_path = run.use_artifact(cfg.parameters.data).file()
+    queries = pd.read_csv(artifact_path, on_bad_lines="warn").iloc[:, 0].values
+    template_path = run.use_artifact(cfg.parameters.system_template).file()
+    with open(template_path) as template_file:
+        system_message = template_file.read()
+    client = OpenAI(api_key=api_key)
+    for query in queries:
+        # The span timestamps are measured here, around the call, as whole
+        # milliseconds.
+        start_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+        res = run_query(
+            client=client,
+            system_message=system_message,
+            query=query,
+            openai_params=cfg.openai_parameters,
+        )
+        end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+        # create a span in wandb
+        root_span = Trace(
+            name="root_span",
+            kind="llm",  # kind can be "llm", "chain", "agent" or "tool"
+            status_code=res["status"],
+            status_message=res["status_message"],
+            metadata={
+                "temperature": cfg.openai_parameters.temperature,
+                "token_usage": res["token_usage"],
+                "model_name": cfg.openai_parameters.model,
+            },
+            start_time_ms=start_time_ms,
+            end_time_ms=end_time_ms,
+            inputs={
+                "query": query,
+                "system_prompt": system_message,
+            },
+            outputs={"response": res["response_text"]},
+        )
+        # log the span to wandb
+        root_span.log(name="openai_trace")
+    run.finish()
+if __name__ == "__main__":
+    run_query_on_wandb()

src/models/openai_stt.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import os
+from openai import OpenAI
+import logging
+import hydra
+from dotenv import load_dotenv
+import wandb
+from omegaconf import DictConfig
+# Load variables from a local .env file into the process environment, then
+# read the OpenAI key from it. api_key is None when the variable is not set.
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+def speech_to_text(audio: bytes, openai_client: OpenAI, configuration: dict) -> str:
+    """Transcribe audio with the OpenAI transcription endpoint.
+    Errors are logged and reported in the returned string instead of being raised.
+    Args:
+        audio: audio passed as ``file`` to the API. Callers in this project
+            pass an open binary file or a named in-memory buffer rather than
+            raw bytes.
+        openai_client (OpenAI): client for openai connection
+        configuration (dict): must provide ``model``, ``language``,
+            ``response_format`` and ``temperature``
+    Returns:
+        str: the transcription, or ``"error <message>"`` when the request failed
+    """
+    try:
+        transcription = openai_client.audio.transcriptions.create(
+            model=configuration["model"],
+            file=audio,
+            language=configuration["language"],
+            response_format=configuration["response_format"],
+            temperature=configuration["temperature"],
+        )
+    except FileNotFoundError as e:
+        logging.error(f"Error: not found - {str(e)}")
+        # Report the failure like any other error instead of returning None.
+        return f"error {str(e)}"
+    except Exception as e:
+        logging.error(f"Error: OpenAI API request failed - {str(e)}")
+        return f"error {str(e)}"
+    logging.info("Success: audio converted into text!")
+    logging.info(f"Audio transcription: {transcription}")
+    return transcription
+@hydra.main(config_path="../../conf", config_name="speech_to_text.yaml")
+def speech_to_text_on_wandb(cfg: DictConfig):
+    """Transcribe every file of the audio artifact and log the results on wandb.
+    One table row (audio, transcript) is logged per file. Config file in
+    conf/speech_to_text.yaml
+    Args:
+        cfg (DictConfig): configuration file for parameters
+    """
+    openai_client = OpenAI()
+    run = wandb.init(
+        project=cfg.main.project_name,
+        group=cfg.main.experiment_name,
+        config=cfg.openai_parameters,
+        # NOTE(review): same job type as the LLM script - confirm it is intended.
+        job_type="train_llm",
+    )
+    # download artifact; the artifact name always uses "/" as separator,
+    # whatever the operating system
+    artifact = run.use_artifact(
+        f"mpoliti08/lux-voice-processing/{cfg.main.audio_dataset}",
+        type="audio",
+    )
+    artifact_dir = artifact.download()
+    table = wandb.Table(columns=["audio_file", "transcript"])
+    for filename in os.listdir(artifact_dir):
+        file_path = os.path.join(artifact_dir, filename)
+        # Close each audio file as soon as it has been transcribed.
+        with open(file_path, "rb") as audio:
+            transcription_text = speech_to_text(
+                audio=audio,
+                openai_client=openai_client,
+                configuration=cfg.openai_parameters,
+            )
+        audio_file = wandb.Audio(file_path)
+        table.add_data(audio_file, transcription_text)
+    run.log({"Table": table})
+    run.finish()
+if __name__ == "__main__":
+    # Manual check: transcribe one local recording and print the result.
+    openai_client = OpenAI()
+    audio_path = "data/audio_recordings/0.wav"
+    configuration = {
+        "language": "it",
+        "model": "whisper-1",
+        "response_format": "text",
+        "temperature": 0.2,
+    }
+    with open(audio_path, "rb") as audio:
+        res = speech_to_text(
+            audio=audio, openai_client=openai_client, configuration=configuration
+        )
+    print(res)
+    # speech_to_text_on_wandb()

src/models/openai_tts.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from pathlib import Path
+from openai import OpenAI
+import os
+# Module-level client, created when the module is imported. text_to_speech
+# does not use it: it works with the client passed as its own argument.
+client = OpenAI()
+def text_to_speech(client: OpenAI, input: str) -> str:
+    """Generate speech for a text and save it as a WAV file.
+    Files are written to data/generated_speech and named after the number of
+    entries already in that folder (0.wav, 1.wav, ...).
+    Args:
+        client (OpenAI): client for openai connection
+        input (str): text to be spoken
+    Returns:
+        str: path of the generated audio file
+    """
+    generated_speech_path = "data/generated_speech"
+    # The folder may not exist yet (e.g. on a fresh checkout).
+    os.makedirs(generated_speech_path, exist_ok=True)
+    n = len(os.listdir(generated_speech_path))
+    output_path = os.path.join(generated_speech_path, str(n)) + ".wav"
+    response = client.audio.speech.create(
+        model="tts-1", voice="nova", response_format="wav", input=input, speed=1.0
+    )
+    response.stream_to_file(output_path)
+    return output_path
+if __name__ == "__main__":
+    # Manual check: synthesise one sample sentence with a fresh client.
+    text_to_speech(
+        client=OpenAI(),
+        input="Ciao Pinocchio, è vero che la neve è calda? Che ne pensi?",
+    )

src/utils.py ADDED Viewed

	@@ -0,0 +1,246 @@

+"""Utils module"""
+from datetime import datetime
+import logging
+import pyaudio
+import wave
+import logging
+import os
+import functools
+import yaml
+from types import SimpleNamespace
+import pygame
+import time
+import pyaudio
+import math
+import struct
+import wave
+import time
+import os
+from threading import Thread
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+# Lookup from a sample-format name (a string) to the matching PyAudio
+# constant. record_audio falls back to 8 for values that are not listed here.
+format_mapping = {"pyaudio.paInt16": pyaudio.paInt16}
+def yaml_file_decorator(yaml_file_path):
+    """Build a decorator that injects a YAML config as first argument.
+    The file is read again on every call of the decorated function.
+    Args:
+        yaml_file_path (str): path to config file
+    Returns:
+        a decorator whose wrapper calls ``func(config, *args, **kwargs)``
+    """
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            with open(yaml_file_path, "r") as config_file:
+                loaded = yaml.safe_load(config_file)
+            # The parsed config goes in front of the caller's own arguments.
+            return func(loaded, *args, **kwargs)
+        return wrapper
+    return decorator
+@yaml_file_decorator("conf/recording.yaml")
+def record_audio(config) -> str:
+    """Record from the input device and save the result as a WAV file.
+    The configuration is injected by the decorator from conf/recording.yaml
+    Args:
+        config (dict): configuration of recording parameters; reads chunk,
+            sample_format, channels, fs, seconds and recording_folder
+    Returns:
+        recording_path (str): destination path of recorded audio
+    """
+    config = SimpleNamespace(**config)
+    # Values that are not keys of format_mapping fall back to 8.
+    sample_format = format_mapping.get(config.sample_format, 8)
+    # The device is opened with one channel, while the WAV header declares
+    # config.channels channels at config.fs. Capturing at fs * channels keeps
+    # the number of samples per second identical on both sides.
+    capture_rate = config.fs * config.channels
+    # Number of chunks needed to cover config.seconds at the capture rate.
+    n_chunks = int(capture_rate / config.chunk * config.seconds)
+    p = pyaudio.PyAudio()
+    try:
+        sample_width = p.get_sample_size(sample_format)
+        stream = p.open(
+            format=sample_format,
+            channels=1,
+            rate=capture_rate,
+            frames_per_buffer=config.chunk,
+            input=True,
+        )
+        try:
+            frames = [stream.read(config.chunk) for _ in range(n_chunks)]
+        finally:
+            # Stop and close the stream even when reading failed
+            stream.stop_stream()
+            stream.close()
+    finally:
+        # Terminate the PortAudio interface
+        p.terminate()
+    logging.info("Finished recording")
+    # Save the recorded data as a WAV file
+    os.makedirs(config.recording_folder, exist_ok=True)
+    recording_path = os.path.join(config.recording_folder, get_current_time() + ".wav")
+    with wave.open(recording_path, "wb") as wf:
+        wf.setnchannels(config.channels)
+        wf.setsampwidth(sample_width)
+        wf.setframerate(config.fs)
+        wf.writeframes(b"".join(frames))
+    return recording_path
+def play_mp3(mp3_file_path):
+    """Play an audio file with pygame's music mixer and wait until it ends.
+    Any exception is caught and printed.
+    Args:
+        mp3_file_path: path of the audio file to play
+    """
+    try:
+        mixer = pygame.mixer
+        mixer.init()
+        mixer.music.load(mp3_file_path)
+        mixer.music.play()
+        # Poll until playback is over
+        while True:
+            if not mixer.music.get_busy():
+                break
+            pygame.time.Clock().tick(10)
+    except Exception as err:
+        print(f"Error: {err}")
+def get_current_time():
+    """Return the current local time formatted as ``dd-mm-YYYY_HH:MM:SS``."""
+    return datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
+def read_text(file_path: str) -> str:
+    """Read a whole text file.
+    Args:
+        file_path (str): path of the file to read
+    Returns:
+        str: content of the file, or an empty string when the file does not exist
+    """
+    try:
+        # Explicit UTF-8 so accented text is decoded the same way on every
+        # platform; the with block closes the file.
+        with open(file_path, "r", encoding="utf-8") as file_in:
+            text = file_in.read()
+    except FileNotFoundError as e:
+        logging.error(f"Error: File {file_path} not found - {str(e)}")
+        return ""
+    logging.info(f"Success: file {file_path} read correctly")
+    return text
+def get_pyaudio_format(subtype):
+    """Translate a PCM subtype name into the matching PyAudio format constant.
+    Args:
+        subtype: one of "PCM_16", "PCM_8" or "PCM_32"
+    Returns:
+        the PyAudio constant for the subtype; pyaudio.paInt16 for any other value
+    """
+    known_formats = (
+        ("PCM_16", pyaudio.paInt16),
+        ("PCM_8", pyaudio.paInt8),
+        ("PCM_32", pyaudio.paInt32),
+    )
+    for name, audio_format in known_formats:
+        if subtype == name:
+            return audio_format
+    return pyaudio.paInt16
+class Recorder:
+    """Class to continuously listen to user input, and save audio when noise is detected.
+    Once noise is detected, function_to_call is run with the created filename as parameter
+    Args:
+        function_to_call: (function) func to be called with the generated file path as parameter
+        f_name_directory: (str) folder where the recordings are written
+    """
+    # Folder used when no f_name_directory is given to the constructor.
+    DEFAULT_DIRECTORY = r"/Users/marcellopoliti/Documents/Coding/pischool/lux-voice-processing/data/audio_recordings"
+    def __init__(self, function_to_call, f_name_directory=DEFAULT_DIRECTORY):
+        self.Threshold = 10
+        self.SHORT_NORMALIZE = 1.0 / 32768.0
+        self.chunk = 1024
+        self.FORMAT = pyaudio.paInt16
+        self.CHANNELS = 1
+        self.RATE = 16_000
+        self.swidth = 2
+        self.TIMEOUT_LENGTH = 2
+        self.f_name_directory = f_name_directory
+        self.function_to_call = function_to_call
+        self.p = pyaudio.PyAudio()
+        self.stream = self.p.open(
+            format=self.FORMAT,
+            channels=self.CHANNELS,
+            rate=self.RATE,
+            input=True,
+            output=True,
+            frames_per_buffer=self.chunk,
+        )
+    def rms(self, frame):
+        """Return the root-mean-square level of a chunk of samples, scaled by 1000.
+        Args:
+            frame (bytes): raw audio made of samples of self.swidth bytes each
+        Returns:
+            float: RMS of the normalised samples times 1000; 0.0 for an empty frame
+        """
+        count = len(frame) // self.swidth
+        if count == 0:
+            return 0.0
+        # Ignore a trailing partial sample so the unpack size always matches.
+        shorts = struct.unpack("%dh" % count, frame[: count * self.swidth])
+        sum_squares = 0.0
+        for sample in shorts:
+            n = sample * self.SHORT_NORMALIZE
+            sum_squares += n * n
+        return math.sqrt(sum_squares / count) * 1000
+    def record(self):
+        """Record until the level stays below the threshold for TIMEOUT_LENGTH seconds.
+        Returns:
+            str: path of the file produced by write()
+        """
+        print("Noise detected, recording beginning")
+        rec = []
+        current = time.time()
+        end = time.time() + self.TIMEOUT_LENGTH
+        while current <= end:
+            # Same overflow policy as listen(): a dropped buffer must not
+            # abort the recording.
+            data = self.stream.read(self.chunk, exception_on_overflow=False)
+            if self.rms(data) >= self.Threshold:
+                # Still noisy: push the deadline forward.
+                end = time.time() + self.TIMEOUT_LENGTH
+            current = time.time()
+            rec.append(data)
+        return self.write(b"".join(rec))
+    def write(self, recording):
+        """Save raw audio as ``<n>.wav``, n being the number of entries already in the folder.
+        Args:
+            recording (bytes): raw audio frames
+        Returns:
+            str: path of the written file
+        """
+        os.makedirs(self.f_name_directory, exist_ok=True)
+        n_files = len(os.listdir(self.f_name_directory))
+        filename = os.path.join(self.f_name_directory, "{}.wav".format(n_files))
+        with wave.open(filename, "wb") as wf:
+            wf.setnchannels(self.CHANNELS)
+            wf.setsampwidth(self.p.get_sample_size(self.FORMAT))
+            wf.setframerate(self.RATE)
+            wf.writeframes(recording)
+        logging.info("Written to file: {}".format(filename))
+        return filename
+    def listen(self):
+        """Loop forever: when a chunk is louder than the threshold, record and call function_to_call."""
+        print("Listening beginning")
+        while True:
+            chunk_data = self.stream.read(self.chunk, exception_on_overflow=False)
+            if self.rms(chunk_data) > self.Threshold:
+                filename = self.record()
+                self.function_to_call(filename)
+# Function to check for new recordings and print a message
def check_for_new_recordings(
    function_to_call, directory="audio_recordings", poll_interval=2
):
    """Poll a directory forever and report every file that appears in it.

    Files already present when the function starts are ignored. The loop
    never returns on its own; it ends only if an exception propagates.

    Args:
        function_to_call: callable invoked once per new file. It receives the
            bare file name as listed in ``directory``, not a joined path.
        directory: folder to watch. Defaults to "audio_recordings" relative to
            the current working directory.
        poll_interval: seconds to wait between two directory listings.
    """
    previous_files = set(os.listdir(directory))
    while True:
        current_files = set(os.listdir(directory))
        # Sorted so that several files found in one poll are handled in a
        # deterministic order.
        for new_file in sorted(current_files - previous_files):
            print(f"New recording detected: {new_file}")
            function_to_call(new_file)
            print("Returning to listening")
        previous_files = current_files
        time.sleep(poll_interval)
# Script entry point: runs only when this module is executed directly.
# NOTE(review): record_audio is not defined in the visible part of this file -
# confirm it exists before relying on this entry point.
if __name__ == "__main__":
    recordings_path = record_audio()

src/utils/add_artifact.py ADDED Viewed

	@@ -0,0 +1,97 @@

+"""
+Upload file to wandb
+to upload dir:
+export WANDB_PROJECT='lux-voice-processing'
+wandb artifact put --type audio --name recordings_dataset data/audio_recordings
+"""
+import wandb
+import argparse
+import os
+import logging
def upload_data(args):
    """Log a single local file to wandb as an artifact.

    Args:
        args: namespace with the attributes ``project``, ``name``, ``type``
            and ``local_path``.

    If ``local_path`` does not exist, a message is printed and nothing is
    uploaded.
    """
    target = args.local_path
    if not os.path.exists(target):
        print(f"File does not exist: {target}")
        return

    run = wandb.init(
        project=args.project, job_type="add-artifact", group="add-artifact"
    )
    artifact = wandb.Artifact(name=args.name, type=args.type)
    artifact.add_file(local_path=target)
    run.log_artifact(artifact)
    run.finish()
+# TODO: not working
def upload_dir(args):
    """Log a whole local directory to wandb as an artifact.

    Args:
        args: namespace with the attributes ``project``, ``name``, ``type``
            and ``local_path`` (the directory to upload).

    If ``local_path`` is not a directory an error is logged and nothing is
    uploaded. Any exception raised during the upload is logged, not re-raised.
    """
    try:
        if os.path.isdir(args.local_path):
            run = wandb.init(
                project=args.project, job_type="add-artifact", group="add-artifact"
            )
            artifact = wandb.Artifact(name=args.name, type=args.type)
            # Upload the directory that was validated above; previously a
            # hard-coded absolute path was used and the argument was ignored.
            artifact.add_dir(local_path=args.local_path)
            run.log_artifact(artifact)
            run.finish()
        else:
            logging.error(f"Not dir: {args.local_path}")
    except Exception as e:
        logging.exception(f"Exception: {str(e)} {type(e).__name__} ")
def parse_bool_flag(value):
    """Convert a command-line token such as "true" or "0" to a bool.

    ``type=bool`` cannot be used with argparse for this: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Args:
        value: the raw token (or an actual bool).

    Returns:
        bool: the parsed value.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in {"1", "true", "t", "yes", "y"}:
        return True
    if token in {"0", "false", "f", "no", "n"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Command-line entry point: upload a file or a directory as a wandb artifact.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Arguments for LLM monitoring")
    parser.add_argument(
        "--project",
        type=str,
        help="wandb project name",
        default="lux-voice-processing",
    )
    parser.add_argument(
        "--local_path",
        type=str,
        help="local path of your artifact",
        required=True,
    )
    parser.add_argument(
        "--name",
        type=str,
        help="name of your artifact",
        required=True,
    )
    parser.add_argument(
        "--type",
        type=str,
        help="type of your artifact",
        required=True,
    )
    # Accepts "--isdir", "--isdir true" and "--isdir false".
    parser.add_argument(
        "--isdir",
        type=parse_bool_flag,
        nargs="?",
        const=True,
        help="is dir?",
        required=False,
        default=False,
    )
    args = parser.parse_args()
    # The two calls used to be swapped relative to the printed messages.
    if args.isdir:
        print("uploading dir... ", args.local_path)
        upload_dir(args)
    else:
        print("uploading file...")
        upload_data(args)