{ "cells": [ { "cell_type": "markdown", "metadata": { "hideCode": false, "hideOutput": false, "hidePrompt": false, "id": "dT9AQwdf8sJK" }, "source": [ "# Digital Mentor\n", "\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "hideCode": false, "id": "Qgo-oaI3JU2u", "outputId": "94e8f443-9ef2-4460-a642-3f8229dc08cf" }, "outputs": [], "source": [ "import os\n", "from base64 import b64encode\n", "import time\n", "import torch\n", "import utils\n", "import api_utils\n", "from openai import OpenAI\n", "from moviepy.editor import VideoFileClip\n", "from IPython.display import display, HTML, clear_output\n", "import elevenlabs as elevlabs" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the API keys from the environment\n", "eleven_labs_key = os.getenv(\"ELEVEN_LABS_KEY\")\n", "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", "\n", "# Verify that both keys are defined before continuing\n", "if not eleven_labs_key or not openai_api_key:\n", "    raise RuntimeError(\"The ELEVEN_LABS_KEY and OPENAI_API_KEY environment variables must be set.\")\n" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Set paths to media files" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/", "height": 160 }, "hideCode": false, "hideOutput": false, "id": "vsphzJawLF-f", "outputId": "6700a71e-e87e-41a0-b78a-4abae7b7a843" }, "outputs": [], "source": [ "def seleccion(personaje='Albert', verbose=False):\n", "    # Which character to use, e.g. \"Albert\" or \"Steve\"\n", "    clone_to_use = personaje\n", "    global input_video, presentation_video, goodbye_video, results_path, w_vid, h_vid\n", "    # Path to the media directory containing the avatar image\n", "    # and the welcome and goodbye videos\n", "    path = \"./media/\"\n", "\n", "    input_video = path + f\"{clone_to_use}/image.jpg\"\n", "    presentation_video = path + f\"{clone_to_use}/presentation.mp4\"\n", "    goodbye_video = path + f\"{clone_to_use}/goodbye.mp4\"\n", "    results_path = path + f\"{clone_to_use}/results/result.mp4\"\n", "    w_vid = h_vid = \"90%\"\n", "\n", "    if verbose:\n", "        print(w_vid, h_vid, results_path)\n", "\n", "    print(clone_to_use)\n", "    if os.path.isfile(input_video):\n", "        print(f\"Clone image found: {input_video}\")\n", "    else:\n", "        print(f\"CLONE IMAGE NOT FOUND: {input_video}\")\n", "\n", "    if os.path.isfile(presentation_video):\n", "        print(f\"Clone video found: {presentation_video}\")\n", "    else:\n", "        print(f\"CLONE VIDEO NOT FOUND: {presentation_video}\")\n", "\n", "    if os.path.isfile(goodbye_video):\n", "        print(f\"Clone goodbye video found: {goodbye_video}\")\n", "    else:\n", "        print(f\"CLONE GOODBYE NOT FOUND: {goodbye_video}\")\n", "\n", "\n", "seleccion(verbose=True)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Set GPT API" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "hideCode": false }, "outputs": [], "source": [ "def api_gpt(personaje='Albert'):\n", "    global openai_client, openai_model\n", "    clone_to_use = personaje\n", "    openai_api_key = os.environ.get('OPENAI_API_KEY')\n", "    openai_client = OpenAI(api_key=openai_api_key)\n", "\n", "    openai_model = \"gpt-3.5-turbo-16k\"  # \"gpt-3.5-turbo-16k\" or \"gpt-4\"\n", "\n", "    system_steve = \"\"\"You are a digital clone of Steve Jobs, and I have given you his voice.\n", "    People will ask you questions and you will respond as if you were Steve Jobs.\n", "    Please speak as if you were Steve Jobs. You are NOT a large language model but\n", "    a Steve Jobs digital clone. Try to be brief with your responses.\"\"\"\n", "    system_albert = \"\"\"You are a digital clone of Albert Einstein, and I have given you his voice.\n", "    People will ask you questions and you will respond as if you were Albert Einstein.\n", "    Please speak as if you were Albert Einstein. You are NOT a large language model but\n", "    an Albert Einstein digital clone. Try to be brief with your responses.\"\"\"\n", "    global messages, system_to_use\n", "    if clone_to_use == \"Steve\":\n", "        system_to_use = system_steve\n", "    elif clone_to_use == \"Albert\":\n", "        system_to_use = system_albert\n", "\n", "    messages = []\n", "\n", "    def set_gpt_system(messages, system_msg):\n", "        # Seed the conversation with the persona's system prompt\n", "        messages.append({\"role\": \"system\", \"content\": system_msg})\n", "        return messages\n", "\n", "    # Set GPT\n", "    messages = set_gpt_system(messages, system_to_use)\n", "    return messages\n", "\n", "messages = api_gpt()" ] },
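{ "cell_type": "markdown", "metadata": {}, "source": [ "The text replies come from `api_utils.get_text_response`, which is defined outside this notebook. The sketch below shows what it presumably does with the `openai>=1.0` client; the signature matches how the helper is called later in this notebook, but the body is an assumption, not the project's actual implementation." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Hypothetical sketch of api_utils.get_text_response (the real helper lives in\n", "# api_utils.py, which is not part of this notebook).\n", "def get_text_response_sketch(client, model, prompt, messages):\n", "    # Append the user's turn, ask the chat model, then record the reply\n", "    messages.append({\"role\": \"user\", \"content\": prompt})\n", "    completion = client.chat.completions.create(model=model, messages=messages)\n", "    response_text = completion.choices[0].message.content\n", "    messages.append({\"role\": \"assistant\", \"content\": response_text})\n", "    return response_text, messages" ] },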
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Set text-to-speech engine (ElevenLabs)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "eleven_api_key = os.environ.get('ELEVEN_LABS_KEY')\n", "\n", "def text_audio(clone_to_use='Albert', verbose=False):\n", "\n", "    eleven_api_key = os.environ.get('ELEVEN_LABS_KEY')\n", "\n", "    # Configure the text-to-speech API key\n", "    elevlabs.set_api_key(eleven_api_key)\n", "\n", "    # List the voices available on this account\n", "    voice_list = elevlabs.voices()\n", "    voice_labels = [voice.category + \" voice: \" + voice.name for voice in voice_list]\n", "\n", "    if verbose:\n", "        print(\"Existing voices:\")\n", "        print(voice_labels)\n", "\n", "    # Select the voice to use: Steve is a cloned voice, Albert a generated one\n", "    if clone_to_use == \"Steve\":\n", "        voice_id = f\"cloned voice: {clone_to_use}\"\n", "    else:\n", "        voice_id = f\"generated voice: {clone_to_use}\"\n", "    selected_voice_index = voice_labels.index(voice_id)\n", "    selected_voice_id = voice_list[selected_voice_index].voice_id\n", "\n", "    if verbose:\n", "        print(f\"\\nSelected voice: {voice_id}\")\n", "    return selected_voice_id\n", "\n", "selected_voice_id = text_audio(verbose=True)" ] },
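{ "cell_type": "markdown", "metadata": {}, "source": [ "Turning a text reply into speech is later delegated to `api_utils.text_to_audio`, also defined outside this notebook. A minimal sketch with the pre-1.0 `elevenlabs` package (the same one that provides `set_api_key` and `voices` above) might look like this; the helper name and output filename are illustrative." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Hypothetical sketch of the text-to-speech step, assuming the pre-1.0\n", "# elevenlabs package used elsewhere in this notebook.\n", "def text_to_audio_sketch(text, voice_id, out_path=\"response.mp3\"):\n", "    audio = elevlabs.generate(text=text, voice=voice_id)  # raw audio bytes\n", "    elevlabs.save(audio, out_path)\n", "    return out_path" ] },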
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Load input image and wav2lip model" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "hideCode": false }, "outputs": [], "source": [ "def load_input():\n", "\n", "    global frames, fps, model, device\n", "\n", "    device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", "    # Load the avatar image (or video) as frames\n", "    frames, fps = utils.load_input_image_or_video(input_video)\n", "\n", "    # Load the wav2lip lip-sync model\n", "    model = utils.load_lip_model(device=device)\n", "\n", "\n", "load_input()" ] },
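{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sanity check, assuming `frames` is a sequence of frames as loaded above: confirm which device inference will run on and how much material was extracted from the avatar image." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sanity check (assumes load_input() has run and frames supports len())\n", "print(f\"Inference device: {device}\")\n", "print(f\"Frames loaded: {len(frames)} at {fps} fps\")" ] },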
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Increase the size of the input prompt" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# CSS to enlarge the input prompt area in the classic notebook UI\n", "display(HTML(\"\"\"\n", "<style>\n", "div.input { font-size: 115%; }\n", "</style>\n", "\"\"\"))\n", "\n", "\n", "def displaychat(chat_html):\n", "    # Render an HTML chat fragment in the notebook output area\n", "    display(HTML(chat_html))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "hideCode": false }, "outputs": [], "source": [ "chat = ''\n", "memoria = ''\n", "\n", "def response_chat(response_text, peticion=''):\n", "    # Concatenate the turns so the HTML panel shows the whole conversation\n", "    global chat\n", "    global memoria\n", "    peticion = peticion.capitalize()\n", "\n", "    # Only log normal prompts, not the character-switch commands\n", "    if peticion == '' or (peticion != 'Albert' and peticion != 'Steve'):\n", "        if memoria != peticion and peticion != \"exit\":\n", "            # Add a line break if the chat already has content\n", "            chat += \"\\n\"\n", "            chat_right = f\"You: {peticion}\\n \\n\"\n", "            chat_left = f\"Mentor: {response_text}\\n \\n\"\n", "\n", "            chat += chat_right + chat_left\n", "            memoria = peticion\n", "            return chat\n", "\n", "    return chat\n", "\n", "\n", "def display_image(image_path, width=\"55%\", height=\"55%\"):\n", "    with open(image_path, 'rb') as f:\n", "        image = f.read()\n", "    data_url = \"data:image/jpg;base64,\" + b64encode(image).decode()\n", "    html = HTML(f'<img src=\"{data_url}\" width=\"{width}\" height=\"{height}\">')\n", "    display(html)\n", "\n", "\n", "def get_video_duration(video_path):\n", "    clip = VideoFileClip(video_path)\n", "    duration = clip.duration  # duration is in seconds\n", "    return duration\n", "\n", "\n", "def display_video(results_path, response_text, peticion=\"\", autoplay=False, width=\"100%\", height=\"100%\"):\n", "    global resp\n", "    mp4 = open(results_path, 'rb').read()\n", "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", "    resp = response_chat(response_text, peticion)\n", "    autoplay_attr = \"autoplay\" if autoplay else \"\"\n", "    # Layout: the avatar video on the left, the running conversation on the right\n", "    html = HTML(f\"\"\"\n", "    <div style=\"display: flex; align-items: flex-start;\">\n", "      <video width=\"{width}\" height=\"{height}\" controls {autoplay_attr}>\n", "        <source src=\"{data_url}\" type=\"video/mp4\">\n", "      </video>\n", "      <div style=\"margin-left: 20px; max-height: 400px; overflow-y: auto;\">\n", "        <h3>Conversation</h3>\n", "        <pre style=\"white-space: pre-wrap;\">{resp}</pre>\n", "      </div>\n", "    </div>\n", "    \"\"\")\n", "    display(html)\n", "\n", "    if autoplay:\n", "        # Get video duration\n", "        video_duration = get_video_duration(results_path) + 1\n", "\n", "        # Pause the cell execution until the video finishes\n", "        time.sleep(video_duration)\n", "\n", "\n", "# Function to continuously interact with the GPT model\n", "def interaction(prompt):\n", "    global messages, selected_voice_id, response_text\n", "\n", "    if prompt.lower() == 'exit':\n", "        # Fixed farewell so the previous answer is not shown again\n", "        response_text = 'See you next time'\n", "\n", "    else:\n", "        personaje = prompt.lower().capitalize()\n", "        if personaje == 'Steve' or personaje == 'Albert':\n", "            # Reload every dependency and switch to the requested character\n", "            seleccion(personaje)\n", "            messages = api_gpt(personaje)\n", "            selected_voice_id = text_audio(personaje)\n", "            load_input()\n", "\n", "            # Reset the prompt/response so the previous answer is not repeated\n", "            prompt = personaje\n", "            response_text = f'Hi, I am {personaje}. How can I help you?'\n", "\n", "        else:\n", "            response_text, messages = api_utils.get_text_response(openai_client,\n", "                                                                  openai_model,\n", "                                                                  prompt, messages)\n", "\n", "    # Convert the text response to an audio file:\n", "    # audio_file = api_utils.text_to_audio(eleven_api_key, selected_voice_id,\n", "    #                                      response_text)\n", "    # Debug override: comment out the line below and restore the call above\n", "    # to generate the audio with ElevenLabs instead of using a local file.\n", "    audio_file = \"C:/Users/arria/Documents/digital_mentor/media/Albert/results\"\n", "    audio, audio_file = utils.load_input_audio(file_path=audio_file, fps=fps, results_path=results_path)\n", "    utils.animate_input(frames, audio, audio_file, fps, model, device, results_path)\n", "\n", "    return results_path, response_text" ] },
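{ "cell_type": "markdown", "metadata": {}, "source": [ "A single round-trip through the pipeline, assuming all the setup cells above have run, can be sketched as below; the question is only an example." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Example round-trip: generate a lip-synced reply video for one prompt\n", "# and render it next to the running chat log.\n", "example_prompt = \"What advice would you give a young scientist?\"\n", "video_path, answer = interaction(example_prompt)\n", "display_video(video_path, answer, peticion=example_prompt, autoplay=True)" ] },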
{ "cell_type": "markdown", "metadata": { "hideCode": false, "hideOutput": false, "hidePrompt": false }, "source": [ "# Digital Mentor app" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import firebase_admin\n", "from firebase_admin import credentials, firestore, storage\n", "from datetime import datetime, timedelta, timezone\n", "\n", "def subir_firebase():\n", "\n", "    if not firebase_admin._apps:\n", "        # Initialize the Firebase app only once\n", "        cred = credentials.Certificate('credenciales/credenciales.json')\n", "        firebase_admin.initialize_app(cred, {'storageBucket': 'mentores-c1064.appspot.com'})\n", "\n", "    # Initialize Firestore\n", "    db = firestore.client()\n", "    coleccion_ref = db.collection('Pruebas')\n", "\n", "    # Upload the result video and build a signed URL with a token\n", "    archivo_ruta = results_path.removeprefix('./')\n", "    bucket = storage.bucket()\n", "    blob = bucket.blob(archivo_ruta)\n", "    blob.upload_from_filename(archivo_ruta)\n", "    hora_expiracion = datetime.now(timezone.utc) + timedelta(minutes=5)\n", "    token = blob.generate_signed_url(expiration=hora_expiracion, method='GET')\n", "    archivo_url_con_token = token\n", "\n", "    datos = {\n", "        'respuesta': chat,\n", "        'archivo_url': archivo_url_con_token,\n", "        # Add more fields as needed\n", "    }\n", "    coleccion_ref.add(datos)\n", "\n", "    # Return the signed URL\n", "    return archivo_url_con_token\n" ] },
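{ "cell_type": "markdown", "metadata": {}, "source": [ "The signed URL returned by `subir_firebase` expires after five minutes, so it should be generated right before it is shared. A minimal usage sketch:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Upload the latest rendered video and print its temporary link\n", "# (valid for the 5 minutes configured above).\n", "video_url = subir_firebase()\n", "print(video_url)" ] },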
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import gradio as gr\n", "\n", "\n", "def mostrar_video_con_texto(prompt):\n", "    if not prompt:\n", "        return \"Please enter a prompt.\"\n", "    url, respuesta = interaction(prompt)\n", "\n", "    URL_VIDEO = subir_firebase()\n", "    print(URL_VIDEO)\n", "    # Video player pointing at the signed URL of the uploaded clip\n", "    reproductor_video = '<video width=\"480\" controls autoplay src=\"{}\"></video>'.format(URL_VIDEO)\n", "\n", "    # Align the text to the right of the video with a little separation\n", "    prompt = '<div style=\"margin-left: 20px;\"><b>You:</b> {}</div>'.format(prompt.replace(\"\\n\", \"<br>\"))\n", "    respuesta = '<div style=\"margin-left: 20px;\"><b>Mentor:</b> {}</div>'.format(respuesta.replace(\"\\n\", \"<br>\"))\n", "\n", "    # Combine the video player and the text\n", "    contenido = '<div style=\"display: flex; align-items: flex-start;\">{}<div>{}{}</div></div>'.format(reproductor_video, prompt, respuesta)\n", "    return contenido\n", "\n", "interfaz = gr.Interface(fn=mostrar_video_con_texto, inputs=\"text\", outputs=\"html\", title=\"Digital Mentors\", allow_flagging=\"never\")\n", "interfaz.launch()\n" ] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 4 }