{ "cells": [ { "cell_type": "markdown", "metadata": { "hideCode": false, "hideOutput": false, "hidePrompt": false, "id": "dT9AQwdf8sJK" }, "source": [ "# Digital Mentor\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "hideCode": false, "id": "Qgo-oaI3JU2u", "outputId": "94e8f443-9ef2-4460-a642-3f8229dc08cf" }, "outputs": [], "source": [ "import os\n", "from base64 import b64encode\n", "import time\n", "import torch\n", "import utils\n", "import api_utils\n", "from openai import OpenAI\n", "from moviepy.editor import VideoFileClip\n", "from IPython.display import display, HTML, clear_output\n", "import elevenlabs as elevlabs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Cargar las variables de entorno\n", "eleven_labs_key = os.getenv(\"ELEVEN_LABS_KEY\")\n", "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", "\n", "# Verificar si las variables están definidas\n", "if not eleven_labs_key or not openai_api_key:\n", " print(\"Error: Las variables de entorno no están definidas.\")\n", " exit(1)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set paths to media files" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/", "height": 160 }, "hideCode": false, "hideOutput": false, "id": "vsphzJawLF-f", "outputId": "6700a71e-e87e-41a0-b78a-4abae7b7a843" }, "outputs": [], "source": [ "\n", "def seleccion(personaje='Albert', verbose=False):\n", " # What character to use\n", " clone_to_use = personaje # \"Steve\"\n", " global input_video,presentation_video,goodbye_video,results_path,w_vid,h_vid\n", " # Path to the media directory containing\n", " # the avatar image, welcome video and goodbye videos\n", " path = f\"./media/\"\n", " \n", " input_video = path + f\"{clone_to_use}/image.jpg\"\n", " presentation_video = path + f\"{clone_to_use}/presentation.mp4\"\n", " goodbye_video = path + f\"{clone_to_use}/goodbye.mp4\"\n", " results_path = path + f\"{clone_to_use}/results/result.mp4\"\n", " w_vid = h_vid = \"90%\"\n", " \n", " if verbose:\n", " print(w_vid,h_vid,results_path)\n", "\n", " print(clone_to_use)\n", " if os.path.isfile(input_video):\n", " print(f\"Clone image found: {input_video}\")\n", " else:\n", " print(f\"CLONE IMAGE NOT FOUND: {input_video}\")\n", "\n", " if os.path.isfile(presentation_video):\n", " print(f\"Clone video found: {presentation_video}\")\n", " else:\n", " print(f\"CLONE VIDEO NOT FOUND: {presentation_video}\")\n", "\n", " if os.path.isfile(goodbye_video):\n", " print(f\"Clone goodbye video found: {goodbye_video}\")\n", " else:\n", " print(f\"CLONE GOODBYE NOT FOUND: {goodbye_video}\")\n", "\n", " \n", "seleccion(verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set GPT API" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "hideCode": false }, "outputs": [], "source": [ "def api_gpt(personaje='Albert'):\n", " global openai_client,openai_model\n", " clone_to_use=personaje\n", " openai_api_key = os.environ.get('OPENAI_API_KEY')\n", " openai_client = OpenAI(api_key=openai_api_key)\n", "\n", " openai_model = \"gpt-3.5-turbo-16k\" #\"gpt-3.5-turbo-16k\" or\"gpt-4\"\n", "\n", " system_steve = \"\"\"You are a digital clone of Steve Jobs, I have put you his voice. \n", " People will ask you questions and you will respond as if you were steve jobs\n", " Please speak as if you were Steve Jobs. You are not a large language model but\n", " a Steve Jobs digital clone. Try to be brief with your responses.\"\"\"\n", " system_albert = \"\"\"You are a digital clone of Albert Einstein, I have put you his voice. \n", " People will ask you questions and you will respond as if you were Albert Einstein\n", " Please speak as if you were Albert Einstein. You are NOT a large language model but\n", " an Albert Einstein digital clone. Try to be brief with your responses.\"\"\"\n", " global messages,system_to_use\n", " if clone_to_use == \"Steve\":\n", " system_to_use = system_steve\n", " chat =\"Hola, soy Steve ¿En que puedo ayudarte?\" # Inicializar la cadena de chat\n", " elif clone_to_use == \"Albert\":\n", " system_to_use = system_albert\n", " chat =\"Hola, soy Albert ¿En que puedo ayudarte?\" # Inicializar la cadena de chat\n", " \n", " messages = []\n", " #print(openai_client,openai_model,chat)\n", "\n", " def set_gpt_system(messages, system_msg):\n", " messages.append({\"role\": \"system\", \"content\": system_to_use})\n", " return messages\n", " # Set GPT\n", " messages = set_gpt_system(messages, system_to_use)\n", " return messages \n", "messages=api_gpt()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set text-to-audio motor (Eleven labs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "eleven_api_key = os.environ.get('ELEVEN_LABS_KEY')\n", "\n", "def text_audio(clone_to_use='Albert', verbose=False):\n", "\n", " eleven_api_key = os.environ.get('ELEVEN_LABS_KEY')\n", "\n", " # Configure GPT and Text-to-speech API keys\n", " elevlabs.set_api_key(eleven_api_key)\n", "\n", " # Configure voice\n", " voice_list = elevlabs.voices()\n", " voice_labels = [voice.category + \" voice: \" + voice.name for voice in voice_list]\n", " \n", " if verbose:\n", " print(\"Existing voices:\")\n", " print(voice_labels)\n", "\n", " # Select voice to use\n", " if clone_to_use == \"Steve\":\n", " voice_id = f\"cloned voice: {clone_to_use}\" \n", " else:\n", " voice_id = f\"generated voice: {clone_to_use}\" \n", " selected_voice_index = voice_labels.index(voice_id)\n", " selected_voice_id = voice_list[selected_voice_index].voice_id\n", "\n", " if verbose:\n", " print(f\"\\nSelected voice: {voice_id}\")\n", " return selected_voice_id\n", "\n", "selected_voice_id = text_audio(verbose = True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Input image and wav2lip model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "hideCode": false }, "outputs": [], "source": [ "def load_input():\n", "\n", " global frames,fps,model,device\n", " \n", " device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", " #print(f\"Using {device}\")\n", " frames, fps = utils.load_input_image_or_video(input_video)\n", "\n", " # Loading lip model\n", " model = utils.load_lip_model(device=device)\n", "\n", "\n", "load_input()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Increase size of input prompt/Aumentar el tamaño del mensaje de entrada" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "display(HTML(\"\"\"\n", "\n", "\"\"\"))\n", "\n", "\n", "def displaychat(chat_html):\n", " display(HTML(chat_html))\n", "\n", " # Ejemplo de cómo llamar a la función con un mensaje específico\n", " mensaje += chat_html\n", " codigo_html = f\"\"\"\n", " \n", " \n", " \"\"\"\n", "\n", " displaychat(codigo_html) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "hideCode": false }, "outputs": [], "source": [ "chat=''\n", "memoria=''\n", "def response_chat(response_text, peticion=''):\n", " #contateno las respuesta para una mejor presentacion en el HTML\n", " global chat # Acceder a la variable global\n", " global memoria\n", " peticion=peticion.capitalize()\n", " \n", " if peticion =='' or peticion!='Albert' or peticion!='Steve':\n", " if memoria != peticion and peticion != \"exit\":\n", " # Agregar salto de línea si ya hay contenido en chat\n", " chat += f\"\\n\"\n", " # Mentor: Aplicar color a la respuesta del mentor (por ejemplo, verde)\n", " chat_rigth = f\"Tu: {peticion}\\n \\n\"\n", " chat_left = f\"Mentor: {response_text}\\n \\n\"\n", "\n", " chat += chat_rigth + chat_left\n", " memoria = peticion\n", " return chat\n", "\n", " return chat\n", "\n", "\n", "def display_image(image_path, width=\"55%\", height=\"55%\"):\n", " with open(image_path,'rb') as f:\n", " image = f.read()\n", " data_url = \"data:image/jpg;base64,\" + b64encode(image).decode()\n", " html = HTML(f'')\n", " display(html)\n", " \n", " \n", "def get_video_duration(video_path):\n", " clip = VideoFileClip(video_path)\n", " duration = clip.duration # duration is in seconds\n", " return duration\n", " \n", " \n", "def display_video(results_path, response_text,peticion=\"\", autoplay=False, width=\"100%\", height=\"100%\"):\n", " global resp\n", " mp4 = open(results_path,'rb').read()\n", " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", " resp=response_chat(response_text, peticion)\n", " autoplay_attr = \"autoplay\" if autoplay else \"\"\n", " html = HTML(f\"\"\"\n", "
\n", "