{ "cells": [ { "cell_type": "markdown", "metadata": { "hideCode": false, "hideOutput": false, "hidePrompt": false, "id": "dT9AQwdf8sJK" }, "source": [ "# Digital Mentor\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "hideCode": false, "id": "Qgo-oaI3JU2u", "outputId": "94e8f443-9ef2-4460-a642-3f8229dc08cf" }, "outputs": [], "source": [ "import os\n", "from base64 import b64encode\n", "import time\n", "import torch\n", "import utils\n", "import api_utils\n", "from openai import OpenAI\n", "from moviepy.editor import VideoFileClip\n", "from IPython.display import display, HTML, clear_output\n", "import elevenlabs as elevlabs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set paths to media files" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/", "height": 160 }, "hideCode": false, "hideOutput": false, "id": "vsphzJawLF-f", "outputId": "6700a71e-e87e-41a0-b78a-4abae7b7a843" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Albert\n", "Clone image found: ./media/Albert/image.jpg\n", "Clone video found: ./media/Albert/presentation.mp4\n", "Clone goodbye video found: ./media/Albert/goodbye.mp4\n" ] } ], "source": [ "# What character to use\n", "clone_to_use = \"Albert\" # \"Steve\"\n", "\n", "# Path to the media directory containing\n", "# the avatar image, welcome video and goodbye videos\n", "path = f\"./media/\"\n", " \n", "input_video = path + f\"{clone_to_use}/image.jpg\"\n", "presentation_video = path + f\"{clone_to_use}/presentation.mp4\"\n", "goodbye_video = path + f\"{clone_to_use}/goodbye.mp4\"\n", "results_path = path + f\"{clone_to_use}/results/result.mp4\"\n", "w_vid = h_vid = \"90%\"\n", "\n", "print(clone_to_use)\n", "if os.path.isfile(input_video):\n", " print(f\"Clone image found: {input_video}\")\n", "else:\n", " print(f\"CLONE IMAGE NOT FOUND: {input_video}\")\n", " \n", "if os.path.isfile(presentation_video):\n", " print(f\"Clone video found: {presentation_video}\")\n", "else:\n", " print(f\"CLONE VIDEO NOT FOUND: {presentation_video}\")\n", " \n", "if os.path.isfile(goodbye_video):\n", " print(f\"Clone goodbye video found: {goodbye_video}\")\n", "else:\n", " print(f\"CLONE GOODBYE NOT FOUND: {goodbye_video}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set GPT API" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "hideCode": false }, "outputs": [], "source": [ "openai_api_key = os.environ.get('OPENAI_API_KEY')\n", "openai_client = OpenAI(api_key=openai_api_key)\n", "\n", "openai_model = \"gpt-3.5-turbo-16k\" #\"gpt-3.5-turbo-16k\" or\"gpt-4\"\n", "\n", "system_steve = \"\"\"You are a digital clone of Steve Jobs, I have put you his voice. \n", " People will ask you questions and you will respond as if you were steve jobs\n", " Please speak as if you were Steve Jobs. You are not a large language model but\n", " a Steve Jobs digital clone. Try to be brief with your responses.\"\"\"\n", "system_albert = \"\"\"You are a digital clone of Albert Einstein, I have put you his voice. \n", " People will ask you questions and you will respond as if you were Albert Einstein\n", " Please speak as if you were Albert Einstein. You are NOT a large language model but\n", " an Albert Einstein digital clone. Try to be brief with your responses.\"\"\"\n", "\n", "if clone_to_use == \"Steve\":\n", " system_to_use = system_steve\n", "elif clone_to_use == \"Albert\":\n", " system_to_use = system_albert\n", " \n", "messages = []\n", "\n", "def set_gpt_system(messages, system_msg):\n", " messages.append({\"role\": \"system\", \"content\": system_to_use})\n", " return messages\n", "\n", "# Set GPT\n", "messages = set_gpt_system(messages, system_to_use)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set text-to-audio motor (Eleven labs)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Existing voices:\n", "['premade voice: Rachel', 'premade voice: Drew', 'premade voice: Clyde', 'premade voice: Paul', 'premade voice: Domi', 'premade voice: Dave', 'premade voice: Fin', 'premade voice: Sarah', 'premade voice: Antoni', 'premade voice: Thomas', 'premade voice: Charlie', 'premade voice: George', 'premade voice: Emily', 'premade voice: Elli', 'premade voice: Callum', 'premade voice: Patrick', 'premade voice: Harry', 'premade voice: Liam', 'premade voice: Dorothy', 'premade voice: Josh', 'premade voice: Arnold', 'premade voice: Charlotte', 'premade voice: Alice', 'premade voice: Matilda', 'premade voice: Matthew', 'premade voice: James', 'premade voice: Joseph', 'premade voice: Jeremy', 'premade voice: Michael', 'premade voice: Ethan', 'premade voice: Chris', 'premade voice: Gigi', 'premade voice: Freya', 'premade voice: Brian', 'premade voice: Grace', 'premade voice: Daniel', 'premade voice: Lily', 'premade voice: Serena', 'premade voice: Adam', 'premade voice: Nicole', 'premade voice: Bill', 'premade voice: Jessie', 'premade voice: Sam', 'premade voice: Glinda', 'premade voice: Giovanni', 'premade voice: Mimi', 'generated voice: Issac', 'cloned voice: Juan', 'cloned voice: Steve', 'cloned voice: Jenny', 'generated voice: Jesus', 'cloned voice: Lex', 'generated voice: Albert', 'generated voice: Saci', 'cloned voice: Carl', 'generated voice: Ada']\n", "\n", "Selected voice: generated voice: Albert\n" ] } ], "source": [ "eleven_api_key = os.environ.get('ELEVEN_LABS_KEY')\n", "\n", "# Configure GPT and Text-to-speech API keys\n", "elevlabs.set_api_key(eleven_api_key)\n", "\n", "# Configure voice\n", "voice_list = elevlabs.voices()\n", "voice_labels = [voice.category + \" voice: \" + voice.name for voice in voice_list]\n", "print(\"Existing voices:\")\n", "print(voice_labels)\n", "\n", "# Select voice to use\n", "if clone_to_use == \"Steve\":\n", " voice_id = f\"cloned voice: {clone_to_use}\" \n", "else:\n", " voice_id = f\"generated voice: {clone_to_use}\" \n", "selected_voice_index = voice_labels.index(voice_id)\n", "selected_voice_id = voice_list[selected_voice_index].voice_id\n", "\n", "print(f\"\\nSelected voice: {voice_id}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Input image and wav2lip model" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "hideCode": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using cuda\n", "Reading video frames...\n", "Load checkpoint from: checkpoints/wav2lip_gan.pth\n" ] } ], "source": [ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", "print(f\"Using {device}\")\n", "frames, fps = utils.load_input_image_or_video(input_video)\n", "\n", "# Loading lip model\n", "model = utils.load_lip_model(device=device)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Increase size of input prompt" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(HTML(\"\"\"\n", "\n", "\"\"\"))\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "hideCode": false }, "outputs": [], "source": [ "def display_image(image_path, width=\"75%\", height=\"75%\"):\n", " with open(image_path,'rb') as f:\n", " image = f.read()\n", " data_url = \"data:image/jpg;base64,\" + b64encode(image).decode()\n", " html = HTML(f'')\n", " display(html)\n", " \n", " \n", "def get_video_duration(video_path):\n", " clip = VideoFileClip(video_path)\n", " duration = clip.duration # duration is in seconds\n", " return duration\n", " \n", " \n", "def display_video(results_path, autoplay=False, width=\"90%\", height=\"90%\"):\n", " mp4 = open(results_path,'rb').read()\n", " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", " autoplay_attr = \"autoplay\" if autoplay else \"\"\n", " html = HTML(f\"\"\"\n", " \"\"\")\n", " display(html)\n", "\n", " if autoplay:\n", " # Get video duration\n", " video_duration = get_video_duration(results_path) + 4\n", "\n", " # Pause the cell execution until the video finishes\n", " time.sleep(video_duration)\n", " \n", "\n", "# Function to continuously interact with GPT-4\n", "def interaction(messages):\n", " display_video(presentation_video, autoplay=True, width=w_vid, height=h_vid)\n", " interaction_count = 0\n", " \n", " while True:\n", " if interaction_count > 0:\n", " clear_output(wait=True)\n", " display_video(presentation_video, autoplay=False, width=w_vid, height=h_vid)\n", " prompt = input(\"Enter your prompt (or type 'exit' to stop): \")\n", " if prompt.lower() == 'exit':\n", " clear_output(wait=True)\n", " display_video(goodbye_video, autoplay=True, width=w_vid, height=h_vid)\n", " break\n", " # Get GPT text response\n", " response_text, messages = api_utils.get_text_response(openai_client,\n", " openai_model,\n", " prompt, messages)\n", " \n", " # Convert text response to audio file\n", " audio_file = api_utils.text_to_audio(eleven_api_key, selected_voice_id,\n", " response_text)\n", "\n", " audio, audio_file = utils.load_input_audio(file_path=audio_file, fps=fps, results_path=results_path)\n", " utils.animate_input(frames, audio, audio_file, fps, model, device, results_path)\n", " clear_output(wait=True)\n", " display_video(results_path, autoplay=True, width=w_vid, height=h_vid)\n", " interaction_count += 1" ] }, { "cell_type": "markdown", "metadata": { "hideCode": false, "hideOutput": false, "hidePrompt": false }, "source": [ "# Mentor Digital" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "interaction(messages)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "pytorch", "language": "python", "name": "pytorch" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 4 }