{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"hideCode": false,
"hideOutput": false,
"hidePrompt": false,
"id": "dT9AQwdf8sJK"
},
"source": [
"# Digital Mentor\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"hideCode": false,
"id": "Qgo-oaI3JU2u",
"outputId": "94e8f443-9ef2-4460-a642-3f8229dc08cf"
},
"outputs": [],
"source": [
"import os\n",
"from base64 import b64encode\n",
"import time\n",
"import torch\n",
"import utils\n",
"import api_utils\n",
"from openai import OpenAI\n",
"from moviepy.editor import VideoFileClip\n",
"from IPython.display import display, HTML, clear_output\n",
"import elevenlabs as elevlabs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set paths to media files"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"cellView": "form",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 160
},
"hideCode": false,
"hideOutput": false,
"id": "vsphzJawLF-f",
"outputId": "6700a71e-e87e-41a0-b78a-4abae7b7a843"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Albert\n",
"Clone image found: ./media/Albert/image.jpg\n",
"Clone video found: ./media/Albert/presentation.mp4\n",
"Clone goodbye video found: ./media/Albert/goodbye.mp4\n"
]
}
],
"source": [
"# What character to use\n",
"clone_to_use = \"Albert\"  # or \"Steve\"\n",
"\n",
"# Path to the media directory containing the avatar image,\n",
"# the welcome (presentation) video and the goodbye video\n",
"path = \"./media/\"\n",
"\n",
"input_video = path + f\"{clone_to_use}/image.jpg\"\n",
"presentation_video = path + f\"{clone_to_use}/presentation.mp4\"\n",
"goodbye_video = path + f\"{clone_to_use}/goodbye.mp4\"\n",
"results_path = path + f\"{clone_to_use}/results/result.mp4\"\n",
"w_vid = h_vid = \"90%\"\n",
"\n",
"print(clone_to_use)\n",
"\n",
"# Verify every required media file exists, reporting each one\n",
"# (replaces three copy-pasted if/else blocks with one loop)\n",
"media_files = {\n",
"    \"Clone image\": input_video,\n",
"    \"Clone video\": presentation_video,\n",
"    \"Clone goodbye video\": goodbye_video,\n",
"}\n",
"for label, media_path in media_files.items():\n",
"    if os.path.isfile(media_path):\n",
"        print(f\"{label} found: {media_path}\")\n",
"    else:\n",
"        print(f\"{label.upper()} NOT FOUND: {media_path}\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set GPT API"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"hideCode": false
},
"outputs": [],
"source": [
"openai_api_key = os.environ.get('OPENAI_API_KEY')\n",
"openai_client = OpenAI(api_key=openai_api_key)\n",
"\n",
"openai_model = \"gpt-3.5-turbo-16k\"  # or \"gpt-4\"\n",
"\n",
"system_steve = \"\"\"You are a digital clone of Steve Jobs, I have put you his voice. \n",
"    People will ask you questions and you will respond as if you were steve jobs\n",
"    Please speak as if you were Steve Jobs. You are not a large language model but\n",
"    a Steve Jobs digital clone. Try to be brief with your responses.\"\"\"\n",
"system_albert = \"\"\"You are a digital clone of Albert Einstein, I have put you his voice. \n",
"    People will ask you questions and you will respond as if you were Albert Einstein\n",
"    Please speak as if you were Albert Einstein. You are NOT a large language model but\n",
"    an Albert Einstein digital clone. Try to be brief with your responses.\"\"\"\n",
"\n",
"# Map clone names to their system prompts and fail fast on an unknown clone\n",
"# (the original if/elif chain left system_to_use undefined in that case,\n",
"# deferring the failure to a confusing NameError further down)\n",
"system_prompts = {\"Steve\": system_steve, \"Albert\": system_albert}\n",
"if clone_to_use not in system_prompts:\n",
"    raise ValueError(f\"Unknown clone: {clone_to_use!r}\")\n",
"system_to_use = system_prompts[clone_to_use]\n",
"\n",
"messages = []\n",
"\n",
"def set_gpt_system(messages, system_msg):\n",
"    \"\"\"Append a system-role message to the conversation and return it.\n",
"\n",
"    Bug fix: the original ignored its system_msg parameter and read the\n",
"    global system_to_use instead, so callers could not pass a different prompt.\n",
"    \"\"\"\n",
"    messages.append({\"role\": \"system\", \"content\": system_msg})\n",
"    return messages\n",
"\n",
"# Set GPT\n",
"messages = set_gpt_system(messages, system_to_use)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set text-to-audio motor (Eleven labs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Existing voices:\n",
"['premade voice: Rachel', 'premade voice: Drew', 'premade voice: Clyde', 'premade voice: Paul', 'premade voice: Domi', 'premade voice: Dave', 'premade voice: Fin', 'premade voice: Sarah', 'premade voice: Antoni', 'premade voice: Thomas', 'premade voice: Charlie', 'premade voice: George', 'premade voice: Emily', 'premade voice: Elli', 'premade voice: Callum', 'premade voice: Patrick', 'premade voice: Harry', 'premade voice: Liam', 'premade voice: Dorothy', 'premade voice: Josh', 'premade voice: Arnold', 'premade voice: Charlotte', 'premade voice: Alice', 'premade voice: Matilda', 'premade voice: Matthew', 'premade voice: James', 'premade voice: Joseph', 'premade voice: Jeremy', 'premade voice: Michael', 'premade voice: Ethan', 'premade voice: Chris', 'premade voice: Gigi', 'premade voice: Freya', 'premade voice: Brian', 'premade voice: Grace', 'premade voice: Daniel', 'premade voice: Lily', 'premade voice: Serena', 'premade voice: Adam', 'premade voice: Nicole', 'premade voice: Bill', 'premade voice: Jessie', 'premade voice: Sam', 'premade voice: Glinda', 'premade voice: Giovanni', 'premade voice: Mimi', 'generated voice: Issac', 'cloned voice: Juan', 'cloned voice: Steve', 'cloned voice: Jenny', 'generated voice: Jesus', 'cloned voice: Lex', 'generated voice: Albert', 'generated voice: Saci', 'cloned voice: Carl', 'generated voice: Ada']\n",
"\n",
"Selected voice: generated voice: Albert\n"
]
}
],
"source": [
"eleven_api_key = os.environ.get('ELEVEN_LABS_KEY')\n",
"\n",
"# Configure the text-to-speech API key\n",
"elevlabs.set_api_key(eleven_api_key)\n",
"\n",
"# List every voice available on this account as \"<category> voice: <name>\"\n",
"voice_list = elevlabs.voices()\n",
"voice_labels = [f\"{voice.category} voice: {voice.name}\" for voice in voice_list]\n",
"print(\"Existing voices:\")\n",
"print(voice_labels)\n",
"\n",
"# Build the label for the selected clone and look up its voice id\n",
"voice_category = \"cloned\" if clone_to_use == \"Steve\" else \"generated\"\n",
"voice_id = f\"{voice_category} voice: {clone_to_use}\"\n",
"selected_voice_index = voice_labels.index(voice_id)\n",
"selected_voice_id = voice_list[selected_voice_index].voice_id\n",
"\n",
"print(f\"\\nSelected voice: {voice_id}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Input image and wav2lip model"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"hideCode": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using cuda\n",
"Reading video frames...\n",
"Load checkpoint from: checkpoints/wav2lip_gan.pth\n"
]
}
],
"source": [
"# Pick the GPU when available, otherwise fall back to CPU\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"print(f\"Using {device}\")\n",
"\n",
"# Read the avatar frames (and frame rate) from the input image\n",
"frames, fps = utils.load_input_image_or_video(input_video)\n",
"\n",
"# Load the wav2lip model onto the selected device\n",
"model = utils.load_lip_model(device=device)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Increase size of input prompt"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# NOTE(review): the HTML payload below is empty — the CSS/markup that was\n",
"# meant to enlarge the input prompt box (per the markdown header above this\n",
"# cell) appears to have been stripped; restore it before relying on this cell.\n",
"display(HTML(\"\"\"\n",
"\n",
"\"\"\"))\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"hideCode": false
},
"outputs": [],
"source": [
"def display_image(image_path, width=\"75%\", height=\"75%\"):\n",
"    \"\"\"Render a local image file inline as a base64 HTML <img> element.\"\"\"\n",
"    with open(image_path, 'rb') as f:\n",
"        image = f.read()\n",
"    data_url = \"data:image/jpg;base64,\" + b64encode(image).decode()\n",
"    # NOTE(review): the original HTML(f'') was empty, so nothing was rendered;\n",
"    # the <img> markup is reconstructed from the otherwise-unused data_url,\n",
"    # width and height variables — confirm it matches the intended layout.\n",
"    html = HTML(f'<img src=\"{data_url}\" width=\"{width}\" height=\"{height}\">')\n",
"    display(html)\n",
"\n",
"\n",
"def get_video_duration(video_path):\n",
"    \"\"\"Return the duration of a video file in seconds.\"\"\"\n",
"    clip = VideoFileClip(video_path)\n",
"    try:\n",
"        return clip.duration  # duration is in seconds\n",
"    finally:\n",
"        clip.close()  # release the ffmpeg reader (was leaked before)\n",
"\n",
"\n",
"def display_video(results_path, autoplay=False, width=\"90%\", height=\"90%\"):\n",
"    \"\"\"Embed a video inline; when autoplay=True, block until it ends.\"\"\"\n",
"    with open(results_path, 'rb') as f:  # close the handle (was leaked before)\n",
"        mp4 = f.read()\n",
"    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
"    autoplay_attr = \"autoplay\" if autoplay else \"\"\n",
"    # NOTE(review): the original HTML(f\\\"\\\"\\\"...\\\"\\\"\\\") was empty, so no video was\n",
"    # rendered; the <video> markup is reconstructed from data_url, width,\n",
"    # height and autoplay_attr — confirm it matches the intended layout.\n",
"    html = HTML(f\"\"\"\n",
"    <video width=\"{width}\" height=\"{height}\" controls {autoplay_attr}>\n",
"        <source src=\"{data_url}\" type=\"video/mp4\">\n",
"    </video>\"\"\")\n",
"    display(html)\n",
"\n",
"    if autoplay:\n",
"        # Pause the cell execution until the video (plus a 4 s buffer) finishes\n",
"        video_duration = get_video_duration(results_path) + 4\n",
"        time.sleep(video_duration)\n",
"\n",
"\n",
"# Continuously interact with the GPT-backed clone until the user types 'exit'\n",
"def interaction(messages):\n",
"    display_video(presentation_video, autoplay=True, width=w_vid, height=h_vid)\n",
"    interaction_count = 0\n",
"\n",
"    while True:\n",
"        # After the first turn, reset the output to the idle presentation video\n",
"        if interaction_count > 0:\n",
"            clear_output(wait=True)\n",
"            display_video(presentation_video, autoplay=False, width=w_vid, height=h_vid)\n",
"        prompt = input(\"Enter your prompt (or type 'exit' to stop): \")\n",
"        if prompt.lower() == 'exit':\n",
"            clear_output(wait=True)\n",
"            display_video(goodbye_video, autoplay=True, width=w_vid, height=h_vid)\n",
"            break\n",
"        # Get GPT text response\n",
"        response_text, messages = api_utils.get_text_response(openai_client,\n",
"                                                              openai_model,\n",
"                                                              prompt, messages)\n",
"\n",
"        # Convert text response to audio file\n",
"        audio_file = api_utils.text_to_audio(eleven_api_key, selected_voice_id,\n",
"                                             response_text)\n",
"\n",
"        # Lip-sync the avatar frames to the generated audio, then play it\n",
"        audio, audio_file = utils.load_input_audio(file_path=audio_file, fps=fps, results_path=results_path)\n",
"        utils.animate_input(frames, audio, audio_file, fps, model, device, results_path)\n",
"        clear_output(wait=True)\n",
"        display_video(results_path, autoplay=True, width=w_vid, height=h_vid)\n",
"        interaction_count += 1"
]
},
{
"cell_type": "markdown",
"metadata": {
"hideCode": false,
"hideOutput": false,
"hidePrompt": false
},
"source": [
"# Digital Mentor"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Start the interactive conversation loop (type 'exit' to end it)\n",
"interaction(messages)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "pytorch",
"language": "python",
"name": "pytorch"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}