# hey-llama-code-editor
#
# Running on CPU Upgrade
#
# File size: 5,111 Bytes

import gradio as gr
from gradio_webrtc import (
    WebRTC,
    ReplyOnStopWords,
    AdditionalOutputs,
    audio_to_bytes,
    get_twilio_turn_credentials,
)
import numpy as np
import base64
import re
from groq import Groq

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the API credentials used by the clients below -- confirm).
load_dotenv()


def _read_html(path):
    """Return the text of the HTML template at ``path``, closing the file afterwards."""
    # UTF-8 matches the charset that display_in_sandbox declares for the iframe.
    with open(path, encoding="utf-8") as template_file:
        return template_file.read()


# Static HTML snippets shown in the UI: a loading spinner, the initial sandbox
# placeholder, and an error page.
spinner_html = _read_html("spinner.html")
sandbox_html = _read_html("sandbox.html")
something_happened_html = _read_html("something_happened.html")

# Connection configuration handed to the WebRTC component.
rtc_configuration = get_twilio_turn_credentials()


import logging

# Root logger at WARNING so loggers that inherit from it stay quiet.
logging.basicConfig(level=logging.WARNING)

# Dedicated, verbose logger for the gradio_webrtc library.
logger = logging.getLogger("gradio_webrtc")
logger.setLevel(logging.DEBUG)

# NOTE: despite its name, this handler writes to the file gradio_webrtc.log,
# not to the console.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler = logging.FileHandler("gradio_webrtc.log")
console_handler.setLevel(logging.DEBUG)
console_handler.setFormatter(formatter)

logger.addHandler(console_handler)


# Client used for both speech transcription and chat completion requests.
groq_client = Groq()

# System message that seeds every conversation history.
system_prompt = (
    "You are an AI coding assistant. "
    "Your task is to write single-file HTML applications based on a user's request. "
    "Only return the necessary code. "
    "Include all necessary imports and styles. "
    "You may also be asked to edit your original response."
)

# Per-turn template; filled with the transcribed request and the current code.
user_prompt = (
    "Please write a single-file HTML application to fulfill the following request.\n"
    "The message:{user_message}\n"
    "Current code you have written:{code}"
)


def extract_html_content(text):
    """
    Extract the first complete HTML document embedded in ``text``.

    The search is case-insensitive, so ``<!doctype html>`` is accepted as well
    as ``<!DOCTYPE html>``. A document introduced by a doctype is preferred;
    only when none is found does the function fall back to a bare
    ``<html ...>...</html>`` span.

    Args:
        text: String to search (e.g. a model reply that wraps the document in
            prose or Markdown fences).

    Returns:
        The matched document, from its opening marker through the first
        closing ``</html>`` tag, or ``None`` when no document is found.

    Raises:
        TypeError: If ``text`` is not a string (raised by ``re.search``).
    """
    flags = re.DOTALL | re.IGNORECASE

    # Preferred form: doctype through the first closing </html>.
    with_doctype = re.search(r"<!DOCTYPE html>.*?</html>", text, flags)
    if with_doctype:
        return with_doctype.group(0)

    # Fallback: a document that omits the doctype but still has an <html> root.
    without_doctype = re.search(r"<html\b[^>]*>.*?</html>", text, flags)
    return without_doctype.group(0) if without_doctype else None


def display_in_sandbox(code):
    """
    Wrap HTML source in an ``<iframe>`` that renders it from a data URI.

    Args:
        code: HTML source text. ``None`` is treated as an empty document so
            that a cleared code component does not raise.

    Returns:
        An ``<iframe>`` element string whose ``src`` is a base64-encoded
        ``data:text/html`` URI of ``code``.
    """
    # Base64 keeps quotes and angle brackets in the source from breaking the
    # surrounding src="..." attribute.
    encoded_html = base64.b64encode((code or "").encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    """
    Turn a spoken request into an HTML application and stream UI updates.

    The audio is transcribed, combined with the current code into a user
    message, and sent to the chat model together with the running history.

    Args:
        user_message: ``(sample_rate, samples)`` pair holding the recorded audio.
        history: Chat messages as ``{"role": ..., "content": ...}`` dicts. The
            new user and assistant messages are appended in place.
        code: The code text inserted into the prompt as the current code.

    Yields:
        ``AdditionalOutputs(history, html)`` twice: first with the spinner
        markup while work is in progress, then with the HTML document
        extracted from the model reply. If no document can be extracted, the
        "something happened" page is yielded instead.
    """
    # Show the spinner immediately, before the slow network calls below.
    yield AdditionalOutputs(history, spinner_html)

    sr, audio = user_message
    audio = audio.squeeze()

    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    print("generating response")
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=history,
        temperature=1,
        max_tokens=2048,
        top_p=1,
        stream=False,
    )
    print("finished generating response")

    output = response.choices[0].message.content
    try:
        html_code = extract_html_content(output)
    except Exception as e:
        # Best effort: e.g. a non-string reply makes the regex search raise.
        html_code = None
        print(e)
    if html_code is None:
        # extract_html_content signals "no document found" by returning None
        # rather than raising, so the fallback page must be selected here.
        html_code = something_happened_html
    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


# Application layout: a narrow column with instructions and the audio stream,
# and a wide column with the rendered sandbox, the generated code, and the chat.
with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    # Conversation history, seeded with the system prompt.
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            # The wake phrase in this text must match the stop words configured
            # on the stream below ("hello llama" and its spelling variants).
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Hello Llama! 🦙
                </h1>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=rtc_configuration, mode="send", modality="audio"
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Stream microphone audio to `generate`, wrapped in ReplyOnStopWords with
    # the wake-phrase spellings listed here.
    webrtc.stream(
        ReplyOnStopWords(
            generate,
            input_sample_rate=16000,
            stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"],
        ),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,
        concurrency_limit=10,
    )
    # Fan the (history, code) pair from AdditionalOutputs out to the state,
    # the code viewer, and the chat tab.
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history), outputs=[history, code, cb]
    )
    # Re-render the sandbox iframe whenever the code component changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()