File size: 5,111 Bytes
5197529
aca1d5b
 
 
 
 
 
 
5197529
 
 
d52ab93
5197529
 
aca1d5b
5197529
 
 
aca1d5b
 
5197529
9cd3f82
d52ab93
5197529
d52ab93
aca1d5b
d52ab93
 
5197529
d52ab93
 
 
5197529
d52ab93
 
 
5197529
d52ab93
 
 
 
5197529
 
d52ab93
 
5197529
 
 
aca1d5b
5197529
 
 
 
aca1d5b
5197529
 
 
 
aca1d5b
5197529
aca1d5b
5197529
 
aca1d5b
5197529
 
d52ab93
 
 
 
 
 
 
 
5197529
 
 
aca1d5b
274649e
 
 
5197529
274649e
aca1d5b
274649e
 
5197529
274649e
aca1d5b
5197529
aca1d5b
 
 
 
 
5197529
 
 
 
 
 
 
 
 
 
 
d52ab93
5197529
 
d52ab93
5197529
 
 
 
 
aca1d5b
 
 
 
5197529
 
 
aca1d5b
5197529
aca1d5b
 
 
 
 
 
5197529
 
aca1d5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5197529
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import gradio as gr
from gradio_webrtc import (
    WebRTC,
    ReplyOnStopWords,
    AdditionalOutputs,
    audio_to_bytes,
    get_twilio_turn_credentials,
)
import numpy as np
import base64
import re
from groq import Groq

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the credentials used by the Groq and
# Twilio helpers in this file - confirm against the deployment setup.
load_dotenv()


def _read_text(path):
    """Return the whole content of the text file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read, and decoded explicitly as UTF-8 so the result
    does not depend on the platform's default locale encoding.
    NOTE(review): assumes the HTML fragments are stored as UTF-8 - confirm.
    """
    with open(path, encoding="utf-8") as fh:
        return fh.read()


# HTML fragments used by the UI, read once at import time.
spinner_html = _read_text("spinner.html")
sandbox_html = _read_text("sandbox.html")
something_happened_html = _read_text("something_happened.html")

# Connection settings handed to the WebRTC component when the UI is built.
rtc_configuration = get_twilio_turn_credentials()


import logging

# Keep the root logger at WARNING so other libraries' debug output is suppressed
logging.basicConfig(level=logging.WARNING)

# Line layout shared by everything written through the handler below
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# Dedicated logger for the gradio_webrtc library, opened up to DEBUG
logger = logging.getLogger("gradio_webrtc")
logger.setLevel(logging.DEBUG)

# Despite its name, this handler writes to the file gradio_webrtc.log
console_handler = logging.FileHandler("gradio_webrtc.log")
console_handler.setLevel(logging.DEBUG)
console_handler.setFormatter(formatter)

# Attach the file handler to the library logger
logger.addHandler(console_handler)


# Groq API client used by generate() for both the audio transcription and the
# chat completion request. It is constructed without explicit arguments.
# NOTE(review): presumably it picks up its API key from the environment
# populated by load_dotenv() - confirm.
groq_client = Groq()

# System message that seeds the conversation history held in the UI state.
system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
# Template for each user turn; filled in with str.format using the
# `user_message` (transcribed request) and `code` (current code) fields.
user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"


def extract_html_content(text):
    """
    Extract the first complete HTML document embedded in ``text``.

    The match starts at the ``<!DOCTYPE html>`` declaration and ends at the
    first ``</html>`` closing tag that follows it, both included. Matching is
    case-insensitive because HTML treats the doctype and tag names as
    case-insensitive, so ``<!doctype html>`` or ``</HTML>`` are recognised
    as well. Newlines inside the document are matched (``re.DOTALL``).

    Args:
        text: The string to search, e.g. a model reply that wraps the
            document in explanatory prose or a fenced code block.

    Returns:
        The matched document as a string, or ``None`` when ``text`` contains
        no doctype declaration followed by a closing ``</html>`` tag.

    Raises:
        TypeError: If ``text`` is not a string (propagated from ``re.search``).
    """
    match = re.search(
        r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
    )
    return match.group(0) if match else None


def display_in_sandbox(code):
    """
    Wrap HTML source in an ``<iframe>`` that renders it from a data URI.

    The source is UTF-8 encoded, base64 encoded and embedded in a
    ``data:text/html`` URI, which becomes the ``src`` of the iframe.

    Args:
        code: HTML source to render. ``None`` is treated like an empty
            string so that a value-less code component yields an empty
            frame instead of raising ``AttributeError``.

    Returns:
        The ``<iframe>`` markup as a string.
    """
    # Normalise a missing value to an empty document before encoding.
    source = code or ""
    encoded_html = base64.b64encode(source.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    """
    Turn one spoken request into an HTML application.

    The recorded audio is transcribed, the transcription is combined with the
    current code into a user prompt, and the chat model is asked for a new
    single-file HTML document.

    Args:
        user_message: ``(sample_rate, samples)`` pair holding the recording.
        history: Chat messages as ``{"role": ..., "content": ...}`` dicts.
            Mutated in place: the formatted user prompt and the assistant
            reply are appended.
        code: The code produced so far; it is embedded in the user prompt.

    Yields:
        ``AdditionalOutputs(history, html)`` twice: first with the spinner
        markup while the request is processed, then with the extracted HTML
        document, or with the "something happened" page when the reply
        contains no HTML document.
    """
    # Show the spinner right away; transcription and generation take a while.
    yield AdditionalOutputs(history, spinner_html)

    sr, audio = user_message
    audio = audio.squeeze()

    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    print("generating response")
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=history,
        temperature=1,
        max_tokens=2048,
        top_p=1,
        stream=False,
    )
    print("finished generating response")

    output = response.choices[0].message.content
    try:
        html_code = extract_html_content(output)
    except TypeError as e:
        # Raised when the reply carries no text content at all.
        html_code = None
        print(e)
    if html_code is None:
        # extract_html_content signals "no document found" by returning None
        # rather than raising, so the fallback page has to be chosen here;
        # otherwise None would be pushed to the code view and the sandbox.
        html_code = something_happened_html
    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


# UI layout: a narrow column with the instructions and the audio stream, and
# a wide column with tabs for the rendered sandbox, the raw code and the chat.
with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    # Conversation state, seeded with the system prompt.
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            # The wake phrase shown here must match the stop words configured
            # on the stream below ("hello llama" and its spelling variants).
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Hello Llama! 🦙
                </h1>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=rtc_configuration, mode="send", modality="audio"
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Run generate() on the audio stream, triggered by the stop words below.
    webrtc.stream(
        ReplyOnStopWords(
            generate,
            input_sample_rate=16000,
            stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"],
        ),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,
        concurrency_limit=10,
    )
    # Each AdditionalOutputs(history, code) yielded by generate() updates the
    # conversation state, the code view and the chat view.
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history), outputs=[history, code, cb]
    )
    # Re-render the sandbox whenever the code view changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)

if __name__ == "__main__":
    demo.launch()