File size: 3,653 Bytes
0f9c852
eb450e3
6a03bd2
582395b
eb450e3
1de8c84
eb450e3
b51f88d
5113576
6a03bd2
dec918d
 
6a03bd2
 
0593466
dec918d
 
 
6a03bd2
 
0593466
 
 
 
 
 
 
582395b
dec918d
 
 
 
 
 
 
e3c453c
6a03bd2
3cfecb5
dec918d
 
 
fa8b0f1
 
 
dec918d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582395b
4ddfc2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dec918d
 
4ddfc2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da0a172
5113576
4ddfc2b
 
dec918d
4ddfc2b
 
 
 
 
 
 
5113576
eb450e3
4ddfc2b
ddf82d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import gradio as gr
from huggingface_hub import InferenceClient
import time

# Shared Hugging Face inference client for the hosted chat model. The API
# token is read from the HF_TOKEN environment variable (None when unset).
client = InferenceClient(
    model="lambdaindie/lambda-1v-1B",
    token=os.environ.get("HF_TOKEN"),
)

# Custom stylesheet handed to gr.ChatInterface via its `css` argument.
# It imports the JetBrains Mono web font, forces a dark monospace look on the
# page and on form controls, and defines the `.markdown-think` class (with a
# pulsing opacity animation) that `respond` wraps its reasoning output in.
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');

* {
    font-family: 'JetBrains Mono', monospace !important;
}

html, body, .gradio-container {
    font-family: 'JetBrains Mono', monospace !important;
    background-color: #111 !important;
    color: #e0e0e0 !important;
}

textarea, input, button, select{
    background-color: transparent !important;
    color: #e0e0e0 !important;
    border: 1px solid #444 !important;
}


.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}

@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""

# Font stack for the theme: JetBrains Mono from Google Fonts, then a generic
# monospace fallback.
_theme_fonts = [gr.themes.GoogleFont("JetBrains Mono"), "monospace"]

# Colour overrides applied on top of the Base theme (dark background, light
# text, gray controls).
_theme_overrides = {
    "body_background_fill": "#111",
    "body_text_color": "#e0e0e0",
    "button_primary_background_fill": "#333",
    "button_primary_text_color": "#e0e0e0",
    "input_background_fill": "#222",
    "input_border_color": "#444",
    "block_title_text_color": "#fff",
}

# Gradio theme passed to the chat interface.
theme = gr.themes.Base(primary_hue="gray", font=_theme_fonts).set(**_theme_overrides)

def _history_to_messages(history, system_message):
    """Build the chat-completion message list from the prior conversation.

    Accepts both history layouts: a sequence of ``(user, assistant)`` pairs,
    or a sequence of ``{"role": ..., "content": ...}`` dicts. Empty turns are
    skipped; in the dict layout only "user" and "assistant" roles are kept.
    A non-empty ``system_message`` becomes the leading system message.
    """
    messages = [{"role": "system", "content": system_message}] if system_message else []

    for entry in history or []:
        if isinstance(entry, dict):
            # Messages-style entry; extra keys besides role/content are ignored.
            role = entry.get("role")
            content = entry.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style entry: one user turn followed by one assistant turn.
        user, assistant = entry
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    return messages


def _chunk_text(chunk):
    """Return the text delta of one streamed chunk, or "" when it has none."""
    choices = getattr(chunk, "choices", None)
    if not choices:
        # Some stream chunks carry no choices; treat them as empty text
        # instead of failing on choices[0].
        return ""
    delta = getattr(choices[0], "delta", None)
    return getattr(delta, "content", None) or ""


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a two-phase reply: visible reasoning first, then the answer.

    Phase one asks the model to think step-by-step about ``message`` and
    streams that reasoning wrapped in a ``markdown-think`` div. Phase two
    feeds the reasoning back as an assistant turn and streams the final
    answer.

    Args:
        message: The new user message.
        history: Prior conversation, either ``(user, assistant)`` pairs or
            ``{"role", "content"}`` dicts.
        system_message: Optional system prompt; omitted when empty.
        max_tokens: Token limit passed to each completion request.
        temperature: Sampling temperature passed to each request.
        top_p: Nucleus-sampling value passed to each request.

    Yields:
        Strings holding the complete text to display at that moment (never a
        delta): a placeholder, the growing reasoning block, a "thought for N
        seconds" banner, then the growing final answer.
    """
    messages = _history_to_messages(history, system_message)

    thinking_prompt = messages + [{
        "role": "user",
        "content": f"{message}\n\nThink a bit step-by-step before answering."
    }]

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'

    # Monotonic clock: the elapsed time cannot be skewed by system clock changes.
    start = time.monotonic()

    for chunk in client.chat_completion(
        thinking_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        reasoning += _chunk_text(chunk)
        yield f'<div class="markdown-think">{reasoning.strip()}</div>'

    elapsed = time.monotonic() - start

    yield f"""
    <div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
            font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
        Pensou por {elapsed:.1f} segundos
    </div>
    """

    # Keep the timing banner on screen briefly before the answer replaces it.
    time.sleep(2)

    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

    final_answer = ""
    for chunk in client.chat_completion(
        final_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        final_answer += _chunk_text(chunk)
        yield final_answer.strip()

# Extra controls shown with the chat box. They are listed in the same order as
# the trailing parameters of `respond` (system_message, max_tokens,
# temperature, top_p).
_extra_inputs = [
    gr.Textbox(value="", label="System Message"),
    gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
]

# Chat UI wired to the streaming `respond` generator, using the custom theme
# and stylesheet.
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=theme,
    css=css,
    additional_inputs=_extra_inputs,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()