import os
import gradio as gr
from huggingface_hub import InferenceClient
import time

# Hugging Face Inference API client for the lambdaindie/lambda-1v-1B model.
# Auth comes from the HF_TOKEN environment variable (None if unset, i.e. anonymous).
client = InferenceClient("lambdaindie/lambda-1v-1B", token=os.getenv("HF_TOKEN"))
| css = """ | |
| @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap'); | |
| * { | |
| font-family: 'JetBrains Mono', monospace !important; | |
| } | |
| html, body, .gradio-container { | |
| font-family: 'JetBrains Mono', monospace !important; | |
| background-color: #111 !important; | |
| color: #e0e0e0 !important; | |
| } | |
| textarea, input, button, select{ | |
| background-color: transparent !important; | |
| color: #e0e0e0 !important; | |
| border: 1px solid #444 !important; | |
| } | |
| .markdown-think { | |
| background-color: #1e1e1e; | |
| border-left: 4px solid #555; | |
| padding: 10px; | |
| margin-bottom: 8px; | |
| font-style: italic; | |
| white-space: pre-wrap; | |
| animation: pulse 1.5s infinite ease-in-out; | |
| } | |
| @keyframes pulse { | |
| 0% { opacity: 0.6; } | |
| 50% { opacity: 1.0; } | |
| 100% { opacity: 0.6; } | |
| } | |
| """ | |
# Gradio theme matching the CSS above: dark greys, JetBrains Mono, muted
# buttons/inputs. Kept in sync with the colors used in the `css` string.
theme = gr.themes.Base(
    primary_hue="gray",
    font=[
        gr.themes.GoogleFont("JetBrains Mono"),
        "monospace",
    ],
).set(
    body_background_fill="#111",
    body_text_color="#e0e0e0",
    button_primary_background_fill="#333",
    button_primary_text_color="#e0e0e0",
    input_background_fill="#222",
    input_border_color="#444",
    block_title_text_color="#fff",
)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a two-phase chat reply: a visible "thinking" pass, then the answer.

    Implements the ``fn`` contract of ``gr.ChatInterface`` with the extra
    inputs declared in ``additional_inputs``.

    Args:
        message: The new user message.
        history: Prior turns as (user, assistant) string pairs; either side
            may be falsy and is then skipped.
        system_message: Optional system prompt; omitted entirely when empty.
        max_tokens: Generation cap passed to both model calls.
        temperature: Sampling temperature for both model calls.
        top_p: Nucleus-sampling threshold for both model calls.

    Yields:
        Markdown/HTML strings. Each yield *replaces* the previously shown
        text in the chat UI, so intermediate yields act as live updates.
    """
    # Rebuild the conversation in the chat-completion message format.
    messages = [{"role": "system", "content": system_message}] if system_message else []
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    # Phase 1: ask the model to reason out loud before answering.
    thinking_prompt = messages + [{
        "role": "user",
        "content": f"{message}\n\nThink a bit step-by-step before answering.",
    }]

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'

    start = time.time()
    try:
        for chunk in client.chat_completion(
            thinking_prompt,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content or ""
            reasoning += token
            yield f'<div class="markdown-think">{reasoning.strip()}</div>'
    except Exception as err:
        # Fix: the original had no error handling, so any API/network failure
        # killed the generator and left the chat blank. Surface it instead.
        yield f'<div class="markdown-think">Error during reasoning: {err}</div>'
        return
    elapsed = time.time() - start

    # Show how long the thinking phase took (replaced by the answer below).
    yield f"""
<div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
Pensou por {elapsed:.1f} segundos
</div>
"""
    # Brief pause so the timing banner is actually visible before streaming
    # the answer over it. NOTE(review): blocks the worker thread for 2 s.
    time.sleep(2)

    # Phase 2: feed the reasoning back and stream the final answer.
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on your reasoning above."},
    ]

    final_answer = ""
    try:
        for chunk in client.chat_completion(
            final_prompt,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content or ""
            final_answer += token
            yield final_answer.strip()
    except Exception as err:
        # Preserve whatever partial answer streamed before the failure.
        yield f"{final_answer.strip()}\n\n[Error during answer: {err}]"
# Chat UI wiring: `respond` is the streaming handler; the extra widgets are
# passed positionally to `respond` after (message, history), in this order.
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=theme,
    css=css,
    additional_inputs=[
        gr.Textbox(value="", label="System Message"),
        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)
if __name__ == "__main__":
    # share=True requests a public *.gradio.live tunnel URL in addition to
    # the local server.
    demo.launch(share=True)