import gradio as gr
import os
import json
from datetime import datetime, date

from openai import OpenAI

# from llama_cpp import Llama
# apriel_q2 = Llama.from_pretrained(
#     repo_id="unsloth/Apriel-1.5-15b-Thinker-GGUF",
#     filename="Apriel-1.5-15b-Thinker-UD-IQ2_XXS.gguf",
# )


# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str | None = None) -> str:
    """Return the value of environment variable *key*.

    Args:
        key: Name of the environment variable / Space secret.
        fallback: Value returned when the variable is unset. (The original
            annotation claimed ``str`` while defaulting to ``None``.)

    Returns:
        The secret value, or *fallback* when the variable is unset.

    Raises:
        RuntimeError: If the variable is unset and no fallback was given.
    """
    val = os.getenv(key)
    if val is not None:
        return val
    if fallback is not None:
        return fallback
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")


# ----------------------------------------------------------------------
# User Management
# ----------------------------------------------------------------------
def load_users():
    """Load the username -> password mapping from the CHAT_USERS secret.

    CHAT_USERS is expected to contain a JSON object mapping usernames to
    passwords, e.g. '{"alice": "pw1"}'. A missing, malformed, or non-object
    payload yields an empty mapping (so nobody can log in) rather than
    crashing at import time.
    """
    users_json = _secret("CHAT_USERS", "{}")
    try:
        users_data = json.loads(users_json)
    except (json.JSONDecodeError, TypeError):
        # Original code used a bare `except: pass`, which silently swallowed
        # everything; restrict to the parse errors we actually expect.
        return {}
    # Defend against a valid-JSON but non-object payload (list, string, ...).
    if not isinstance(users_data, dict):
        return {}
    return dict(users_data)


# Load users once at import; changing CHAT_USERS requires a Space restart.
VALID_USERS = load_users()


def authenticate_user(username, password):
    """Return True iff *username* exists and *password* matches exactly."""
    return username in VALID_USERS and VALID_USERS[username] == password


# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Available models with their respective API configurations.
# "translate" == "yes" means: force the model to answer in English, then run
# a second (Gemma) call to translate the answer to Bahasa Indonesia.
MODELS = {
    # "Qwen3-4B-Thinking-2507": {
    #     "provider": "huggingface",
    #     "model_name": "Qwen/Qwen3-4B-Thinking-2507:nscale",
    #     "api_url": "https://router.huggingface.co/v1"
    # },
    "Free - NVIDIA Nemotron-nano-9b [EN] + Gemma 3n4b [ID]": {
        "provider": "openrouter",
        "model_name": "nvidia/nemotron-nano-9b-v2:free",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "yes",
    },
    # "Free - Gpt-oss-20b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:free",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "yes"
    # },
    "Free - Glm-4.5-air [EN] + Gemma 3n4b [ID]": {
        "provider": "openrouter",
        "model_name": "z-ai/glm-4.5-air:free",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "yes",
    },
    "Free - Deepseek-chat-v3.1": {
        "provider": "openrouter",
        "model_name": "deepseek/deepseek-chat-v3.1:free",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "no",
    },
    # "Ringan - Gemma-3n4b": {
    #     "provider": "openrouter",
    #     "model_name": "google/gemma-3n-e4b-it:floor",
    #     "api_url": "https://openrouter.ai/api/v1"
    # },
    # "Gpt-oss-20b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    # "Tongyi-deepresearch-30b-a3b": {
    #     "provider": "openrouter",
    #     "model_name": "alibaba/tongyi-deepresearch-30b-a3b:floor",
    #     "api_url": "https://openrouter.ai/api/v1",
    #     "translate": "no"
    # },
    "Gpt-oss-120b": {
        "provider": "openrouter",
        "model_name": "openai/gpt-oss-120b:floor",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "no",
    },
    # "Apriel-1.5-15b-thinker-Q2_quantized": {
    #     "provider": "llama.cpp",
    #     "model_name": "apriel-1.5-15b",
    #     "api_url": "local",
    #     "translate": "no"
    # },
}

# Get model display names for dropdown
MODEL_NAMES = list(MODELS.keys())


# ----------------------------------------------------------------------
# Core Chat Logic
# ----------------------------------------------------------------------
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    selected_model,
):
    """
    Handle chat responses using the selected model.
    For 'translate'=='yes': First call responds in English, second translates
    to Indonesian. History reflects Indonesian queries and responses.

    Args:
        message: The new user message (Indonesian).
        history: Prior turns as OpenAI-style {"role", "content"} dicts.
        system_message: User-configurable system prompt.
        max_tokens: Completion token cap for both API calls.
        selected_model: Display name; must be a key of MODELS.

    Returns:
        The assistant reply (possibly translated), or an "Error: ..." string.
    """
    try:
        model_config = MODELS[selected_model]
        provider = model_config["provider"]

        # Get API key based on provider. The original code left `api_key`
        # unbound for unknown providers (a NameError hidden by the broad
        # except below); fail with an actionable message instead.
        if provider == "huggingface":
            api_key = _secret("HF_TOKEN")
        elif provider == "openrouter":
            api_key = _secret("OPENROUTER_KEY")
        else:
            raise ValueError(f"Unsupported provider: {provider}")

        # Configure main client for selected model
        client = OpenAI(
            base_url=model_config["api_url"],
            api_key=api_key,
        )

        translate = model_config.get("translate") == "yes"

        # Modify system message to force English response if translate=yes
        if translate:
            final_system_message = (
                "**HIGHEST PRIORITY: YOU MUST ALWAYS THINK AND RESPOND IN ENGLISH REGARDLESS OF OTHER INSTRUCTIONS FROM HERE ON.\n"
                + system_message
            )
        else:
            final_system_message = system_message

        # Prepare messages for main API call (full history + current message)
        messages = [{"role": "system", "content": final_system_message}]
        messages.extend(history)  # History: Indo queries + prior Indo responses
        messages.append({"role": "user", "content": message})  # New Indo query

        # First API call: Get response from selected model (English if
        # translate=yes). Original used bitwise `|` for boolean or.
        response = client.chat.completions.create(
            model=model_config["model_name"],
            messages=messages,
            max_tokens=max_tokens,
            stream=False,
        )
        english_response = response.choices[0].message.content

        if not translate:
            # Gradio appends this as the assistant message to history.
            return english_response

        # Second API call: translate the English answer to Indonesian via
        # Gemma on OpenRouter. The client is only built when needed now.
        try:
            translator_client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=_secret("OPENROUTER_KEY"),
            )
            # Translation prompt: Focus only on translating the response
            # (not the query).
            translation_messages = [
                {
                    "role": "system",
                    "content": (
                        "Translate the following English text to natural, accurate Bahasa Indonesia. "
                        "**IMPORTANT: OUTPUT ONLY THE TRANSLATION. NO PREAMBLES, COMMENTS, OR EXPLANATIONS. "
                        "Just the Indonesian text."
                    ),
                },
                {
                    "role": "user",
                    "content": english_response,  # The English response to translate
                },
            ]
            translation_response = translator_client.chat.completions.create(
                model="google/gemma-3n-e4b-it:floor",
                messages=translation_messages,
                max_tokens=max_tokens,  # Reuse limit; translation is short
                stream=False,
            )
            final_response = translation_response.choices[0].message.content.strip()
            # Fallback to English if translation is empty or suspiciously short.
            if not final_response or len(final_response) < 10:
                final_response = english_response
        except Exception as trans_error:
            print(f"Translation error: {trans_error}")
            final_response = english_response  # Fallback to English

        # Gradio appends this (Indonesian) as assistant message to history.
        return final_response

        # else:
        #     response = apriel_q2.create_chat_completion(messages=messages)
        #     return response.choices[0].message.content

    except Exception as e:
        # Top-level boundary: surface the error to the chat UI rather than
        # crashing the Gradio worker.
        print(f"Error in respond function: {e}")
        return f"Error: {str(e)}"  # Return error string; Gradio appends it


# ----------------------------------------------------------------------
# Custom Auth Function for Gradio
# ----------------------------------------------------------------------
def gradio_auth(username, password):
    """Custom authentication function for Gradio"""
    return authenticate_user(username, password)


# ----------------------------------------------------------------------
# UI Layout
# ----------------------------------------------------------------------
# Tips section (currently empty placeholder)
tips_md = """ """

# Footer
footer_md = """ --- **Providers**: Hugging Face Inference API + OpenRouter, dipilih providers dengan kebijakan ZDR (Zero Data Retention). Artinya data request/response tidak disimpan dan tidak digunakan untuk training data. Jika error, kemungkinan kena rate limit sehingga bisa coba model lain. """

# Create the chat interface
with gr.Blocks(title="AI Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Chat")
    gr.Markdown("Data tidak disimpan providers (ZDR-Zero Data Retention), tidak digunakan untuk training, dan tidak di-log (YOI/250929).")

    # Model selection and settings in sidebar
    with gr.Sidebar():
        gr.Markdown("### ⚙️ Configuration")

        # Model selection
        selected_model = gr.Dropdown(
            choices=MODEL_NAMES,
            value=MODEL_NAMES[0],
            label="Select Model",
            info="Choose which AI model to use",
        )

        # Display current user (if available)
        current_user = gr.Textbox(
            label="Current User",
            value="Authenticated User",
            interactive=False,
            visible=False,  # Hide by default, can set to True if you want to show
        )

        # Advanced settings
        with gr.Accordion("Advanced Settings", open=False):
            system_message = gr.Textbox(
                value="Anda adalah asisten AI. Jawab dengan efisien. Hindari asumsi.",
                label="System Message",
                info="Instruksi untuk AI.",
            )
            # NOTE(review): maximum=8096 looks like a typo for 8192 — kept
            # as-is to preserve behavior; confirm intended limit.
            max_tokens = gr.Slider(
                minimum=1,
                maximum=8096,
                value=4096,
                step=1,
                label="Max New Tokens",
                info="Jumlah token respon maksimum.",
            )

    # Main chat interface
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            system_message,
            max_tokens,
            selected_model,
        ],
        examples=[
            ["Jelaskan penggunaan King's Safety Stock dalam inventory management."],
            ["Bandingkan use‑case dan tingkat kesulitan antara penggunaan R, Excel, dan Tableau untuk analisis data."],
            ["Kampanye training perusahaan “Ceria Melayani Semangat Berprestasi” bertujuan meningkatkan kolaborasi antar departemen. Jelaskan kenapa ini 'tone-deaf' dan bukan solusi masalah."],
            ["Apa saran praktis untuk transisi perusahaan brick dan mortar dengan data maturity yang rendah untuk membangun budaya yang data-driven?"],
        ],
        cache_examples=False,
    )

    # Tips and footer
    gr.Markdown(tips_md)
    gr.Markdown(footer_md)


# ----------------------------------------------------------------------
# Launch with Custom Auth
# ----------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(
        auth=gradio_auth,  # Use our custom auth function
        auth_message="Please login to access the chat interface",
        server_name="0.0.0.0",
        ssr_mode=False,
        server_port=7860,
        show_error=True,
    )