import gradio as gr
from gradio import ChatMessage
from typing import Iterator, List
import google.generativeai as genai
import os

genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

system_instruction = "You are a helpful AI assistant."

model = genai.GenerativeModel(
    "gemini-2.0-flash-thinking-exp-1219",
    system_instruction=system_instruction
)


def add_user_message(user_msg: str, messages: List[ChatMessage]) -> List[ChatMessage]:
    """
    Adds the user's message to the chat history.
    """
    messages.append(ChatMessage(role="user", content=user_msg))
    return messages


def stream_gemini_response(
    user_message: str,
    chat_session: genai.ChatSession,
    messages: List[ChatMessage]
) -> Iterator[List[ChatMessage]]:
    """
    Streams both thoughts and responses from the Gemini model using the chat session.
    Handles errors gracefully.
    """
    try:
        # Generate the response using the chat session for history management
        response = chat_session.send_message(user_message, stream=True)

        # Initialize buffers
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add an initial (empty) thinking message
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳ Thinking: *The thoughts produced by the Gemini 2.0 Flash model are experimental*"}
            )
        )
        yield messages

        for chunk in response:
            if not hasattr(chunk, "candidates") or not chunk.candidates:
                continue  # Skip invalid chunks

            parts = chunk.candidates[0].content.parts
            if not parts:
                continue

            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Complete the thought and start the response
                thought_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳ Thinking: *The thoughts produced by the Gemini 2.0 Flash model are experimental*"}
                )

                # Start the response message, seeding the buffer with the second
                # part so later chunks append to it instead of overwriting it
                response_buffer = parts[1].text
                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer
                    )
                )
                thinking_complete = True
            elif thinking_complete:
                # Continue streaming the response
                response_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )
            else:
                # Continue streaming the thoughts
                thought_buffer += current_chunk
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳ Thinking: *The thoughts produced by the Gemini 2.0 Flash model are experimental*"}
                )

            yield messages

    except Exception as e:
        # Handle errors by appending an error message to the chat
        error_msg = f"Error: {str(e)}"
        messages.append(
            ChatMessage(
                role="assistant",
                content=error_msg
            )
        )
        yield messages


def reset_chat() -> tuple[List[ChatMessage], genai.ChatSession]:
    """
    Resets the chat history and starts a new chat session.
    """
    return [], model.start_chat()


with gr.Blocks() as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini 2.0 'Thinking' Chatbot",
        render_markdown=True,
        height=500
    )

    input_box = gr.Textbox(
        lines=1,
        label="Chat Message",
        placeholder="Type your message here and press Enter..."
    )
    clear_btn = gr.Button("Clear Chat")

    # State variables
    msg_store = gr.State("")  # Temporary store for the user message
    chat_session = gr.State(model.start_chat())  # Chat session for history

    # Event handlers: store the message, clear the input box, echo the user
    # message into the chatbot, then stream the model's response
    input_box.submit(
        fn=lambda msg: msg,  # Store the message
        inputs=[input_box],
        outputs=[msg_store],
        queue=False
    ).then(
        fn=lambda: "",  # Clear the input box
        outputs=[input_box],
        queue=False
    ).then(
        fn=add_user_message,  # Add the user message to the chatbot
        inputs=[msg_store, chatbot],
        outputs=[chatbot],
        queue=False
    ).then(
        fn=stream_gemini_response,  # Stream the response
        inputs=[msg_store, chat_session, chatbot],
        outputs=[chatbot]
    )

    # Clear button handler
    clear_btn.click(
        fn=reset_chat,
        outputs=[chatbot, chat_session]
    )

demo.launch()
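
# Usage note (a sketch, not part of the app itself): the script reads its API
# key from the GEMINI_API_KEY environment variable, so one way to run it is
# shown below. The filename app.py is an assumption; substitute whatever name
# this file is saved under.
#
#   export GEMINI_API_KEY="your-api-key"  # placeholder value
#   python app.py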