"""Minimal chatbot UI: Falcon-RW-1B served through a Gradio ChatInterface.

Keeps a short rolling memory of the last few exchanges and prepends it to
every prompt so the model sees recent conversational context.
"""

import gradio as gr
from transformers import pipeline
from collections import deque
from time import time

# Text-generation pipeline. pad_token_id is set explicitly (50256, the
# GPT-2-family EOS id) to suppress the "no pad token" warning at generate time.
generator = pipeline(
    "text-generation",
    model="tiiuae/falcon-rw-1b",
    pad_token_id=50256
)


class Memory:
    """Rolling transcript of the last ``max_history`` user/assistant turns.

    NOTE(review): this is instantiated once at module level, so the memory is
    shared by every visitor of the Space — fine for a demo, but per-session
    state would require e.g. ``gr.State``; confirm intent before deploying.
    """

    def __init__(self, max_history=3):
        # deque(maxlen=...) silently evicts the oldest turn once full.
        self.history = deque(maxlen=max_history)

    def add(self, user, bot):
        """Record one completed user/assistant exchange."""
        self.history.append(f"User: {user}\nAssistant: {bot}")

    def get(self):
        """Return the remembered turns as a single newline-joined block."""
        return "\n".join(self.history)


memory = Memory()


def chat(user_input, history):
    """Gradio ChatInterface callback: produce one assistant reply.

    Parameters
    ----------
    user_input : str
        The latest user message.
    history : list
        Gradio-managed chat history. Unused — this app keeps its own
        bounded ``memory`` instead.

    Returns
    -------
    str
        The model's reply with a timing footer appended, or an error string
        if generation failed.
    """
    start = time()
    context = memory.get()
    prompt = (
        "The following is a conversation between a helpful assistant and a user.\n"
        f"{context}\nUser: {user_input}\nAssistant:"
    )
    try:
        output = generator(
            prompt,
            max_new_tokens=80,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1
        )
        # The pipeline echoes the prompt; strip it off, then keep only the
        # first line so the model cannot fabricate further "User:" turns.
        full_text = output[0]["generated_text"]
        reply = full_text[len(prompt):].strip().split("\n")[0]
        memory.add(user_input, reply)
        return reply + f"\n(⏱ {time() - start:.2f}s)"
    except Exception as e:
        # Boundary handler: surface the failure in the chat UI instead of
        # crashing the Space.
        return f"⚠️ Error: {str(e)}"


demo = gr.ChatInterface(
    fn=chat,
    title="🦅 Falcon Chat (Free CPU Space)",
    description="Open-source chatbot using Falcon-RW-1B. No API needed. Remembers 3 messages.",
    examples=["What's AI?", "Tell me a joke", "Who built you?"],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()