from transformers import AutoTokenizer, pipeline


class Response:
    """Lightweight response object exposing the generated text."""

    def __init__(self, text):
        self.text = text


class SimpleTransformersLLM:
    """A simple wrapper for Hugging Face Transformers models."""

    def __init__(self, model_name="google/flan-t5-small"):
        """Initialize with a small model that works on CPU."""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            # Flan-T5 is a sequence-to-sequence model, so it needs the
            # "text2text-generation" task rather than "text-generation".
            self.pipe = pipeline(
                "text2text-generation",
                model=model_name,
                tokenizer=self.tokenizer,
                device_map="auto",
            )
        except Exception as e:
            print(f"Error initializing model: {e}")
            self.pipe = None

    def complete(self, prompt):
        """Complete a prompt with the model."""
        if self.pipe is None:
            return Response("Model initialization failed.")

        try:
            # max_new_tokens bounds the length of the generated continuation
            # in tokens; do_sample=True enables sampling instead of greedy decoding.
            result = self.pipe(prompt, max_new_tokens=200, do_sample=True)
            # Seq2seq pipelines return only the generated text, without echoing
            # the prompt, so no prefix-stripping is needed.
            response_text = result[0]["generated_text"].strip()
            if not response_text:
                response_text = "I couldn't generate a proper response."
            return Response(response_text)
        except Exception as e:
            print(f"Error generating response: {e}")
            return Response(f"Error generating response: {str(e)}")


def setup_llm():
    """Set up a simple LLM that doesn't require API keys."""
    try:
        # Try with a very small model first
        return SimpleTransformersLLM("google/flan-t5-small")
    except Exception as e:
        print(f"Error setting up LLM: {e}")

        # Fallback to a dummy LLM that always returns a canned response.
        class DummyLLM:
            def complete(self, prompt):
                return Response(
                    "This is a dummy response. The actual model couldn't be loaded."
                )

        return DummyLLM()
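

# Minimal usage sketch (an assumption, not part of the original module): it
# requires the `transformers` and `accelerate` packages and network access to
# download the model weights on first run.
if __name__ == "__main__":
    llm = setup_llm()
    response = llm.complete("Translate to French: Hello, how are you?")
    print(response.text)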