# app.py
import os

# Set the cache directory before importing transformers so downloads go to /tmp
os.environ["TRANSFORMERS_CACHE"] = "/tmp"

from typing import Optional

from fastapi import FastAPI
from transformers import AutoTokenizer, AutoModelForCausalLM

# Specify the model name
model_name = "papahawk/keya-560m"

# Check whether the model has already been saved locally
if not os.path.exists(model_name):
    # If not, download the model and tokenizer from the Hugging Face Hub
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Save the model and tokenizer locally for future runs
    tokenizer.save_pretrained(model_name)
    model.save_pretrained(model_name)
else:
    # If the model has already been downloaded, load it from the local file system
    tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, local_files_only=True)

app = FastAPI()


@app.get("/")
def read_root():
    return {"Hello": "World"}


@app.post("/generate")
def generate_text(prompt: Optional[str] = None):
    # Fall back to a prompt file when no prompt is supplied
    if prompt is None:
        with open("prompt.txt", "r") as file:
            prompt = file.read()

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(inputs["input_ids"])
    text = tokenizer.decode(outputs[0])
    return {"generated_text": text}
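# Usage sketch (an assumption, not part of the original file): the app can be
# served with uvicorn, and because `prompt` is a plain Optional[str] parameter
# (not a request body model), FastAPI exposes it as a query parameter on the
# POST endpoint. The host, port, and example prompt below are illustrative.
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#   curl -X POST "http://localhost:8000/generate?prompt=Hello%20world"
#
# If the prompt query parameter is omitted, the endpoint reads prompt.txt
# from the current working directory instead.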