import os

from fastapi import FastAPI, Body
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Authenticate with the Hugging Face Hub; Meta-Llama-3-8B is a gated model,
# so an access token is required. huggingface_hub.login() accepts a token,
# not a username/password pair -- and credentials should never be hard-coded
# in source. HF_TOKEN is the conventional environment variable name.
login(token=os.environ["HF_TOKEN"])

app = FastAPI()

# Load the pre-trained model and tokenizer once at startup.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")

@app.post("/generate")
async def generate_text(input_text: str = Body(..., embed=True)):
    # Tokenize the prompt and generate a continuation with beam search.
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(input_ids, max_length=50, num_beams=4)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"generated_text": generated_text}

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
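
Once the server is running, the endpoint can be exercised with a small client script. A minimal sketch, assuming the server is reachable at http://localhost:8000 and the requests package is installed; note that Body(..., embed=True) means the prompt must be wrapped in a JSON object under the key "input_text":

import requests

# POST a prompt to the /generate endpoint defined above.
response = requests.post(
    "http://localhost:8000/generate",
    json={"input_text": "Once upon a time"},
)
response.raise_for_status()
print(response.json()["generated_text"])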