import os

from inference import Inference

# Paths to the serialized model and tokenizer; overridable via environment
# variables so the same handler works across deployments.
model_path = os.getenv("MODEL_PATH", "saved_model/pytorch_model.bin")
tokenizer_path = os.getenv("TOKENIZER_PATH", "saved_tokenizer")

# Load once at module import so warm invocations reuse the loaded model
# instead of paying the load cost on every request.
inference = Inference(model_path, tokenizer_path)


def handler(event, context):
    """Entry point: expects event like {"data": {"prompt": str, "max_length": int}}."""
    prompt = event["data"]["prompt"]
    max_length = event["data"].get("max_length", 100)
    response = inference.predict(prompt, max_length)
    return {"response": response}