"""Minimal Flask service that answers "Q: ... A:" prompts with a GPT-2 model."""

from io import BytesIO

from flask import Flask, jsonify, request
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the model and tokenizer once at import time so every request reuses
# the same objects instead of reloading them per call.
model_path = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)


def infer_title(input: str) -> str:
    """Generate text for a question wrapped in a "Q: ... A:" prompt.

    Args:
        input: The question text inserted between "Q: " and " A:".
            (The name shadows the builtin; it is kept so existing keyword
            callers keep working.)

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    input_text = "Q: " + input + " A:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    # NOTE(review): per the transformers docs, max_length counts the prompt
    # tokens as well as the newly generated ones, so long questions leave
    # little or no room for an answer -- confirm whether max_new_tokens is
    # what is actually wanted here.
    output = model.generate(input_ids, max_length=50, num_return_sequences=1)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response


app = Flask(__name__)


@app.route("/")
def endpoint():
    """Handle GET / by generating text for the ``input`` query parameter.

    Returns:
        A JSON body ``{"output": <generated text>}`` on success, or a JSON
        ``{"error": ...}`` body with HTTP status 400 when the ``input``
        query parameter is absent.
    """
    question = request.args.get("input")
    if question is None:
        # Without this guard, "Q: " + None raises TypeError inside
        # infer_title and the client sees an opaque 500.
        return jsonify({"error": "missing required query parameter 'input'"}), 400

    output = infer_title(question)
    return jsonify({"output": output})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)