"""Minimal Flask service exposing a text-generation endpoint.

POST /api/generate_response with JSON ``{"prompt": str, "token_limit": int}``
and receive ``{"responses": ...}`` produced by the (externally loaded)
``mistral_model`` callable.
"""

from flask import Flask, request, jsonify
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers

# BUG FIX: original code passed ``__name`` (undefined name) — Flask needs
# the module's ``__name__``.
app = Flask(__name__)


@app.route('/api/generate_response', methods=['POST'])
def generate_response():
    """Generate model responses for a prompt supplied in the request body.

    Expects a JSON object with:
      - ``prompt`` (str, required): the input text for the model.
      - ``token_limit`` (int, optional): maximum number of tokens to generate.

    Returns:
        200 with ``{"responses": ...}`` on success.
        400 with an ``{"error": ...}`` payload when the body is missing,
        not valid JSON, or lacks a ``prompt``.
    """
    # ``silent=True`` yields None instead of raising on a missing/invalid
    # JSON body, so we can return a clean 400 instead of a 500.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"error": "Request body must be valid JSON"}), 400

    prompt = data.get('prompt')
    if not prompt:
        return jsonify({"error": "'prompt' is required"}), 400

    token_limit = data.get('token_limit')

    # Your model loading and inference code here (from the code you provided)
    # NOTE(review): ``mistral_model`` is not defined in this file — it must be
    # provided by the model-loading code referenced above, or this call raises
    # NameError at request time. Confirm before deploying.
    responses = mistral_model(prompt, token_limit)
    return jsonify({"responses": responses})


if __name__ == "__main__":
    app.run()