"""Minimal FastAPI service exposing text generation with facebook/incoder-1B."""

import uvicorn
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

import torch

app = FastAPI()

# Load tokenizer and model once at import time so every request reuses them.
# low_cpu_mem_usage reduces peak RAM while the checkpoint is materialized.
model_name = 'facebook/incoder-1B'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, low_cpu_mem_usage=True)
model.eval()  # inference only — disable dropout etc.
print('load ok')


@app.get("/")
def read_root(input_text: str, max_length: int, top_p: float, top_k: int,
              num_beams: int, temperature: float, repetition_penalty: float):
    """Generate a completion for ``input_text`` with the given decoding knobs.

    All parameters arrive as query strings; the annotations let FastAPI
    coerce and validate them (replacing the manual int()/float() casts).

    Returns:
        dict with a single key ``"text"`` holding the decoded generation.
    """
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # inference_mode avoids autograd bookkeeping during generation.
    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            max_length=max_length,
            top_p=top_p,
            # BUG FIX: top_k must be an int — the original cast it to float,
            # which transformers' generation-config validation rejects.
            top_k=top_k,
            temperature=temperature,
            num_beams=num_beams,
            repetition_penalty=repetition_penalty,
        )
    return {"text": tokenizer.decode(output_ids[0])}


if __name__ == "__main__":
    # Allow `python thisfile.py` in addition to `uvicorn module:app`;
    # the original imported uvicorn but never used it.
    uvicorn.run(app)