"""Model hosted on Hugging face. Based on: https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo """ from fastapi import FastAPI, Request from transformers import AutoTokenizer, AutoModelForSeq2SeqLM from transformers import T5Tokenizer, T5ForConditionalGeneration # import gpt4free # from gpt4free import Provider, forefront token_size_limit = None # FROM: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+Thomas%21+How+are+you%3F # LAST USED tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill") model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill") # tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-1B-distill") # model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-1B-distill") # token_size_limit = 128 # T5 model can use "any" sequence lenghth, but memory usage is O(L^2). # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small") # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small") # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base") # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base") # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large") # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large") token_size_limit = 512 # Too large for 16GB # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl") # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl") app = FastAPI() # { msg: string, temperature: float, max_length: number } @app.post('/reply') async def Reply(req: Request): request = await req.json() msg = request.get('msg') print(f'MSG: {msg}') # Hugging face input_ids = tokenizer(msg, return_tensors='pt').input_ids # .to('cuda') output = model.generate( input_ids[:, -token_size_limit:], do_sample=True, temperature=request.get('temperature', 0.9), max_length=request.get('max_length', 100), ) reply = tokenizer.batch_decode(output)[0] # It doesn't really work. # gpt4free # usage theb # reply = gpt4free.Completion.create(Provider.Theb, prompt=msg) print(f'REPLY: {reply}') return {'reply': reply} @app.get("/") def read_root(): return {"Hello": "World!"}