Spaces:

BeveledCube
/

bevelapi

Sleeping

App Files Files Community

BeveledCube committed on Jan 21, 2024

Commit

207c16a

verified ·

1 Parent(s): 638158f

Update main.py

Browse files

Files changed (1) hide show

main.py +40 -20

main.py CHANGED Viewed

@@ -5,7 +5,7 @@ from fastapi import FastAPI
 import os
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
 import torch
 app = FastAPI()
@@ -15,15 +15,14 @@ name = "microsoft/DialoGPT-small"
 # microsoft/DialoGPT-medium
 # microsoft/DialoGPT-large
-# PygmalionAI/pygmalion-350m
-# PygmalionAI/pygmalion-1.3b
-# PygmalionAI/pygmalion-6b
 # mistralai/Mixtral-8x7B-Instruct-v0.1
 # Load the Hugging Face GPT-2 model and tokenizer
-model = GPT2LMHeadModel.from_pretrained(name)
-tokenizer = GPT2Tokenizer.from_pretrained(name)
 class req(BaseModel):
   prompt: str
@@ -38,16 +37,37 @@ def read_root(data: req):
   print("Prompt:", data.prompt)
   print("Length:", data.length)
-  input_text = data.prompt
-  # Tokenize the input text
-  input_ids = tokenizer.encode(input_text, return_tensors="pt")
-  # Generate output using the model
-  output_ids = model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
-  generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-  answer_data = { "answer": generated_text }
-  print("Answer:", generated_text)
-  return answer_data

 import os
+from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, AutoTokenizer
 import torch
 app = FastAPI()
 # microsoft/DialoGPT-medium
 # microsoft/DialoGPT-large
 # mistralai/Mixtral-8x7B-Instruct-v0.1
 # Load the Hugging Face GPT-2 model and tokenizer
+model = AutoModelForCausalLM.from_pretrained(name)
+tokenizer = AutoTokenizer.from_pretrained(name)
+gpt2model = GPT2LMHeadModel.from_pretrained(name)
+gpt2tokenizer = GPT2Tokenizer.from_pretrained(name)
 class req(BaseModel):
   prompt: str
   print("Prompt:", data.prompt)
   print("Length:", data.length)
+  if name == "microsoft/DialoGPT-small" or name == "microsoft/DialoGPT-medium" or name == "microsoft/DialoGPT-large":
+    # tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+    # model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+    step = 1
+    # encode the new user input, append the eos_token, and return a PyTorch tensor
+    new_user_input_ids = tokenizer.encode(data.prompt + tokenizer.eos_token, return_tensors='pt')
+    # append the new user input tokens to the chat history
+    bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids
+    # generate a response while limiting the total chat history to 1000 tokens
+    chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
+    generated_text = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
+    answer_data = { "answer": generated_text }
+    print("Answer:", generated_text)
+    return answer_data
+  else:
+    input_text = data.prompt
+    # Tokenize the input text
+    input_ids = gpt2tokenizer.encode(input_text, return_tensors="pt")
+    # Generate output using the model
+    output_ids = model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
+    generated_text = gpt2tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    answer_data = { "answer": generated_text }
+    print("Answer:", generated_text)
+    return answer_data