import gradio as gr
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Load the custom model and tokenizer
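# 'redael/model_udc' is loaded with the GPT-2 classes, i.e. a GPT-2 style checkpoint pulled from the Hugging Face Hub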
model_path = 'redael/model_udc'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
# GPT-2 tokenizers have no pad token by default; fall back to EOS so padding in tokenizer() does not raise
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_path)
# Check if CUDA is available and use GPU if possible, enable FP16 precision
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
if device.type == 'cuda':
    model = model.half()  # Use FP16 precision

def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
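    """Generate one assistant reply for `prompt` using the loaded GPT-2 model.

    The prompt is wrapped in the "User: ... / Assistant:" chat format, run through
    `model.generate`, and the decoded text is trimmed down to the assistant's reply.
    """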
    # Prepare the prompt
    prompt = f"User: {prompt}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
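    # Sampling settings: temperature/top_p control randomness, repetition_penalty
    # discourages repeated phrases, and num_beams=1 keeps decoding cheap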
    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=max_length,  # cap newly generated tokens so long prompts still get a reply
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        num_beams=num_beams,  # Use a lower number of beams
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,  # Increased repetition penalty
        early_stopping=True
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Post-processing to clean up the response
    response = response.split("Assistant:")[-1].strip()
    response_lines = response.split('\n')
    clean_response = []
    for line in response_lines:
        if "User:" not in line and "Assistant:" not in line:
            clean_response.append(line)
    response = ' '.join(clean_response)
    return response.strip()

def respond(message, history: list[tuple[str, str]]):
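    """Gradio ChatInterface callback.

    `message` is the newest user message; `history` is the list of
    (user_message, assistant_response) pairs already shown in the chat.
    """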
    # Prepare the prompt from the history and the new message
    system_message = "You are a friendly chatbot."
    conversation = system_message + "\n"
    for user_message, assistant_response in history:
        conversation += f"User: {user_message}\nAssistant: {assistant_response}\n"
    conversation += f"User: {message}\nAssistant:"
    # Fixed values for generation parameters
    max_tokens = 100  # Adjusted max tokens
    temperature = 0.7
    top_p = 0.9
    response = generate_response(conversation, model, tokenizer, max_length=max_tokens, temperature=temperature, top_p=top_p)
    return response

# Gradio Chat Interface without customizable inputs
demo = gr.ChatInterface(
    respond
)
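# launch() starts a local Gradio server; pass share=True for a temporary public link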
if __name__ == "__main__":
    demo.launch()