Edit model card

SUMMARY

Just a model I am using to learn fine-tuning of 'DialoGPT-medium'

  • on self-made datasets
  • with self-made special tokens
  • fine-tuned in multiple rounds with a ~30K dataset (work in progress)

If you are interested in how I got to this point and how I created the datasets, you can visit:
Crafting GPT2 for Personalized AI-Preparing Data the Long Way

DECLARING NEW SPECIAL TOKENS

# Special tokens used to delimit each section of a training sample.
# '<|STOP|>' doubles as both the beginning- and end-of-sequence marker.
special_tokens_dict = {
    'eos_token': '<|STOP|>',
    'bos_token': '<|STOP|>',
    'pad_token': '<|PAD|>',
    # Every section has a BEGIN/END pair. '<|END_QUERY|>' must be registered
    # here too, because the prompt templates use it to close the query.
    'additional_special_tokens': ['<|BEGIN_QUERY|>', '<|END_QUERY|>',
                                  '<|BEGIN_ANALYSIS|>', '<|END_ANALYSIS|>',
                                  '<|BEGIN_RESPONSE|>', '<|END_RESPONSE|>',
                                  '<|BEGIN_SENTIMENT|>', '<|END_SENTIMENT|>',
                                  '<|BEGIN_CLASSIFICATION|>', '<|END_CLASSIFICATION|>',]
}

# Register the tokens, then grow the embedding matrix to the new vocab size.
tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))

# Explicitly point the tokenizer's eos/bos/pad ids at the new tokens.
tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids('<|STOP|>')
tokenizer.bos_token_id = tokenizer.convert_tokens_to_ids('<|STOP|>')
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids('<|PAD|>')

The order of tokens is as follows:

def combine_text(user_prompt, analysis, sentiment, new_response, classification):
    """Assemble one training sample from its parts.

    Each part is wrapped in its BEGIN/END marker pair and the parts are
    concatenated in this order: query, analysis, response, classification,
    sentiment. The sample opens and closes with ``<|STOP|>``.
    """
    sections = [
        f"<|STOP|><|BEGIN_QUERY|>{user_prompt}<|END_QUERY|>",
        f"<|BEGIN_ANALYSIS|>{analysis}<|END_ANALYSIS|>",
        f"<|BEGIN_RESPONSE|>{new_response}<|END_RESPONSE|>",
        f"<|BEGIN_CLASSIFICATION|>{classification}<|END_CLASSIFICATION|>",
        # The sentiment section comes last and carries the closing <|STOP|>.
        f"<|BEGIN_SENTIMENT|>Sentiment: {sentiment}<|END_SENTIMENT|><|STOP|>",
    ]
    return "".join(sections)

INFERENCE

I am currently testing two approaches; if anyone knows a better one, please let me know!

import torch
# The class is spelled AutoModelForCausalLM (a single trailing "L").
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id of the fine-tuned checkpoint.
models_folder = "Deeokay/DialoGPT-special-tokens-medium4"

# Load the model weights and the matching tokenizer from the same repository.
model = AutoModelForCausalLM.from_pretrained(models_folder)
tokenizer = AutoTokenizer.from_pretrained(models_folder)

# Device configuration <<change as needed>> 
device = torch.device("cpu")
model.to(device)

OPTION 1 INFERENCE

import time

class Stopwatch:
    """Minimal start/stop timer for measuring how long a call takes.

    ``start_time`` and ``end_time`` hold ``time.perf_counter()`` readings
    (a monotonic clock meant for measuring intervals), or ``None`` while the
    corresponding mark has not been set.
    """

    def __init__(self):
        self.start_time = None
        self.end_time = None

    def start(self):
        """Begin a new measurement, discarding any previous stop mark."""
        # Without this reset, a restarted stopwatch would pair the new start
        # with the stale end mark and report a negative/incorrect interval.
        self.end_time = None
        self.start_time = time.perf_counter()

    def stop(self):
        """Record the end of the current measurement."""
        self.end_time = time.perf_counter()

    def elapsed_time(self):
        """Return the measured interval in seconds.

        Returns a descriptive string instead of a number when the stopwatch
        has not been started, or has been started but not yet stopped.
        """
        if self.start_time is None:
            return "Stopwatch hasn't been started"
        if self.end_time is None:
            return "Stopwatch hasn't been stopped"
        return self.end_time - self.start_time

# Timer used by the first inference option.
stopwatch1 = Stopwatch()

def generate_response(input_text, max_length=250):
    """Generate a completion for ``input_text`` and time it with ``stopwatch1``.

    The query is wrapped in the query markers and the analysis section is
    opened, so the model continues from there. No sampling arguments are
    passed, so the model's default decoding strategy applies.

    Args:
        input_text: The raw user query.
        max_length: Upper bound on the number of newly generated tokens
            (forwarded as ``max_new_tokens``).

    Returns:
        The decoded sequence (prompt plus continuation) with special tokens
        left in place.
    """
    stopwatch1.start()

    # Alternative prompt that also pre-fills the analysis section:
    # f"<|BEGIN_QUERY|>{input_text}<|END_QUERY|><|BEGIN_ANALYSIS|>{input_text}<|END_ANALYSIS|><|BEGIN_RESPONSE|>"
    prompt = f"<|BEGIN_QUERY|>{input_text}<|END_QUERY|><|BEGIN_ANALYSIS|>"
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    generation_kwargs = dict(
        max_new_tokens=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # Single unpadded prompt: every position is attended to.
        attention_mask=torch.ones_like(prompt_ids).to(device),
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.convert_tokens_to_ids('<|STOP|>'),
    )
    generated = model.generate(prompt_ids, **generation_kwargs)

    stopwatch1.stop()
    return tokenizer.decode(generated[0], skip_special_tokens=False)

OPTION 2 INFERENCE

import time

class Stopwatch:
    """Minimal start/stop timer for measuring how long a call takes.

    ``start_time`` and ``end_time`` hold ``time.perf_counter()`` readings
    (a monotonic clock meant for measuring intervals), or ``None`` while the
    corresponding mark has not been set.
    """

    def __init__(self):
        self.start_time = None
        self.end_time = None

    def start(self):
        """Begin a new measurement, discarding any previous stop mark."""
        # Without this reset, a restarted stopwatch would pair the new start
        # with the stale end mark and report a negative/incorrect interval.
        self.end_time = None
        self.start_time = time.perf_counter()

    def stop(self):
        """Record the end of the current measurement."""
        self.end_time = time.perf_counter()

    def elapsed_time(self):
        """Return the measured interval in seconds.

        Returns a descriptive string instead of a number when the stopwatch
        has not been started, or has been started but not yet stopped.
        """
        if self.start_time is None:
            return "Stopwatch hasn't been started"
        if self.end_time is None:
            return "Stopwatch hasn't been stopped"
        return self.end_time - self.start_time

# Timer used by the second inference option.
stopwatch2 = Stopwatch()

def generate_response2(input_text, max_length=250):
    """Generate a sampled completion for ``input_text`` and time it with ``stopwatch2``.

    Uses the same prompt layout as the first option (query wrapped in its
    markers, analysis section opened) but enables sampling with
    ``temperature=0.4`` and ``top_k=60``.

    Args:
        input_text: The raw user query.
        max_length: Upper bound on the number of newly generated tokens
            (forwarded as ``max_new_tokens``).

    Returns:
        The decoded sequence (prompt plus continuation) with special tokens
        left in place.
    """
    stopwatch2.start()

    # Alternative prompt that also pre-fills the analysis section:
    # f"<|BEGIN_QUERY|>{input_text}<|END_QUERY|><|BEGIN_ANALYSIS|>{input_text}<|END_ANALYSIS|><|BEGIN_RESPONSE|>"
    prompt = f"<|BEGIN_QUERY|>{input_text}<|END_QUERY|><|BEGIN_ANALYSIS|>"
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    sampling_kwargs = dict(
        max_new_tokens=max_length,
        # Single unpadded prompt: every position is attended to.
        attention_mask=torch.ones_like(prompt_ids).to(device),
        do_sample=True,
        temperature=0.4,
        top_k=60,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    generated = model.generate(prompt_ids, **sampling_kwargs)

    stopwatch2.stop()
    return tokenizer.decode(generated[0], skip_special_tokens=False)

DECODING ANSWER

When I need just the response

def decode(text):
    """Extract just the response section from a generated sequence.

    Returns the whitespace-stripped text between the first
    ``<|BEGIN_RESPONSE|>`` marker and the first ``<|END_RESPONSE|>`` marker
    that follows it. If either marker is missing, the whole input is
    returned, stripped.

    Args:
        text: Decoded model output, with special tokens still present.

    Returns:
        The response text, or the stripped input when no complete
        response section is found.
    """
    start_token = "<|BEGIN_RESPONSE|>"
    end_token = "<|END_RESPONSE|>"

    start_idx = text.find(start_token)
    if start_idx == -1:
        return text.strip()

    # Look for the end marker only AFTER the start marker; a stray end marker
    # earlier in the text must not be paired with a later start marker.
    body_start = start_idx + len(start_token)
    end_idx = text.find(end_token, body_start)
    if end_idx == -1:
        return text.strip()

    return text[body_start:end_idx].strip()

MY SETUP

I use the stopwatch to time the responses, and I run both inference options to compare them.

# Run the same prompt through both inference options and print each raw
# output together with the time its generation took.
input_text = "Who is Steve Jobs and what was contribution?"

# --- Option 1 ---
response1_full = generate_response(input_text)
elapsed1 = stopwatch1.elapsed_time()
# response1 = decode(response1_full)  # enable to keep only the response text
print(
    f"Input: {input_text}",
    "=======================================",
    f"Response1: {response1_full}",
    f"Process took {elapsed1:.4f} seconds",
    "=======================================",
    sep="\n",
)

# --- Option 2 ---
response2_full = generate_response2(input_text)
elapsed2 = stopwatch2.elapsed_time()
# response2 = decode(response2_full)  # enable to keep only the response text
print(
    f"Response2: {response2_full}",
    f"Process took {elapsed2:.4f} seconds",
    "=======================================",
    sep="\n",
)

Out-of-Scope Use

Anything involving factual data: trust at your own risk!

Never tested on mathematical knowledge.

I quite enjoy how the response feels closer to what I had in mind..

Downloads last month
9
Safetensors
Model size
355M params
Tensor type
F32
·
Inference API
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.