# GODEL model module for chat interaction and model instance control

# external imports
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig

# internal imports
from utils import modelling as mdl
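# (inferred from its use in format_prompt below: mdl.prompt_limiter takes the
# tokenizer plus message, history, system prompt and knowledge, and returns
# the same four elements trimmed to the model's maximum token count)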

# global tokenizer and model instances (created once on initial build)
TOKENIZER = AutoTokenizer.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
MODEL = AutoModelForSeq2SeqLM.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")


# model config definition
CONFIG = GenerationConfig.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
base_config_dict = {
    "max_new_tokens": 64,
    "min_length": 8,
    "top_p": 0.9,
    "do_sample": True,
}
CONFIG.update(**base_config_dict)


# function to (re)set the generation config
def set_config(config_dict: dict):
    # if an empty config dict is given, fall back to the default config
    if config_dict == {}:
        config_dict = base_config_dict
    CONFIG.update(**config_dict)
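
# example runtime override (illustrative values, not part of the module);
# note that respond() re-applies the defaults above before every generation:
# set_config({"top_p": 0.7, "max_new_tokens": 32})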


# formatting function that builds the input prompt for the model
# CREDIT: Adapted from the official inference example on Huggingface
## see https://huggingface.co/microsoft/GODEL-v1_1-large-seq2seq
def format_prompt(message: str, history: list, system_prompt: str, knowledge: str = ""):
    # user input prompt initialization
    prompt = ""

    # limits the prompt elements to the maximum token count
    message, history, system_prompt, knowledge = mdl.prompt_limiter(
        TOKENIZER, message, history, system_prompt, knowledge
    )

    # adds knowledge text if not empty
    if knowledge != "":
        knowledge = "[KNOWLEDGE] " + knowledge

    # adds the conversation history to the prompt, separating turns with EOS
    for user_turn, bot_turn in history:
        prompt += f"{user_turn} EOS {bot_turn} EOS "

    # adds the current message as the latest turn
    prompt += message
    # combines the entire prompt
    full_prompt = f"{system_prompt} [CONTEXT] {prompt} {knowledge}"

    # returns the formatted prompt
    return full_prompt
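
# example of the resulting prompt layout (illustrative values; assumes
# mdl.prompt_limiter passes these short inputs through unchanged):
# format_prompt("How deep is it?", [("Hi", "Hello!")],
#               "Instruction: be helpful.", "The lake is 10 m deep.")
# -> "Instruction: be helpful. [CONTEXT] Hi EOS Hello! EOS How deep is it?
#     [KNOWLEDGE] The lake is 10 m deep."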


# response function calling the model and returning the model output message
# CREDIT: Copied from the official inference example on Huggingface
## see https://huggingface.co/microsoft/GODEL-v1_1-large-seq2seq
def respond(prompt: str):
    # resets the generation config to the defaults before generating
    set_config({})

    # tokenizing the input string
    input_ids = TOKENIZER(prompt, return_tensors="pt").input_ids

    # generating using config and decoding output
    outputs = MODEL.generate(input_ids, generation_config=CONFIG)
    output = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    # returns the model output string
    return output
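

# minimal usage sketch (assumes the module-level defaults above and that
# utils.modelling.prompt_limiter passes short inputs through unchanged)
if __name__ == "__main__":
    # hypothetical dialog history as (user, bot) turn pairs
    sample_history = [("Hi, who are you?", "I am a GODEL-based chatbot.")]
    sample_prompt = format_prompt(
        message="What can you do?",
        history=sample_history,
        system_prompt="Instruction: given a dialog context, respond helpfully.",
    )
    # generates and prints the model reply
    print(respond(sample_prompt))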