jeffreymeetkai committed on
Commit 2e54059
1 Parent(s): 7fae8ba

add response parsing remote code, update README

Files changed (4)
  1. README.md +38 -0
  2. config.json +4 -1
  3. modeling_functionary.py +126 -0
  4. tokenizer_config.json +1 -1
README.md CHANGED
@@ -19,6 +19,44 @@ The model determines when to execute functions, whether in parallel or serially,
  - Truly one of the best open-source alternatives to GPT-4
  - Support code interpreter

+ ## How to Get Started
+
+ We provide custom code for parsing the raw model output into a JSON object containing the role, content and tool_calls fields, exposed through the `generate_tool_use` method. Load the model and tokenizer with `trust_remote_code=True` and run it as shown below:
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-small-v2.5", device_map="auto", trust_remote_code=True)
+
+ tools = [
+     {
+         "type": "function",
+         "function": {
+             "name": "get_current_weather",
+             "description": "Get the current weather",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "location": {
+                         "type": "string",
+                         "description": "The city and state, e.g. San Francisco, CA"
+                     }
+                 },
+                 "required": ["location"]
+             }
+         }
+     }
+ ]
+ messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}]
+
+ final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
+ tokenizer.padding_side = "left"
+ inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda")
+ pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)
+ print(tokenizer.decode(pred.cpu()[0]))
+ ```
+
  ## Prompt Template

  We convert function definitions to text similar to TypeScript definitions. Then we inject these definitions as system prompts. After that, we inject the default system prompt. Then we start the conversation messages.
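To see these injected definitions concretely, the prompt can be rendered without tokenizing; the sketch below simply mirrors the `apply_chat_template` call from the example above and prints the result, so the rendered text should show the function definitions and system prompts described here (illustrative values, not part of the repository files).

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)

# A single illustrative tool definition and user turn, same shape as the README example.
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}},
            "required": ["location"],
        },
    },
}]
messages = [{"role": "user", "content": "What is the weather in Istanbul?"}]

# Render to text only, to inspect how the definitions and system prompts
# are placed ahead of the conversation messages.
print(tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False))
```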
config.json CHANGED
@@ -1,8 +1,11 @@
  {
    "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
    "architectures": [
-     "LlamaForCausalLM"
+     "FunctionaryForCausalLM"
    ],
+   "auto_map": {
+     "AutoModelForCausalLM": "modeling_functionary.FunctionaryForCausalLM"
+   },
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 128000,
modeling_functionary.py ADDED
@@ -0,0 +1,126 @@
+ # coding=utf-8
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+ #
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+ # and OPT implementations in this library. It has been modified from its
+ # original forms to accommodate minor architectural differences compared
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """PyTorch LLaMA model."""
+
+ import json
+ import re
+ from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
+
+ import torch
+ import torch.utils.checkpoint
+
+ from transformers.generation.configuration_utils import GenerationConfig
+ from transformers.generation.logits_process import LogitsProcessorList
+ from transformers.generation.stopping_criteria import StoppingCriteriaList
+ from transformers.generation.utils import (
+     GenerateBeamDecoderOnlyOutput,
+     GenerateBeamEncoderDecoderOutput,
+     GenerateDecoderOnlyOutput,
+     GenerateEncoderDecoderOutput
+ )
+ from transformers.models.llama.modeling_llama import LlamaForCausalLM
+ from transformers.utils import logging
+
+
+ if TYPE_CHECKING:
+     from transformers.modeling_utils import PreTrainedModel
+     from transformers.generation.streamers import BaseStreamer
+
+ logger = logging.get_logger(__name__)
+
+ GenerateNonBeamOutput = Union[GenerateDecoderOnlyOutput, GenerateEncoderDecoderOutput]
+ GenerateBeamOutput = Union[GenerateBeamDecoderOnlyOutput, GenerateBeamEncoderDecoderOutput]
+ GenerateOutput = Union[GenerateNonBeamOutput, GenerateBeamOutput]
+
+
+ class FunctionaryForCausalLM(LlamaForCausalLM):
+
+     def generate_tool_use(
+         self,
+         inputs: Optional[torch.Tensor] = None,
+         generation_config: Optional[GenerationConfig] = None,
+         logits_processor: Optional[LogitsProcessorList] = None,
+         stopping_criteria: Optional[StoppingCriteriaList] = None,
+         prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
+         synced_gpus: Optional[bool] = None,
+         assistant_model: Optional["PreTrainedModel"] = None,
+         streamer: Optional["BaseStreamer"] = None,
+         negative_prompt_ids: Optional[torch.Tensor] = None,
+         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
+         **kwargs,
+     ) -> Union[GenerateOutput, torch.LongTensor]:
+
+         results = self.generate(
+             inputs=inputs,
+             generation_config=generation_config,
+             logits_processor=logits_processor,
+             stopping_criteria=stopping_criteria,
+             prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
+             synced_gpus=synced_gpus,
+             assistant_model=assistant_model,
+             streamer=streamer,
+             negative_prompt_ids=negative_prompt_ids,
+             negative_prompt_attention_mask=negative_prompt_attention_mask,
+             **kwargs,
+         )
+
+         tokenizer = kwargs.pop("tokenizer", None)  # Pull this out first, we use it to parse raw output
+         input_ids = kwargs.pop("input_ids")
+         function_call_token = "<|reserved_special_token_249|>"
+
+         correct_results = []
+         for input_id, result in zip(input_ids, results):
+             final_output_json = {"role": "assistant", "content": None, "tool_calls": None}
+             tool_calls = []
+             raw_output_str = tokenizer.decode(result[len(input_id):].cpu())
+             has_text = False if raw_output_str.startswith(function_call_token) else True
+             chunks = raw_output_str.split(function_call_token)
+             for i, chunk in enumerate(chunks):
+                 if len(chunk) == 0:
+                     continue
+
+                 chunk = chunk.replace(tokenizer.pad_token, "")
+                 if i == 0 and has_text is not False:
+                     # Free-form text, if any, comes before the first function call token
+                     final_output_json["content"] = chunk.strip()[:-len("<|eot_id|>")] if chunk.endswith("<|eot_id|>") else chunk
+                 else:
+                     # Each remaining chunk is "<function name>\n<JSON arguments>"
+                     tool_calls.append(
+                         {
+                             "name": chunk[: chunk.index("\n{")],
+                             "arguments": chunk[chunk.index("\n{") + 1: -len("<|eot_id|>")] if chunk.endswith("<|eot_id|>") else chunk[chunk.index("\n{") + 1:]
+                         }
+                     )
+             if len(tool_calls) > 0:
+                 final_output_json["tool_calls"] = tool_calls
+             final_output_str = json.dumps(final_output_json, indent=4)
+             final_output_ids = tokenizer(final_output_str, add_special_tokens=False)["input_ids"]
+             correct_results.append(
+                 torch.cat(
+                     (result[:len(input_id)].cpu(), torch.tensor(final_output_ids))
+                 )
+             )
+         max_len = max([tensor.shape[0] for tensor in correct_results])
+         correct_results = [
+             torch.nn.functional.pad(
+                 correct_result, (0, max_len - correct_result.shape[0]), value=tokenizer.eos_token_id
+             ) for correct_result in correct_results
+         ]
+         correct_results = torch.stack(correct_results)
+
+         return correct_results
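The `auto_map` entry in config.json routes `AutoModelForCausalLM` to this class, so `generate_tool_use` is available on any model loaded with `trust_remote_code=True`. Because the method re-tokenizes a JSON summary and appends it after the prompt tokens (right-padded with the eos token), the structured result can be recovered with a plain `json.loads`. A usage sketch, assuming `model`, `tokenizer`, and `inputs` from the README example above:

```python
import json

# `inputs` is the tokenized prompt from the README example; generate_tool_use
# returns the prompt tokens followed by the tokenized JSON summary.
pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)

prompt_len = inputs["input_ids"].shape[1]
# Drop the prompt and the eos padding, then parse the remaining JSON text.
raw_json = tokenizer.decode(pred[0, prompt_len:].cpu(), skip_special_tokens=True)
parsed = json.loads(raw_json)
print(parsed["role"])        # "assistant"
print(parsed["tool_calls"])  # e.g. [{"name": "get_current_weather", "arguments": "..."}] or None
```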
tokenizer_config.json CHANGED
@@ -2050,7 +2050,7 @@
  }
  },
  "bos_token": "<|begin_of_text|>",
- "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}",
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|end_of_text|>",
  "legacy": true,