---
datasets:
- mzbac/function-calling-phi-3-format-v1.1
---

# Model

The Phi-3 instruct model fine-tuned for function calling with MLX-LM, using the dataset https://huggingface.co/datasets/mzbac/function-calling-phi-3-format-v1.1.

# Usage

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "mzbac/Phi-3-mini-4k-instruct-function-calling"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Function definition that is passed to the model as part of the prompt.
tool = {
    "name": "search_web",
    "description": "Perform a web search for a given search terms.",
    "parameter": {
        "type": "object",
        "properties": {
            "search_terms": {
                "type": "array",
                "items": {"type": "string"},
                "description": "The search queries for which the search is performed.",
                "required": True,
            }
        },
    },
}

messages = [
    {
        "role": "user",
        "content": f"You are a helpful assistant with access to the following functions. Use them if required - {str(tool)}",
    },
    {"role": "user", "content": "Any news in Melbourne today, May 7, 2024?"},
]

input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Stop on either the EOS token or Phi-3's <|end|> turn terminator.
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|end|>")]

outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.1,
)
response = outputs[0]
print(tokenizer.decode(response))

# Expected output:
# <|user|> You are a helpful assistant with access to the following functions. Use them if required - {'name': 'search_web', 'description': 'Perform a web search for a given search terms.', 'parameter': {'type': 'object', 'properties': {'search_terms': {'type': 'array', 'items': {'type': 'string'}, 'description': 'The search queries for which the search is performed.', 'required': True}}}}<|end|><|assistant|>
# <|user|> Any news in Melbourne today, May 7, 2024?<|end|>
# <|assistant|> {"name": "search_web", "arguments": {"search_terms": ["news", "Melbourne", "May 7, 2024"]}}<|end|>
```
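The assistant's turn is a single JSON object naming the function to call and its arguments. Continuing the snippet above, the call can be parsed and dispatched. This is a minimal sketch that assumes the model replies with bare JSON as in the example output; the `search_web` body and the dispatch table are hypothetical placeholders, not part of this model card:

```python
import json

# Keep only the newly generated tokens (everything after the prompt) and
# drop special tokens such as <|end|> before parsing.
generated = tokenizer.decode(
    outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
)
tool_call = json.loads(generated)  # e.g. {"name": "search_web", "arguments": {...}}


# Hypothetical local implementation of the declared function.
def search_web(search_terms):
    return f"pretend results for {search_terms!r}"


# Dispatch the call the model asked for.
available_functions = {"search_web": search_web}
result = available_functions[tool_call["name"]](**tool_call["arguments"])
print(result)
```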
# Training hyperparameters

Fine-tuning used MLX-LM's LoRA recipe with the following `lora_config.yaml`; a sketch of launching a run with it appears at the end of this card.

```yaml
# The path to the local model directory or Hugging Face repo.
model: "microsoft/Phi-3-mini-4k-instruct"

# Whether or not to train (boolean)
train: true

# Directory with {train, valid, test}.jsonl files
data: "data"

# The PRNG seed
seed: 0

# Number of layers to fine-tune
lora_layers: 32

# Minibatch size.
batch_size: 1

# Iterations to train for.
iters: 111000

# Number of validation batches, -1 uses the entire validation set.
val_batches: -1

# Adam learning rate.
learning_rate: 1e-6

# Number of training steps between loss reporting.
steps_per_report: 10

# Number of training steps between validations.
steps_per_eval: 200

# Load path to resume training with the given adapter weights.
# resume_adapter_file: "adapters/adapters.safetensors"

# Save/load path for the trained adapter weights.
adapter_path: "adapters"

# Save the model every N iterations.
save_every: 1000

# Evaluate on the test set after training
test: false

# Number of test set batches, -1 uses the entire test set.
test_batches: 100

# Maximum sequence length.
max_seq_length: 4096

# Use gradient checkpointing to reduce memory use.
grad_checkpoint: false

# LoRA parameters can only be specified in a config file
lora_parameters:
  # The layer keys to apply LoRA to.
  # These will be applied for the last lora_layers
  keys: ['mlp.down_proj','mlp.gate_up_proj','self_attn.qkv_proj','self_attn.o_proj']
  rank: 128
  alpha: 256
  scale: 10.0
  dropout: 0.05
```

***

Quantization of [mzbac/Phi-3-mini-4k-instruct-function-calling](https://huggingface.co/mzbac/Phi-3-mini-4k-instruct-function-calling), created using the [llm-quantizer](https://github.com/Nold360/llm-quantizer) pipeline.
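Referring back to the `lora_config.yaml` above: training is normally launched through MLX-LM's LoRA entry point (for example `python -m mlx_lm.lora --config lora_config.yaml`; the `--config` flag is assumed from recent `mlx-lm` releases), after which the adapter can be loaded back for a quick smoke test. A minimal sketch, assuming `mlx_lm.load` accepts an `adapter_path` argument and `mlx_lm.generate` takes a prompt string:

```python
from mlx_lm import load, generate

# Load the base model together with the LoRA adapter written to
# `adapter_path: "adapters"` by the config above.
model, tokenizer = load(
    "microsoft/Phi-3-mini-4k-instruct",
    adapter_path="adapters",
)

# Build a Phi-3 chat prompt and generate a short reply as a sanity check.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Any news in Melbourne today, May 7, 2024?"}],
    tokenize=False,
    add_generation_prompt=True,
)

print(generate(model, tokenizer, prompt=prompt, max_tokens=256))
```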