
What is this?

This is a DeepSeek Coder 6.7B model (6.74B params, BF16) fine-tuned to predict a commit message for a given diff. The prompt wraps an ndiff of the old and new code between "<diff>" and "<commit_message>" tags, as shown below.

Languages trained on:

LANGS = [
    "Python",
    "Rust",
    "JavaScript",
    "Java",
    "Go",
    "C++",
    "C#",
    "Ruby",
    "PHP",
    "TypeScript",
    "C",
    "Scala",
    "Swift",
    "Kotlin",
    "Objective-C",
    "Perl",
    "Haskell",
    "Bash",
    "Sh",
    "Lua",
    "R",
    "Julia",
]

How to prompt:

import difflib

class NDiff:
    """Line-level ndiff between two strings; used to build the model prompt."""

    def __init__(self, s1, s2):
        self.s1 = s1
        self.s2 = s2
        # materialize the diff: difflib.ndiff returns a generator, which would
        # otherwise be exhausted after the first iteration
        self.diff = list(difflib.ndiff(s1.split("\n"), s2.split("\n")))

    def __str__(self):
        # drop the "?" hint lines that ndiff emits for intra-line changes
        return "\n".join([l for l in self.diff if l[0] != "?"])

    def str_colored(self):
        # optional colorized view; requires the "colored" package
        import colored

        buf = ""
        for l in self.diff:
            if l[0] == "?":
                continue
            if l[0] == "-":
                buf += colored.stylize(l, colored.fg("red"))
            elif l[0] == "+":
                buf += colored.stylize(l, colored.fg("green"))
            else:
                buf += l
            buf += "\n"
        return buf

    def num_removed(self):
        return len([l for l in self.diff if l[0] == "-"])

    def num_added(self):
        return len([l for l in self.diff if l[0] == "+"])

    def __repr__(self):
        return self.__str__()

def format_prompt(old, new):
    # the model expects: <diff>\n{ndiff of old -> new}\n<commit_message>\n
    diff_header = "<diff>"
    instr_header = "<commit_message>"
    diff = str(NDiff(old, new))
    return f"{diff_header}\n{diff}\n{instr_header}\n"

def gen(old, new, max_new_tokens=200, temperature=0.45, top_p=0.90):
    # assumes "tokenizer" and "model" are already loaded (see below)
    prompt = format_prompt(old, new)
    toks = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    outs = model.generate(toks, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature, top_p=top_p)
    # decode only the newly generated tokens, skipping the prompt
    return [tokenizer.decode(out[len(toks[0]):], skip_special_tokens=True) for out in outs]
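The "gen" function assumes "model" and "tokenizer" are already in scope. A minimal loading sketch with transformers (the checkpoint id below is a placeholder; substitute this repository's id):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "<this-model-repo-id>"  # placeholder: use this model's Hugging Face id

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # checkpoint weights are stored in BF16
    device_map="auto",           # requires the accelerate package
)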

use the "gen" function with the old and new code

Example:

- import datasets
- from pathlib import Path
  from code_editing.models import CodeLlamaEditModel, LlamaChatModel, EditModel, EditCommand, ChatAdaptorEditModel, OctoCoderChatModel, codellama_edit_prompt_diff, apply_rel_diff_trim, OpenAIChatModel, StarCoderCommitEditModel
  from code_editing.humanevalpack import batch_prompts_from_example
  from code_editing.utils import gunzip_json_write
  from typing import List, Callable
  from tqdm import tqdm
  
  
  # NOTE: this is the factory for each model type. to add a new model type, add a new case here
  # and implement it in models.py. Also, add a new case in the argument parser below.
- def model_factory(model_type: str, quantize=False, num_gpus=1) -> Callable[[str], EditModel]:
+ def model_factory(
+         model_type: str,
+         quantize=False,
+         num_gpus=1,
+         system_supported=True,
+ ) -> Callable[[str], EditModel]:
      if model_type == "codellama" or model_type == "deepseek":
          return CodeLlamaEditModel
      elif model_type == "starcoder":
          return StarCoderCommitEditModel
      elif model_type == "codellama-diff":
          return (lambda path: CodeLlamaEditModel(path, prompt_format=codellama_edit_prompt_diff, post_process=apply_rel_diff_trim))
      elif model_type == "openai":
          return (lambda path: ChatAdaptorEditModel(OpenAIChatModel(path)))
      elif model_type == "codellama-chat":
-         return (lambda path: ChatAdaptorEditModel(LlamaChatModel(path, quantization=quantize, num_gpus=num_gpus)))
+         return (lambda path: ChatAdaptorEditModel(LlamaChatModel(path, quantization=quantize, num_gpus=num_gpus, system_supported=system_supported)))
      elif model_type == "octocoder":
          return (lambda path: ChatAdaptorEditModel(OctoCoderChatModel(path, quantization=quantize, num_gpus=num_gpus)))
      else:
          raise ValueError(f"Unknown model type: {model_type}")
  
  def complete_problem(example: EditCommand, model: EditModel, batch_size: int, completion_limit: int, **kwargs) -> List[str]:
      batches = batch_prompts_from_example(example, batch_size, completion_limit)
  
      completions = []
      for batch in batches:
          resps = model.generate(batch, **kwargs)
          for resp in resps:
              completions.append(resp["content"])
  
      return completions

Produced:

Add system_supported argument to model_factory

Trained on the cassanof/CommitMessageBackwards dataset.