Spaces:

mamiksik
/

commit-message-generator

Running

App Files Files Community

mamiksik committed on Jan 20, 2023

Commit

83b862c

1 Parent(s): 6af1ce6

Add t5predictor to app.py

Browse files

Files changed (2) hide show

app.py +89 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import re
+import gradio as gr
+import torch
+from transformers import T5ForConditionalGeneration, RobertaTokenizer
# Model identifier and pinned revision passed to both from_pretrained() calls.
# Kept in one place so the tokenizer and the model cannot end up on
# different revisions.
MODEL_ID = "mamiksik/CommitPredictorT5PL"
MODEL_REVISION = "fb08d01"

# Both objects are created once at import time and reused by every request.
tokenizer = RobertaTokenizer.from_pretrained(MODEL_ID, revision=MODEL_REVISION)
model = T5ForConditionalGeneration.from_pretrained(MODEL_ID, revision=MODEL_REVISION)
def parse_files(accumulator: list[str], patch: str) -> list[str]:
    """Convert a unified diff into tagged lines and append them to *accumulator*.

    Each emitted line is prefixed with ``<add>``, ``<del>`` or ``<ide>``
    (added, removed, unchanged).  File headers are emitted as
    ``<tag><path>NAME`` entries, where NAME is the header's path field with
    its first character dropped (``a/app.py`` becomes ``/app.py``).

    Args:
        accumulator: List that receives the tagged lines; mutated in place.
        patch: Diff text, e.g. the output of ``git diff``.

    Returns:
        The same *accumulator* object, for convenience.
    """
    filename_before = None
    for line in patch.splitlines():
        # Lines starting with "index"/"diff" are dropped entirely.
        if line.startswith(("index", "diff")):
            continue

        # Require the space after the marker: a removed line whose content is
        # "---" shows up as "----" and must not be mistaken for a file header
        # (it has no path field, so splitting it would raise IndexError).
        if line.startswith("--- "):
            filename_before = line.split(" ", 1)[1][1:]
            continue

        if line.startswith("+++ "):
            filename_after = line.split(" ", 1)[1][1:]
            if filename_before is None:
                # No preceding "---" header: there is no old name to report,
                # so do not emit a bogus "<del><path>None" entry.
                accumulator.append(f"<add><path>{filename_after}")
            elif filename_before == filename_after:
                accumulator.append(f"<ide><path>{filename_before}")
            else:
                accumulator.append(f"<add><path>{filename_after}")
                accumulator.append(f"<del><path>{filename_before}")
            # Forget the old name so it cannot leak into a later file.
            filename_before = None
            continue

        # Strip "@@ ... @@" hunk markers; any text after the marker is kept.
        line = re.sub(r"@@[^@@]*@@", "", line)
        if not line:
            continue

        if line[0] == "+":
            line = "<add>" + line[1:]
        elif line[0] == "-":
            line = "<del>" + line[1:]
        else:
            line = f"<ide>{line}"
        accumulator.append(line)
    return accumulator
def predict(patch, max_length, min_length, num_beams, prediction_count):
    """Generate commit-message candidates for a patch.

    Args:
        patch: Diff text; converted with ``parse_files`` before tokenization.
        max_length: Upper bound on generated length, forwarded to
            ``model.generate``.
        min_length: Lower bound on generated length, forwarded to
            ``model.generate``.
        num_beams: Beam-search width.
        prediction_count: Number of sequences to return.

    Returns:
        A tuple ``(token_count, parsed_patch, predictions)``: the token count
        of the untruncated parsed patch, the parsed patch text itself, and a
        dict mapping every decoded prediction to ``0``.
    """
    # The numeric arguments come from UI sliders and may arrive as floats;
    # generate() expects integers.
    max_length = int(max_length)
    min_length = int(min_length)
    num_beams = int(num_beams)
    prediction_count = int(prediction_count)

    # A lower bound above the upper bound cannot be satisfied.
    min_length = min(min_length, max_length)
    # Beam search cannot return more sequences than it has beams, so widen
    # the beam instead of failing when more predictions are requested.
    num_beams = max(num_beams, prediction_count)

    input_text = "\n".join(parse_files([], patch))

    with torch.no_grad():
        # Length of the full encoding, measured before any truncation.
        token_count = tokenizer(input_text, return_tensors="pt").input_ids.shape[1]
        input_ids = tokenizer(
            input_text,
            truncation=True,
            padding=True,
            return_tensors="pt",
        ).input_ids
        outputs = model.generate(
            input_ids,
            max_length=max_length,
            min_length=min_length,
            num_beams=num_beams,
            num_return_sequences=prediction_count,
        )

    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # Every prediction is given the same placeholder value of 0.
    return token_count, input_text, {k: 0 for k in result}
# Web UI definition. The order of `inputs` must match the positional
# parameters of predict(), and the order of `outputs` must match the tuple
# it returns.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Patch (as generated by git diff)"),
        # step=1 keeps every slider on whole numbers; all four values are
        # used as integer generation settings.
        gr.Slider(1, 128, value=20, step=1, label="Max message length"),
        gr.Slider(1, 128, value=5, step=1, label="Min message length"),
        gr.Slider(1, 10, value=7, step=1, label="Number of beams"),
        gr.Slider(1, 15, value=5, step=1, label="Number of predictions"),
    ],
    outputs=[
        gr.Textbox(label="Token count"),
        gr.Textbox(label="Parsed patch"),
        gr.Label(label="Predictions"),
    ],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio~=3.16.2
+transformers~=4.25.1
+torch~=1.13.1