Improve model card: Add pipeline tag, library name, and paper link
Browse files
This PR enhances the model card for `LLM4Binary/llm4decompile-6.7b-v2` by:
- Adding the `pipeline_tag: text-generation` to ensure better discoverability on the Hugging Face Hub.
- Including `library_name: transformers` to enable the automated "How to use in Transformers" widget, providing users with a quick and easy way to get started with the model.
- Adding a prominent link to the official Hugging Face paper page, [Decompile-Bench: Million-Scale Binary-Source Function Pairs for Real-World Binary Decompilation](https://huggingface.co/papers/2505.12668), at the beginning of the model card for improved accessibility.
These updates improve the model's documentation and user experience on the Hugging Face Hub.
README.md
CHANGED
|
@@ -3,8 +3,12 @@ license: mit
|
|
| 3 |
tags:
|
| 4 |
- decompile
|
| 5 |
- binary
|
|
|
|
|
|
|
| 6 |
---
|
| 7 |
|
|
|
|
|
|
|
| 8 |
### 1. Introduction of LLM4Decompile
|
| 9 |
|
| 10 |
LLM4Decompile aims to decompile x86 assembly instructions into C. The newly released V2 series are trained with a larger dataset (2B tokens) and a maximum token length of 4,096, with remarkable performance (up to 100% improvement) compared to the previous model.
|
|
@@ -53,6 +57,7 @@ Note: **Replace** func0 with the function name you want to decompile.
|
|
| 53 |
import os
|
| 54 |
import subprocess
|
| 55 |
from tqdm import tqdm,trange
|
|
|
|
| 56 |
|
| 57 |
OPT = ["O0", "O1", "O2", "O3"]
|
| 58 |
timeout_duration = 10
|
|
@@ -92,7 +97,8 @@ with tempfile.TemporaryDirectory() as temp_dir:
|
|
| 92 |
c_decompile = f.read()
|
| 93 |
c_func = []
|
| 94 |
flag = 0
|
| 95 |
-
for line in c_decompile.split('
|
|
|
|
| 96 |
if "Function: func0" in line:#**Replace** func0 with the function name you want to decompile.
|
| 97 |
flag = 1
|
| 98 |
c_func.append(line)
|
|
@@ -108,10 +114,14 @@ with tempfile.TemporaryDirectory() as temp_dir:
|
|
| 108 |
if 'func0' in c_func[idx_tmp]:
|
| 109 |
break
|
| 110 |
c_func = c_func[idx_tmp:]
|
| 111 |
-
input_asm = '
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
input_asm_prompt = before+input_asm.strip()+after
|
| 116 |
with open(fileName +'_' + opt +'.pseudo','w',encoding='utf-8') as f:
|
| 117 |
f.write(input_asm_prompt)
|
|
@@ -162,8 +172,10 @@ c_func_decompile = tokenizer.decode(outputs[0][len(inputs[0]):-1])
|
|
| 162 |
with open(fileName +'_' + OPT[0] +'.pseudo','r') as f:#original file
|
| 163 |
func = f.read()
|
| 164 |
|
| 165 |
-
print(f'pseudo function
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
|
| 168 |
```
|
| 169 |
|
|
@@ -172,4 +184,4 @@ This code repository is licensed under the MIT License.
|
|
| 172 |
|
| 173 |
### 5. Contact
|
| 174 |
|
| 175 |
-
If you have any questions, please raise an issue.
|
|
|
|
| 3 |
tags:
|
| 4 |
- decompile
|
| 5 |
- binary
|
| 6 |
+
pipeline_tag: text-generation
|
| 7 |
+
library_name: transformers
|
| 8 |
---
|
| 9 |
|
| 10 |
+
This repository contains the `LLM4Binary/llm4decompile-6.7b-v2` model. This model is associated with the paper [Decompile-Bench: Million-Scale Binary-Source Function Pairs for Real-World Binary Decompilation](https://huggingface.co/papers/2505.12668).
|
| 11 |
+
|
| 12 |
### 1. Introduction of LLM4Decompile
|
| 13 |
|
| 14 |
LLM4Decompile aims to decompile x86 assembly instructions into C. The newly released V2 series are trained with a larger dataset (2B tokens) and a maximum token length of 4,096, with remarkable performance (up to 100% improvement) compared to the previous model.
|
|
|
|
| 57 |
import os
|
| 58 |
import subprocess
|
| 59 |
from tqdm import tqdm,trange
|
| 60 |
+
import tempfile
|
| 61 |
|
| 62 |
OPT = ["O0", "O1", "O2", "O3"]
|
| 63 |
timeout_duration = 10
|
|
|
|
| 97 |
c_decompile = f.read()
|
| 98 |
c_func = []
|
| 99 |
flag = 0
|
| 100 |
+
for line in c_decompile.split('
|
| 101 |
+
'):
|
| 102 |
if "Function: func0" in line:#**Replace** func0 with the function name you want to decompile.
|
| 103 |
flag = 1
|
| 104 |
c_func.append(line)
|
|
|
|
| 114 |
if 'func0' in c_func[idx_tmp]:
|
| 115 |
break
|
| 116 |
c_func = c_func[idx_tmp:]
|
| 117 |
+
input_asm = '
|
| 118 |
+
'.join(c_func).strip()
|
| 119 |
+
|
| 120 |
+
before = f"# This is the assembly code:
|
| 121 |
+
"#prompt
|
| 122 |
+
after = "
|
| 123 |
+
# What is the source code?
|
| 124 |
+
"#prompt
|
| 125 |
input_asm_prompt = before+input_asm.strip()+after
|
| 126 |
with open(fileName +'_' + opt +'.pseudo','w',encoding='utf-8') as f:
|
| 127 |
f.write(input_asm_prompt)
|
|
|
|
| 172 |
with open(fileName +'_' + OPT[0] +'.pseudo','r') as f:#original file
|
| 173 |
func = f.read()
|
| 174 |
|
| 175 |
+
print(f'pseudo function:
|
| 176 |
+
{func}')# Note we only decompile one function, where the original file may contain multiple functions
|
| 177 |
+
print(f'refined function:
|
| 178 |
+
{c_func_decompile}')
|
| 179 |
|
| 180 |
```
|
| 181 |
|
|
|
|
| 184 |
|
| 185 |
### 5. Contact
|
| 186 |
|
| 187 |
+
If you have any questions, please raise an issue.
|