Add headers and todo
Browse files
README.md
CHANGED
@@ -11,11 +11,15 @@ widget:
|
|
11 |
- text: "pragma solidity ^0.5.7;\n// Context: ParentA | Functions: helloA helloB | Constants: constantA \ncontract HelloWorld is ParentA {"
|
12 |
---
|
13 |
|
14 |
-
# A code
|
15 |
-
|
|
|
|
|
16 |
- Header solidity version like `pragma solidity ^0.5.7`
|
17 |
- Ancestor class/library info, e.g. public functions and constants from `ParentA`
|
18 |
- Contract/Library/Interface declaration header, e.g. `HelloWorld` ended with `{`
|
|
|
|
|
19 |
|
20 |
```python
|
21 |
# !pip install transformers -q
|
@@ -38,35 +42,16 @@ print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
|
|
38 |
# Expected outcome
|
39 |
"""
|
40 |
string public constant name = "Hello World";
|
41 |
-
|
42 |
-
uint8 public constant decimals = 18;
|
43 |
-
uint256 public constant initialSupply = 0;
|
44 |
uint256 public constant override returns (uint256) {
|
45 |
return initialSupply;
|
46 |
}
|
47 |
function initialSupply() public view returns (uint256) {
|
48 |
-
|
49 |
-
}
|
50 |
-
function balanceOf(address _owner) public view returns (uint256) {
|
51 |
-
return balanceOf(_owner);
|
52 |
-
}
|
53 |
-
function transfer(address _to, uint256 _value) public returns (bool) {
|
54 |
-
balanceOf[msg.sender] -= _value;
|
55 |
-
balanceOf[_to] += _value;
|
56 |
-
emit Transfer(msg.sender, _to, _value);
|
57 |
-
return true;
|
58 |
-
}
|
59 |
-
function transferFrom(address _from, address _to, uint256 _value) public returns (bool) {
|
60 |
-
balanceOf[_from] -= _value;
|
61 |
-
balanceOf[_to] += _value;
|
62 |
-
emit Transfer(_from, _to, _value);
|
63 |
-
return true;
|
64 |
-
}
|
65 |
-
function approve(address _spender, uint256 _value) public returns (bool)
|
66 |
"""
|
67 |
```
|
68 |
|
69 |
-
|
70 |
- Base T5 code model: https://huggingface.co/Salesforce/codet5-large
|
71 |
- Source data: https://huggingface.co/datasets/mwritescode/slither-audited-smart-contracts
|
72 |
- Processing steps: Clean, contract-level segmentation separation, split in and out
|
@@ -116,4 +101,9 @@ function approve(address _spender, uint256 _value) public returns (bool)
|
|
116 |
}
|
117 |
}
|
118 |
```
|
119 |
-
- Source training code: To be added
|
|
|
|
|
|
|
|
|
|
|
|
11 |
- text: "pragma solidity ^0.5.7;\n// Context: ParentA | Functions: helloA helloB | Constants: constantA \ncontract HelloWorld is ParentA {"
|
12 |
---
|
13 |
|
14 |
+
# A code generation T5 model for solidity (web3 smart contract)
|
15 |
+
|
16 |
+
## Hello World example
|
17 |
+
- A hello world example showing how to use this model. Note that the input `text` includes:
|
18 |
- Header solidity version like `pragma solidity ^0.5.7`
|
19 |
- Ancestor class/library info, e.g. public functions and constants from `ParentA`
|
20 |
- Contract/Library/Interface declaration header, e.g. `HelloWorld` ending with `{`
|
21 |
+
- Or simply use the test widget on the right side of the page; note, however, that
|
22 |
+
the quality is known to be worse without decoding params
|
23 |
|
24 |
```python
|
25 |
# !pip install transformers -q
|
|
|
42 |
# Expected outcome
|
43 |
"""
|
44 |
string public constant name = "Hello World";
|
45 |
+
...
|
|
|
|
|
46 |
uint256 public constant override returns (uint256) {
|
47 |
return initialSupply;
|
48 |
}
|
49 |
function initialSupply() public view returns (uint256) {
|
50 |
+
...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
"""
|
52 |
```
|
53 |
|
54 |
+
## Background
|
55 |
- Base T5 code model: https://huggingface.co/Salesforce/codet5-large
|
56 |
- Source data: https://huggingface.co/datasets/mwritescode/slither-audited-smart-contracts
|
57 |
- Processing steps: Clean, contract-level segmentation separation, split in and out
|
|
|
101 |
}
|
102 |
}
|
103 |
```
|
104 |
+
- Source training code: To be added
|
105 |
+
|
106 |
+
## Future TODO
|
107 |
+
- The model is significantly under-trained because of a lack of GPU budget; it needs roughly 10x the Colab resources (~$100 for a full training run)
|
108 |
+
- The way the model can be used is quite limited. Potentially we could switch to a decoder-only GPT-2 model for comparison, but CodeT5 has its own strong code-specific optimizations
|
109 |
+
- More classifiers (T5- or BERT-like) are needed to detect potential defects.
|