terryyz committed on
Commit
5d50dcb
β€’
1 Parent(s): 9b1a775

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -51
app.py CHANGED
@@ -12,7 +12,54 @@ from peft import PeftModel
12
 
13
  from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css
14
 
15
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
18
  CHECKPOINT_URL = "Salesforce/codegen-350M-mono"
@@ -86,53 +133,6 @@ model_map = {
86
  }
87
 
88
 
89
- FIM_PREFIX = "<fim_prefix>"
90
- FIM_MIDDLE = "<fim_middle>"
91
- FIM_SUFFIX = "<fim_suffix>"
92
-
93
- FIM_INDICATOR = "<FILL_HERE>"
94
-
95
- FORMATS = """## Model Formats
96
-
97
- The model is pretrained on code and is formatted with special tokens in addition to the pure code data,\
98
- such as prefixes specifying the source of the file or tokens separating code from a commit message.\
99
- Use these templates to explore the model's capacities:
100
-
101
- ### 1. Prefixes 🏷️
102
- For pure code files, use any combination of the following prefixes:
103
-
104
- ```
105
- <reponame>REPONAME<filename>FILENAME<gh_stars>STARS\ncode<|endoftext|>
106
- ```
107
- STARS can be one of: 0, 1-10, 10-100, 100-1000, 1000+
108
-
109
- ### 2. Commits πŸ’Ύ
110
- The commits data is formatted as follows:
111
-
112
- ```
113
- <commit_before>code<commit_msg>text<commit_after>code<|endoftext|>
114
- ```
115
-
116
- ### 3. Jupyter Notebooks πŸ““
117
- The model is trained on Jupyter notebooks as Python scripts and structured formats like:
118
-
119
- ```
120
- <start_jupyter><jupyter_text>text<jupyter_code>code<jupyter_output>output<jupyter_text>
121
- ```
122
-
123
- ### 4. Issues πŸ›
124
- We also trained on GitHub issues using the following formatting:
125
- ```
126
- <issue_start><issue_comment>text<issue_comment>...<issue_closed>
127
- ```
128
-
129
- ### 5. Fill-in-the-middle 🧩
130
- Fill in the middle requires rearranging the model inputs. The playground handles this for you - all you need is to specify where to fill:
131
- ```
132
- code before<FILL_HERE>code after
133
- ```
134
- """
135
-
136
  theme = gr.themes.Monochrome(
137
  primary_hue="indigo",
138
  secondary_hue="blue",
@@ -182,9 +182,9 @@ def generate(
182
  else:
183
  output = ""
184
  # model.to(device)
185
- input_ids = tokenizer(prompt, return_tensors="pt").to(device)
186
- # generated_ids = model.generate(**input_ids, **generate_kwargs)
187
- generated_ids = model.generate(**input_ids)
188
 
189
  return tokenizer.decode(generated_ids[0][input_ids["input_ids"].shape[1]:], skip_special_tokens=True).strip()
190
 
 
12
 
13
  from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css
14
 
15
+
16
+ FORMATS = """## Model Formats
17
+
18
+ The model is pretrained on code and is formatted with special tokens in addition to the pure code data,\
19
+ such as prefixes specifying the source of the file or tokens separating code from a commit message.\
20
+ Use these templates to explore the model's capacities:
21
+
22
+ ### 1. Prefixes 🏷️
23
+ For pure code files, use any combination of the following prefixes:
24
+
25
+ ```
26
+ <reponame>REPONAME<filename>FILENAME<gh_stars>STARS\ncode<|endoftext|>
27
+ ```
28
+ STARS can be one of: 0, 1-10, 10-100, 100-1000, 1000+
29
+
30
+ ### 2. Commits πŸ’Ύ
31
+ The commits data is formatted as follows:
32
+
33
+ ```
34
+ <commit_before>code<commit_msg>text<commit_after>code<|endoftext|>
35
+ ```
36
+
37
+ ### 3. Jupyter Notebooks πŸ““
38
+ The model is trained on Jupyter notebooks as Python scripts and structured formats like:
39
+
40
+ ```
41
+ <start_jupyter><jupyter_text>text<jupyter_code>code<jupyter_output>output<jupyter_text>
42
+ ```
43
+
44
+ ### 4. Issues πŸ›
45
+ We also trained on GitHub issues using the following formatting:
46
+ ```
47
+ <issue_start><issue_comment>text<issue_comment>...<issue_closed>
48
+ ```
49
+
50
+ ### 5. Fill-in-the-middle 🧩
51
+ Fill in the middle requires rearranging the model inputs. The playground handles this for you - all you need is to specify where to fill:
52
+ ```
53
+ code before<FILL_HERE>code after
54
+ ```
55
+ """
56
+
57
+ if not torch.cuda.is_available():
58
+ FORMATS += "\n<p>Running on CPU πŸ₯Ά This demo does not work on CPU.</p>"
59
+
60
+ if torch.cuda.is_available():
61
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
62
+ print(device)
63
 
64
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
65
  CHECKPOINT_URL = "Salesforce/codegen-350M-mono"
 
133
  }
134
 
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  theme = gr.themes.Monochrome(
137
  primary_hue="indigo",
138
  secondary_hue="blue",
 
182
  else:
183
  output = ""
184
  # model.to(device)
185
+ input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
186
+ # generated_ids = model.generate(**input_ids
187
+ generated_ids = model.generate(**input_ids, **generate_kwargs)
188
 
189
  return tokenizer.decode(generated_ids[0][input_ids["input_ids"].shape[1]:], skip_special_tokens=True).strip()
190