loubnabnl committed
Commit 00d2ff3
1 Parent(s): ce45012

update app

Files changed (1)
  1. app.py +24 -14
app.py CHANGED
@@ -6,31 +6,34 @@ from transformers import pipeline
 title = "InCoder Generator"
 description = "This is a subspace for code generation with [InCoder](https://huggingface.co/facebook/incoder-6B); it is used in a larger [space](https://huggingface.co/spaces/loubnabnl/Code-generation-models-v1) for model comparison."
 example = [
-    ["def print_hello_world():", "Sample", 8, 42],
-    ["def get_file_size(filepath):", "Sample", 22, 42]]
+    ["def count_words(filename):", 40, 0.6, 42],
+    ["def print_hello_world():", 8, 0.6, 42],
+    ["def get_file_size(filepath):", 22, 0.6, 42]]
 tokenizer = AutoTokenizer.from_pretrained("facebook/incoder-1B")
 model = AutoModelForCausalLM.from_pretrained("facebook/incoder-1B", low_cpu_mem_usage=True)
 
 
-def code_generation(gen_prompt, strategy, max_tokens, seed=42):
+MAX_LENGTH = 2048
+BOS = "<|endoftext|>"
+def generate(gen_prompt, max_tokens, temperature=0.6, seed=42):
     set_seed(seed)
-    gen_kwargs = {}
-    gen_kwargs["do_sample"] = strategy == "Sample"
-    gen_kwargs["max_new_tokens"] = max_tokens
-    if gen_kwargs["do_sample"]:
-        gen_kwargs["temperature"] = 0.2
-        gen_kwargs["top_k"] = 0
-        gen_kwargs["top_p"] = 0.95
-    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-    generated_text = pipe(gen_prompt, **gen_kwargs)[0]['generated_text']
+    input_ids = tokenizer(gen_prompt, return_tensors="pt").input_ids
+    current_length = input_ids.flatten().size(0)
+    max_length = max_tokens + current_length
+    if max_length > MAX_LENGTH:
+        max_length = MAX_LENGTH
+    output = model.generate(input_ids=input_ids, do_sample=True, top_p=0.95, temperature=temperature, max_length=max_length)
+    generated_text = tokenizer.decode(output.flatten())
+    if generated_text.startswith(BOS):
+        generated_text = generated_text[len(BOS):]
     return generated_text
 
+# Example: generate("def count_words(filename):", max_tokens=40, temperature=0.6)
 
 iface = gr.Interface(
-    fn=code_generation,
+    fn=generate,
     inputs=[
         gr.Textbox(lines=10, label="Input code"),
-        gr.Dropdown(choices=["Greedy", "Sample"], value="Sample"),
         gr.inputs.Slider(
             minimum=8,
             maximum=256,
@@ -38,6 +41,13 @@ iface = gr.Interface(
             default=8,
             label="Number of tokens to generate",
         ),
+        gr.inputs.Slider(
+            minimum=0,
+            maximum=2,
+            step=0.1,
+            default=0.6,
+            label="Temperature",
+        ),
         gr.inputs.Slider(
             minimum=0,
             maximum=1000,
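
For reference, the sampling path this commit introduces can be exercised outside Gradio. The snippet below is a minimal sketch of the same logic, assuming the `facebook/incoder-1B` checkpoint and the `MAX_LENGTH`/`BOS` constants from `app.py`; the prompt passed at the end is only an illustration.

from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

MAX_LENGTH = 2048      # InCoder's context window, as defined in app.py
BOS = "<|endoftext|>"  # marker that InCoder prepends to decoded output

tokenizer = AutoTokenizer.from_pretrained("facebook/incoder-1B")
model = AutoModelForCausalLM.from_pretrained("facebook/incoder-1B", low_cpu_mem_usage=True)

def generate(gen_prompt, max_tokens, temperature=0.6, seed=42):
    set_seed(seed)  # fix the RNG so sampled completions are reproducible
    input_ids = tokenizer(gen_prompt, return_tensors="pt").input_ids
    # never request more total tokens than the model's context window
    max_length = min(input_ids.flatten().size(0) + max_tokens, MAX_LENGTH)
    output = model.generate(input_ids=input_ids, do_sample=True, top_p=0.95,
                            temperature=temperature, max_length=max_length)
    generated_text = tokenizer.decode(output.flatten())
    # strip the leading BOS marker before returning the completion
    if generated_text.startswith(BOS):
        generated_text = generated_text[len(BOS):]
    return generated_text

print(generate("def count_words(filename):", max_tokens=40))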