DHEIVER committed on
Commit
471343c
1 Parent(s): a9f0202

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -17
app.py CHANGED
@@ -2,57 +2,56 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, pipeline
3
 
4
 
5
- title = "Gerador de Código"
6
- description = "Este é um espaço para converter texto em inglês para código Python usando o modelo [codeparrot-small-text-to-code](https://huggingface.co/codeparrot/codeparrot-small-text-to-code),\
7
- um modelo de geração de código Python pré-treinado em um conjunto de dados de docstrings e código Python extraído de notebooks Jupyter disponível em [github-jupyter-text](https://huggingface.co/datasets/codeparrot/github-jupyter-text)."
8
  example = [
9
- ["Função de utilidade para calcular a precisão de predições usando métricas do sklearn", 65, 0.6, 42],
10
- ["Vamos implementar uma função que calcula o tamanho de um arquivo chamado filepath", 60, 0.6, 42],
11
- ["Vamos implementar o algoritmo de ordenação Bubble Sort em uma função auxiliar:", 87, 0.6, 42],
12
  ]
13
 
14
- # Altere o modelo para o modelo pré-treinado
15
  tokenizer = AutoTokenizer.from_pretrained("codeparrot/codeparrot-small-text-to-code")
16
  model = AutoModelForCausalLM.from_pretrained("codeparrot/codeparrot-small-text-to-code")
17
 
18
- def criar_docstring(gen_prompt):
19
  return "\"\"\"\n" + gen_prompt + "\n\"\"\"\n\n"
20
 
21
- def gerar_codigo(gen_prompt, max_tokens, temperatura=0.6, seed=42):
22
  set_seed(seed)
23
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
24
- prompt = criar_docstring(gen_prompt)
25
- generated_text = pipe(prompt, do_sample=True, top_p=0.95, temperature=temperatura, max_new_tokens=max_tokens)[0]['generated_text']
26
  return generated_text
27
 
28
 
29
  iface = gr.Interface(
30
- fn=gerar_codigo,
31
  inputs=[
32
- gr.Textbox(label="Instruções em inglês", placeholder="Digite as instruções em inglês..."),
33
  gr.inputs.Slider(
34
  minimum=8,
35
  maximum=256,
36
  step=1,
37
  default=8,
38
- label="Número de tokens para gerar",
39
  ),
40
  gr.inputs.Slider(
41
  minimum=0,
42
  maximum=2.5,
43
  step=0.1,
44
  default=0.6,
45
- label="Temperatura",
46
  ),
47
  gr.inputs.Slider(
48
  minimum=0,
49
  maximum=1000,
50
  step=1,
51
  default=42,
52
- label="Semente aleatória para a geração"
53
  )
54
  ],
55
- outputs=gr.Code(label="Código Python gerado", language="python", lines=10),
56
  examples=example,
57
  layout="horizontal",
58
  theme="peach",
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, pipeline
3
 
4
 
5
+ title = "Python Code Generator"
6
+ description = "This is a space to convert English text to Python code using the [codeparrot-small-text-to-code](https://huggingface.co/codeparrot/codeparrot-small-text-to-code) model, a pre-trained Python code generation model trained on a dataset of docstrings and Python code extracted from Jupyter notebooks available at [github-jupyter-text](https://huggingface.co/datasets/codeparrot/github-jupyter-text)."
 
7
  example = [
8
+ ["Utility function to calculate the precision of predictions using sklearn metrics", 65, 0.6, 42],
9
+ ["Let's implement a function that calculates the size of a file called filepath", 60, 0.6, 42],
10
+ ["Let's implement the Bubble Sort sorting algorithm in an auxiliary function:", 87, 0.6, 42],
11
  ]
12
 
13
+ # Change the model to the pre-trained model
14
  tokenizer = AutoTokenizer.from_pretrained("codeparrot/codeparrot-small-text-to-code")
15
  model = AutoModelForCausalLM.from_pretrained("codeparrot/codeparrot-small-text-to-code")
16
 
17
+ def create_docstring(gen_prompt):
18
  return "\"\"\"\n" + gen_prompt + "\n\"\"\"\n\n"
19
 
20
+ def generate_code(gen_prompt, max_tokens, temperature=0.6, seed=42):
21
  set_seed(seed)
22
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
23
+ prompt = create_docstring(gen_prompt)
24
+ generated_text = pipe(prompt, do_sample=True, top_p=0.95, temperature=temperature, max_new_tokens=max_tokens)[0]['generated_text']
25
  return generated_text
26
 
27
 
28
  iface = gr.Interface(
29
+ fn=generate_code,
30
  inputs=[
31
+ gr.Textbox(label="English instructions", placeholder="Enter English instructions..."),
32
  gr.inputs.Slider(
33
  minimum=8,
34
  maximum=256,
35
  step=1,
36
  default=8,
37
+ label="Number of tokens to generate",
38
  ),
39
  gr.inputs.Slider(
40
  minimum=0,
41
  maximum=2.5,
42
  step=0.1,
43
  default=0.6,
44
+ label="Temperature",
45
  ),
46
  gr.inputs.Slider(
47
  minimum=0,
48
  maximum=1000,
49
  step=1,
50
  default=42,
51
+ label="Random seed for generation"
52
  )
53
  ],
54
+ outputs=gr.Code(label="Generated Python code", language="python", lines=10),
55
  examples=example,
56
  layout="horizontal",
57
  theme="peach",