File size: 3,842 Bytes
193a8a4
 
 
 
471343c
 
193a8a4
471343c
 
 
53c5d75
 
 
 
 
 
 
 
193a8a4
471343c
193a8a4
 
 
471343c
193a8a4
 
dd1ccae
1a817cf
dd1ccae
 
 
 
 
 
 
 
 
471343c
dd1ccae
10ef2b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193a8a4
dd1ccae
fe71cb6
 
 
193a8a4
 
471343c
193a8a4
471343c
193a8a4
 
 
 
 
471343c
193a8a4
 
 
 
 
 
471343c
193a8a4
 
 
 
 
 
471343c
193a8a4
 
471343c
193a8a4
 
 
 
 
 
1a817cf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, pipeline


# Heading and introductory Markdown shown on the page.
title = "Python Code Generator"
description = (
    "This is a space to convert English text to Python code using the "
    "[codeparrot-small-text-to-code](https://huggingface.co/codeparrot/codeparrot-small-text-to-code) "
    "model, a pre-trained Python code generation model trained on a dataset of "
    "docstrings and Python code extracted from Jupyter notebooks available at "
    "[github-jupyter-text](https://huggingface.co/datasets/codeparrot/github-jupyter-text)."
)

# Clickable example rows: [instructions, tokens to generate, temperature, seed].
# Every row shares temperature 0.6 and seed 42; only the instructions and the
# token budget vary, so those two are listed and the rest is filled in.
example = [
    [instructions, token_budget, 0.6, 42]
    for instructions, token_budget in (
        ("Utility function to calculate the precision of predictions using sklearn metrics", 65),
        ("Let's implement a function that calculates the size of a file called filepath", 60),
        ("Let's implement the Bubble Sort sorting algorithm in an auxiliary function:", 87),
        ("Function to calculate the nth Fibonacci number.", 65),
        ("Function to calculate the factorial of a number.", 65),
        ("Function to reverse a string.", 65),
        ("Function to check if a number is prime.", 65),
        ("Function to generate the Fibonacci sequence up to the nth term.", 65),
        ("Function to generate the factorial sequence up to the nth term.", 65),
    )
]


# Load the tokenizer and the causal language model once, at import time, so
# every request reuses them. Both come from the same checkpoint id; change it
# here to switch models.
_CHECKPOINT = "codeparrot/codeparrot-small-text-to-code"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)

def create_docstring(gen_prompt):
    """Wrap *gen_prompt* in a Python docstring block.

    The result is a line of three double quotes, the prompt text, another
    line of three double quotes, and then one blank line.
    """
    triple_quote = '"""'
    pieces = (triple_quote, "\n", gen_prompt, "\n", triple_quote, "\n\n")
    return "".join(pieces)

def validate_inputs(gen_prompt, max_tokens, temperature, seed):
    """Check a generation request and raise on the first invalid value.

    Args:
        gen_prompt: English instructions. Must contain at least one
            non-whitespace character.
        max_tokens: Number of tokens to generate; greater than 0 and at
            most 256.
        temperature: Sampling temperature, 0 to 2.5 inclusive.
        seed: Random seed, 0 to 1000 inclusive.

    Returns:
        None when every value is acceptable.

    Raises:
        ValueError: If the prompt is empty/blank or a numeric value is
            outside its range (NaN counts as out of range).
    """
    # A prompt made only of spaces or newlines carries no instructions, so it
    # is treated like an empty one. The isinstance guard keeps non-string
    # truthy values on the same path they took before.
    prompt_is_blank = not gen_prompt or (
        isinstance(gen_prompt, str) and not gen_prompt.strip()
    )
    if prompt_is_blank:
        raise ValueError("English instructions cannot be empty.")

    # Chained comparisons are False for NaN, so NaN is rejected here; separate
    # one-sided `<` / `>` tests would silently accept it.
    if not 0 < max_tokens <= 256:
        raise ValueError("Number of tokens to generate must be between 1 and 256.")
    if not 0 <= temperature <= 2.5:
        raise ValueError("Temperature must be between 0 and 2.5.")
    if not 0 <= seed <= 1000:
        raise ValueError("Random seed must be between 0 and 1000.")

def generate_code(gen_prompt, max_tokens, temperature=0.6, seed=42):
    """Generate Python code from English instructions.

    Args:
        gen_prompt: English description of the code to produce.
        max_tokens: Number of new tokens to generate (prompt not counted).
        temperature: Sampling temperature. 0 selects deterministic greedy
            decoding; any positive value enables sampling.
        seed: Seed applied before generation so sampled output is
            reproducible for the same inputs.

    Returns:
        The decoded continuation only (special tokens skipped); the prompt
        itself is not echoed back.

    Raises:
        ValueError: Propagated from ``validate_inputs`` when an argument is
            empty or out of range.
    """
    validate_inputs(gen_prompt, max_tokens, temperature, seed)

    # Numeric UI values may arrive as floats (e.g. 42.0); the seed and the
    # token count are used as integers below.
    max_tokens = int(max_tokens)
    seed = int(seed)

    # The page description states the model was trained on docstrings and
    # Python code, so the instructions are presented as a docstring for the
    # model to continue with code.
    prompt = create_docstring(gen_prompt)

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() needs because the pad token is set to EOS.
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    # Set seed for reproducibility
    set_seed(seed)

    # Temperature only has an effect when sampling is switched on; without
    # do_sample the model decodes greedily and ignores temperature and seed.
    # Sampling needs a strictly positive temperature, so 0 maps to greedy.
    sampling = temperature > 0
    generation_kwargs = {
        "max_new_tokens": max_tokens,
        "do_sample": sampling,
        "pad_token_id": tokenizer.eos_token_id,
        "num_return_sequences": 1,
    }
    if sampling:
        generation_kwargs["temperature"] = float(temperature)

    output = model.generate(
        input_ids=input_ids,
        attention_mask=encoded["attention_mask"],
        **generation_kwargs,
    )

    # Drop the prompt tokens so only the newly generated code is returned.
    new_tokens = output[0, prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)



def save_to_text_file(output_text):
    """Write *output_text* to ``generated_code.txt``, replacing any old file.

    The path is relative, so the file is created in the current working
    directory.

    Args:
        output_text: Text to store.
    """
    # Explicit UTF-8: generated code can contain non-ASCII characters, and
    # the platform's default encoding is not guaranteed to represent them.
    with open("generated_code.txt", "w", encoding="utf-8") as file:
        file.write(output_text)

# Web UI: one text box plus three sliders feed generate_code (in that
# parameter order), and the result is shown in a Python code viewer.
#
# The sliders use the top-level gr.Slider component with `value=` for the
# initial position, matching the gr.Textbox / gr.Code components used here;
# the older gr.inputs.Slider(default=...) form belongs to a previous Gradio
# API and does not exist in current releases.
# `layout="horizontal"` and `theme="peach"` are likewise legacy Interface
# arguments (deprecated / no longer a built-in theme name) and are omitted.
iface = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(label="English instructions", placeholder="Enter English instructions..."),
        gr.Slider(
            minimum=8,
            maximum=256,
            step=1,
            value=8,
            label="Number of tokens to generate",
        ),
        gr.Slider(
            minimum=0,
            maximum=2.5,
            step=0.1,
            value=0.6,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0,
            maximum=1000,
            step=1,
            value=42,
            label="Random seed for generation",
        ),
    ],
    outputs=gr.Code(label="Generated Python code", language="python", lines=10),
    examples=example,
    description=description,
    title=title,
)
iface.launch()