File size: 4,791 Bytes
c9e8e4a 3bce3fb a16fa71 aa07439 41d27ac c9e8e4a 68bc50c cddb272 fa5e188 25f90dd c9e8e4a f4313df c9e8e4a 7c0d726 aa07439 25f90dd aa07439 7c0d726 25f90dd 7c0d726 2dc5a7a 1e77c56 807f36d c5fafcd 0d5adbc 1e77c56 0d5adbc f70b655 7212da7 1e77c56 25f90dd 9d2b32b 0b16412 1e77c56 4bd868a 0d5adbc 7036561 1e77c56 25f90dd 29136c5 46dbbb1 1e77c56 0d5adbc a7dffcb 1e77c56 0d5adbc 7036561 1e77c56 25f90dd 29136c5 606a970 29136c5 25f90dd 77615a0 25f90dd 596c6fa 606a970 12798fb 33147c8 12798fb 606a970 12798fb cc14b64 12798fb 25f90dd 06d2b63 33147c8 06d2b63 aa07439 06d2b63 aa07439 06d2b63 aa07439 a5aaccd cc3091d 091c31a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import json
import pandas as pd
import requests
import threading
import streamlit as st
# Models selectable in the "Code datasets" and "Model architecture" sections.
# Each name, lower-cased, is used as the stem of a markdown page
# (datasets/<name>.md and architectures/<name>.md).
MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
# Models offered in the code generation demo. Each name, lower-cased, selects
# the "<name>-subspace" API endpoint that generate_code posts to.
GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
@st.cache()
def load_examples():
    """Load the prompt examples offered in the code generation section.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default text encoding.

    Returns:
        The parsed content of ``utils/examples.json``. The rest of this file
        treats it as a sequence of dicts with ``"name"``, ``"value"`` and
        ``"length"`` keys.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open("utils/examples.json", "r", encoding="utf-8") as f:
        examples = json.load(f)
    return examples
def read_markdown(path):
    """Render the markdown file at ``path`` into the Streamlit page.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default text encoding. Its content is rendered with
    ``unsafe_allow_html=True``, so raw HTML in the file is passed through
    to the page.

    Args:
        path: Path of the markdown file to display.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as f:
        output = f.read()
    st.markdown(output, unsafe_allow_html=True)
def generate_code(
    generations,
    model_name,
    gen_prompt,
    max_new_tokens,
    temperature,
    seed,
    timeout=120.0,
):
    """Request one completion from a model's Space and append it to ``generations``.

    Args:
        generations: List that receives the generated text on success. Nothing
            is appended when the request fails.
        model_name: Model name; lower-cased to build the Space endpoint URL.
        gen_prompt: Prompt sent to the model.
        max_new_tokens: Sent to the endpoint as the second payload value.
        temperature: Sent to the endpoint as the third payload value.
        seed: Sent to the endpoint as the fourth payload value.
        timeout: Value forwarded to ``requests.post(timeout=...)``. Pass
            ``None`` to wait without limit, which was the previous behaviour.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the JSON has no non-empty ``"data"`` list.
    """
    # call space using its API endpoint
    url = (
        f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
    )
    response = requests.post(
        url=url,
        json={"data": [gen_prompt, max_new_tokens, temperature, seed]},
        # Without a timeout a stalled Space would block this call (and any
        # thread joined on it) indefinitely.
        timeout=timeout,
    )
    # Report HTTP failures as such rather than as a KeyError/JSON error below.
    response.raise_for_status()
    generated_text = response.json()["data"][0]
    generations.append(generated_text)
def generate_code_threads(
    generations, models, gen_prompt, max_new_tokens, temperature, seed
):
    """Query every model in ``models`` concurrently and collect the results.

    One thread per model runs ``generate_code``. Each thread writes into its
    own private list; once all threads have finished, the results are appended
    to ``generations`` in the order of ``models``. Appending straight to the
    shared list from the threads would order results by completion time, which
    does not match a caller that pairs results with ``models`` by position.

    Args:
        generations: List extended in place with the generated texts, in
            ``models`` order. A model whose request failed contributes no
            entry, so the list may be shorter than ``models``.
        models: Model names to query.
        gen_prompt: Prompt forwarded to ``generate_code``.
        max_new_tokens: Forwarded to ``generate_code``.
        temperature: Forwarded to ``generate_code``.
        seed: Forwarded to ``generate_code``.
    """
    # Materialise once so that any iterable is accepted and walked only once.
    models = list(models)
    per_model_results = [[] for _ in models]

    threads = [
        threading.Thread(
            target=generate_code,
            args=(
                results,
                model_name,
                gen_prompt,
                max_new_tokens,
                temperature,
                seed,
            ),
        )
        for model_name, results in zip(models, per_model_results)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # Merge in model order; failed models left their private list empty.
    for results in per_model_results:
        generations.extend(results)
st.set_page_config(page_icon=":laptop:", layout="wide")

# Sidebar: table of contents. Decode as UTF-8 explicitly so the result does
# not depend on the host's default text encoding.
with open("utils/table_contents.md", "r", encoding="utf-8") as f:
    contents = f.read()
st.sidebar.markdown(contents)

# Introduction
st.title("Code generation with 🤗")
read_markdown("utils/intro.md")

# Code datasets
st.subheader("1 - Code datasets")
read_markdown("datasets/intro.md")
read_markdown("datasets/github_code.md")
# The selector sits in the narrow first column; the second column stays empty.
col1, col2 = st.columns([1, 2])
with col1:
    # Explicit key: this selectbox has the same label and options as the one
    # in the architecture section below.
    selected_model = st.selectbox("", MODELS, key=1)
read_markdown(f"datasets/{selected_model.lower()}.md")

# Model architecture
st.subheader("2 - Model architecture")
read_markdown("architectures/intro.md")
col1, col2 = st.columns([1, 2])
with col1:
    selected_model = st.selectbox("", MODELS, key=2)
read_markdown(f"architectures/{selected_model.lower()}.md")

# Model evaluation
st.subheader("3 - Code models evaluation")
read_markdown("evaluation/intro.md")
read_markdown("evaluation/demo_humaneval.md")
# Code generation
st.subheader("4 - Code generation ✨")
read_markdown("generation/intro.md")

# Left column: model and example choice; middle column: spacer;
# right column: sampling settings.
models_col, _spacer, settings_col = st.columns([7, 1, 6])

with models_col:
    st.markdown("**Models**")
    selected_models = st.multiselect(
        "Select code generation models to compare:",
        GENERATION_MODELS,
        default=GENERATION_MODELS,
        key=3,
    )
    st.markdown(" ")
    st.markdown("**Examples**")
    examples = load_examples()
    example_names = [entry["name"] for entry in examples]
    # Map each example name back to its position in `examples`.
    index_by_name = {name: idx for idx, name in enumerate(example_names)}
    selected_example = st.selectbox(
        "Select one of the following examples or implement yours:", example_names
    )
    chosen_example = examples[index_by_name[selected_example]]
    example_text = chosen_example["value"]
    default_length = chosen_example["length"]

with settings_col:
    st.markdown("**Generation settings**")
    temperature = st.slider(
        "Temperature:", value=0.2, min_value=0.0, step=0.1, max_value=2.0
    )
    # The initial token budget comes from the selected example.
    max_new_tokens = st.slider(
        "Number of tokens to generate:",
        value=default_length,
        min_value=8,
        step=4,
        max_value=256,
    )
    seed = st.slider("Random seed:", value=42, min_value=0, step=1, max_value=1000)

prompt_input = st.text_area(
    "Generate code with prompt:",
    value=example_text,
    height=200,
)
gen_prompt = prompt_input.strip()

generate_clicked = st.button("Generate code!")
if generate_clicked:
    with st.spinner("Generating code..."):
        # Query all selected models concurrently; results land in `generations`.
        generations = []
        generate_code_threads(
            generations,
            selected_models,
            gen_prompt=gen_prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            seed=seed,
        )
        # NOTE(review): results are labelled purely by position, so this is
        # only correct when `generations` holds one entry per selected model,
        # in selection order - verify against generate_code_threads.
        for position, generated in enumerate(generations):
            st.markdown(f"**{selected_models[position]}**")
            st.code(generated)
        # Fewer results than requested models means at least one call failed.
        some_models_missing = len(generations) < len(selected_models)
        if some_models_missing:
            timeout_warning = (
                "<span style='color:red'>Warning: Some models run into timeout, "
                "you can try generating code using the original subspaces: "
                "[InCoder](https://huggingface.co/spaces/loubnabnl/incoder-subspace), "
                "[CodeGen](https://huggingface.co/spaces/loubnabnl/codegen-subspace), "
                "[CodeParrot](https://huggingface.co/spaces/loubnabnl/codeparrot-subspace)"
                "</span>"
            )
            st.markdown(timeout_warning, unsafe_allow_html=True)

# Resources
st.subheader("Resources")
read_markdown("utils/resources.md")
|