Loubna ben allal committed on
Commit
7cf1a13
‒
1 Parent(s): cc79f05

update app

Browse files
Files changed (1) hide show
  1. app.py +50 -25
app.py CHANGED
@@ -29,49 +29,74 @@ selected_models = st.sidebar.multiselect('Select code generation models to compa
29
  models,
30
  default=["CodeParrot"])
31
  st.sidebar.header("Tasks")
32
- tasks = [" ","Model architecture", "Model evaluation", "Pretraining dataset", "Prompting"]
33
  selected_task = st.sidebar.selectbox("Select a task:", tasks)
34
 
35
- architectures = {}
36
- datasets = {}
 
 
 
 
 
37
  pipelines = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  if selected_task == " ":
40
  st.title("Code Generation Models comparison 💻")
41
  with open("intro.txt", "r") as f:
42
  intro = f.read()
43
  st.markdown(intro)
44
-
45
  elif selected_task == "Pretraining dataset":
46
  st.title("Pretraining datasets 📚")
47
  for model in selected_models:
48
  with open(f"datasets/{model.lower()}.txt", "r") as f:
49
  text = f.read()
50
  st.markdown(f"## {model}:")
51
- st.markdown(text)
52
-
53
  elif selected_task == "Model architecture":
54
  st.title("Model architecture 🔨")
55
  for model in selected_models:
56
  with open(f"architectures/{model.lower()}.txt", "r") as f:
57
  text = f.read()
58
  st.markdown(f"## {model}:")
59
- st.markdown(text)
60
-
61
- elif selected_task == "Prompting":
62
- for model in selected_models:
63
- if model == "CodeParrot":
64
- tokenizer = load_tokenizer("lvwerra/codeparrot")
65
- model = load_model("lvwerra/codeparrot")
66
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
67
- pipelines[model] = pipe
68
- elif model == "InCoder":
69
- tokenizer = load_tokenizer("facebook/incoder-1B")
70
- model = load_model("facebook/incoder-1B")
71
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
72
- pipelines[model] = pipe
73
- else:
74
- tokenizer = load_tokenizer("facebook/opt-1.3b")
75
- model = load_model("facebook/opt-1.3b")
76
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
77
- pipelines[model] = pipe
 
 
 
 
29
  models,
30
  default=["CodeParrot"])
31
  st.sidebar.header("Tasks")
32
+ tasks = [" ","Model architecture", "Model evaluation", "Pretraining dataset", "Code generation"]
33
  selected_task = st.sidebar.selectbox("Select a task:", tasks)
34
 
35
+
36
+ tokenizer = load_tokenizer("lvwerra/codeparrot")
37
+ model = load_model("lvwerra/codeparrot")
38
+ tokenizer2 = load_tokenizer("facebook/incoder-1B")
39
+ model2 = load_model("facebook/incoder-1B")
40
+ tokenizer3 = load_tokenizer("facebook/opt-1.3b")
41
+ model3 = load_model("facebook/opt-1.3b")
42
  pipelines = {}
43
+ for model in models:
44
+ if model == "CodeParrot":
45
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
46
+ pipelines[model] = pipe
47
+ elif model == "InCoder":
48
+ tokenizer = load_tokenizer("facebook/incoder-1B")
49
+ model = load_model("facebook/incoder-1B")
50
+ pipe = pipeline("text-generation", model=model2, tokenizer=tokenizer2)
51
+ pipelines[model] = pipe
52
+ else:
53
+ tokenizer = load_tokenizer("facebook/opt-1.3b")
54
+ model = load_model("facebook/opt-1.3b")
55
+ pipe = pipeline("text-generation", model=model3, tokenizer=tokenizer3)
56
+ pipelines[model] = pipe
57
+
58
+ example_names = [example["name"] for example in examples]
59
+ name2id = dict([(name, i) for i, name in enumerate(example_names)])
60
+ set_seed(42)
61
+ gen_kwargs = {}
62
 
63
  if selected_task == " ":
64
  st.title("Code Generation Models comparison 💻")
65
  with open("intro.txt", "r") as f:
66
  intro = f.read()
67
  st.markdown(intro)
 
68
  elif selected_task == "Pretraining dataset":
69
  st.title("Pretraining datasets 📚")
70
  for model in selected_models:
71
  with open(f"datasets/{model.lower()}.txt", "r") as f:
72
  text = f.read()
73
  st.markdown(f"## {model}:")
74
+ st.markdown(text)
 
75
  elif selected_task == "Model architecture":
76
  st.title("Model architecture 🔨")
77
  for model in selected_models:
78
  with open(f"architectures/{model.lower()}.txt", "r") as f:
79
  text = f.read()
80
  st.markdown(f"## {model}:")
81
+ st.markdown(text)
82
+ elif selected_task == "Code generation":
83
+ st.title("Code generation 💻")
84
+ st.sidebar.header("Examples")
85
+ selected_example = st.sidebar.selectbox("Select one of the following examples:", example_names)
86
+ example_text = examples[name2id[selected_example]]["value"]
87
+ default_length = examples[name2id[selected_example]]["length"]
88
+ st.sidebar.header("Generation settings")
89
+ gen_kwargs["do_sample"] = st.sidebar.radio("Decoding strategy:", ["Greedy", "Sample"]) == "Sample"
90
+ gen_kwargs["max_new_tokens"] = st.sidebar.slider("Number of tokens to generate:", value=default_length, min_value=8, step=8, max_value=256)
91
+ if gen_kwargs["do_sample"]:
92
+ gen_kwargs["temperature"] = 0.2
93
+ gen_kwargs["top_k"] = 0
94
+ gen_kwargs["top_p"] = 0.95
95
+ gen_prompt = st.text_area("Generate code with prompt:", value=example_text, height=220,).strip()
96
+ if st.button("Generate code!"):
97
+ with st.spinner("Generating code..."):
98
+ for model in selected_models:
99
+ pipe = pipelines[model]
100
+ generated_text = pipe(gen_prompt, **gen_kwargs)[0]['generated_text']
101
+ st.markdown(f"### {model}:")
102
+ st.code(generated_text)