loubnabnl (HF staff) committed
Commit a16fa71 • 1 parent: 9be3f4c

update app

Files changed (1):
  app.py +21 -42
app.py CHANGED
@@ -1,9 +1,10 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import pipeline
 import torch
 import json
 import pandas as pd
+import requests
 
 @st.cache(allow_output_mutation=True)
 def load_tokenizer(model_ckpt):
@@ -24,45 +25,20 @@ st.set_page_config(page_icon=':laptop:', layout="wide")
 
 
 st.sidebar.header("Models")
-models = ["CodeParrot", "OPT", "InCoder"]
-selected_models = st.sidebar.multiselect('Select code generation models to compare:',
-                                         models,
-                                         default=["CodeParrot"])
+models = ["CodeParrot", "InCoder"]
+selected_models = st.sidebar.multiselect('Select code generation models to compare:', models, default=["CodeParrot"])
+
 st.sidebar.header("Tasks")
 tasks = [" ", "Model evaluation", "Pretraining datasets", "Model architecture", "Code generation"]
 selected_task = st.sidebar.selectbox("Select a task:", tasks)
 
 
-tokenizer1 = load_tokenizer("lvwerra/codeparrot")
-model1 = load_model("lvwerra/codeparrot")
-tokenizer2 = load_tokenizer("facebook/incoder-1B")
-model2 = load_model("facebook/incoder-1B")
-#tokenizer3 = load_tokenizer("facebook/opt-1.3b")
-#model3 = load_model("facebook/opt-1.3b")
-pipelines = {}
-for element in models:
-    if element == "CodeParrot":
-        pipelines[element] = pipeline("text-generation", model=model1, tokenizer=tokenizer1)
-    elif element == "InCoder":
-        tokenizer = load_tokenizer("facebook/incoder-1B")
-        model = load_model("facebook/incoder-1B")
-        pipelines[element] = pipeline("text-generation", model=model2, tokenizer=tokenizer2)
-    #else:
-    #    tokenizer = load_tokenizer("facebook/opt-1.3b")
-    #    model = load_model("facebook/opt-1.3b")
-    #    pipelines[element] = pipeline("text-generation", model=model3, tokenizer=tokenizer3)
-
-examples = load_examples()
-example_names = [example["name"] for example in examples]
-name2id = dict([(name, i) for i, name in enumerate(example_names)])
-set_seed(42)
-gen_kwargs = {}
-
 if selected_task == " ":
     st.title("Code Generation Models comparison")
     with open("intro.txt", "r") as f:
         intro = f.read()
     st.markdown(intro)
+
 elif selected_task == "Pretraining datasets":
     st.title("Pretraining datasets 📚")
     st.markdown("Preview of some code files from Github repositories")
@@ -72,7 +48,8 @@ elif selected_task == "Pretraining datasets":
         with open(f"datasets/{model.lower()}.txt", "r") as f:
             text = f.read()
         st.markdown(f"### {model}:")
-        st.markdown(text)
+        st.markdown(text)
+
 elif selected_task == "Model architecture":
     st.title("Model architecture 🔨")
     for model in selected_models:
@@ -80,30 +57,32 @@ elif selected_task == "Model architecture":
             text = f.read()
         st.markdown(f"## {model}:")
         st.markdown(text)
+
 elif selected_task == "Model evaluation":
     st.title("Code models evaluation 📊")
     with open("evaluation/intro.txt", "r") as f:
         intro = f.read()
     st.markdown(intro)
+
 elif selected_task == "Code generation":
     st.title("Code generation 💻")
     st.sidebar.header("Examples")
+    examples = load_examples()
+    example_names = [example["name"] for example in examples]
+    name2id = dict([(name, i) for i, name in enumerate(example_names)])
     selected_example = st.sidebar.selectbox("Select one of the following examples:", example_names)
     example_text = examples[name2id[selected_example]]["value"]
     default_length = examples[name2id[selected_example]]["length"]
     st.sidebar.header("Generation settings")
-    gen_kwargs["do_sample"] = st.sidebar.radio("Decoding strategy:", ["Greedy", "Sample"]) == "Sample"
-    gen_kwargs["max_new_tokens"] = st.sidebar.slider("Number of tokens to generate:", value=default_length, min_value=8, step=8, max_value=256)
-    if gen_kwargs["do_sample"]:
-        gen_kwargs["temperature"] = 0.2
-        gen_kwargs["top_k"] = 0
-        gen_kwargs["top_p"] = 0.95
+    temperature = st.sidebar.slider("Temperature:", value=0.2, min_value=0.0, step=0.1, max_value=2.0)
+    max_new_tokens = st.sidebar.slider("Number of tokens to generate:", value=default_length, min_value=8, step=8, max_value=256)
+    seed = st.sidebar.slider("Random seed:", value=42, min_value=0, step=1, max_value=1000)
     gen_prompt = st.text_area("Generate code with prompt:", value=example_text, height=220).strip()
     if st.button("Generate code!"):
         with st.spinner("Generating code..."):
             for model in selected_models:
-                if model != "OPT":
-                    pipe = pipelines[model]
-                    generated_text = pipe(gen_prompt, **gen_kwargs)[0]['generated_text']
-                    st.markdown(f"{model}:")
-                    st.code(generated_text)
+                url = f'https://hf.space/embed/loubnabnl/{model.lower()}-subspace/+/api/predict/'
+                r = requests.post(url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]})
+                generated_text = r.json()['data'][0]
+                st.markdown(f"{model}:")
+                st.code(generated_text)
 
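After this commit the app no longer runs the models in-process: each generation request is sent over HTTP to a per-model Gradio Space. Below is a minimal standalone sketch of that same call, useful for smoke-testing an endpoint outside Streamlit. The generate_remote helper and the example prompt are illustrative additions, not part of the repo; the payload order [prompt, max_new_tokens, temperature, seed] and the {"data": [generated_text]} response shape mirror what the new code above assumes.

import requests

def generate_remote(model_name, prompt, max_new_tokens=64, temperature=0.2, seed=42):
    # Same endpoint pattern as app.py: one Gradio Space per model.
    url = f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
    # Gradio's /api/predict/ takes positional inputs in a "data" list.
    r = requests.post(url=url, json={"data": [prompt, max_new_tokens, temperature, seed]})
    r.raise_for_status()  # fail loudly on HTTP errors instead of inside r.json()
    return r.json()["data"][0]

print(generate_remote("CodeParrot", "def print_hello_world():"))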
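The bodies of load_tokenizer, load_model, and load_examples sit outside the diff hunks. For orientation, here is a plausible sketch of such cached helpers, assuming standard from_pretrained loading; the examples.json path is an assumption, while the "name"/"value"/"length" fields match how app.py indexes the examples.

import json
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM

@st.cache(allow_output_mutation=True)
def load_tokenizer(model_ckpt):
    # allow_output_mutation keeps the loaded object cached across Streamlit
    # reruns without hashing it on every interaction.
    return AutoTokenizer.from_pretrained(model_ckpt)

@st.cache(allow_output_mutation=True)
def load_model(model_ckpt):
    return AutoModelForCausalLM.from_pretrained(model_ckpt)

@st.cache()
def load_examples():
    # Assumed location; each entry carries "name", "value", and "length".
    with open("examples.json", "r") as f:
        return json.load(f)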