loubnabnl HF staff committed on
Commit
7659c19
β€’
1 Parent(s): f6c8688

update app

Browse files
Files changed (3) hide show
  1. app.py +11 -21
  2. utils/languages.json +3 -3
  3. utils/table_contents.md +1 -1
app.py CHANGED
@@ -39,7 +39,7 @@ def load_model(values, language):
39
  model = values["model"]
40
  if not model:
41
  text = f"""No model is available for {language.capitalize()}. If you trained a model on this language, let us know in\
42
- in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) to feature your model!\n\
43
  You can also train your own model on The Stack using the instructions below πŸš€"""
44
  st.write(text)
45
  if st.button("Fine-tune your own model", key=4):
@@ -50,8 +50,8 @@ def load_model(values, language):
50
  ```python
51
  from transformers import AutoModelForCausalLM, AutoTokenizer
52
 
53
- tokenizer = AutoTokenizer.from_pretrained({model})
54
- model = AutoModelForCausalLM.from_pretrained({model}, trust_remote_code=True)
55
 
56
  inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
57
  outputs = model.generate(inputs)
@@ -60,7 +60,6 @@ def load_model(values, language):
60
  """
61
  st.markdown(text)
62
  st.markdown(code)
63
- st.write(f"The scores of this model are the following: {values['scores']}")
64
 
65
  def generate_code(
66
  demo, gen_prompt, max_new_tokens=40, temperature=0.2, seed=0
@@ -78,31 +77,24 @@ def generate_code(
78
  generated_text = ""
79
  return generated_text
80
 
81
- def init_nested_buttons():
82
- if "Models trained on dataset" not in st.session_state:
83
- st.session_state["Models trained on dataset"] = False
84
-
85
- if "Generate code" not in st.session_state:
86
- st.session_state["Generate code"] = False
87
-
88
- if st.button("Models trained on dataset"):
89
- st.session_state["Models trained on dataset"] = not st.session_state["Models trained on dataset"]
90
-
91
-
92
  languages = load_languages()
93
 
 
 
94
  col1, col2 = st.columns([1, 1.5])
95
  with col1:
96
- selected_language = st.selectbox("Select one of 358 languages in The Stack", list(languages.keys()), key=1)
97
 
98
  st.write(f"Here's how you can load the {selected_language.capitalize()} subset of The Stack:")
99
  code = how_to_load(selected_language)
100
- if st.button("More info about the dataset", key=2):
 
101
  st.write(f"The dataset contains {languages[selected_language]['num_examples']} examples.")
102
  # we can add some stats about files
103
 
104
- init_nested_buttons()
105
- if st.session_state["Models trained on dataset"]:
 
106
  load_model(languages[selected_language], selected_language)
107
 
108
  if languages[selected_language]["model"] and languages[selected_language]["gradio_demo"]:
@@ -114,8 +106,6 @@ if st.session_state["Models trained on dataset"]:
114
  ).strip()
115
 
116
  if st.button("Generate code"):
117
- st.session_state["Generate code"] = not st.session_state["Generate code"]
118
- if st.session_state["Generate code"]:
119
  with st.spinner("Generating code..."):
120
  generated_text = generate_code(
121
  demo=languages[selected_language]["gradio_demo"],
 
39
  model = values["model"]
40
  if not model:
41
  text = f"""No model is available for {language.capitalize()}. If you trained a model on this language, let us know in\
42
+ in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) to feature your model!\n\n\
43
  You can also train your own model on The Stack using the instructions below πŸš€"""
44
  st.write(text)
45
  if st.button("Fine-tune your own model", key=4):
 
50
  ```python
51
  from transformers import AutoModelForCausalLM, AutoTokenizer
52
 
53
+ tokenizer = AutoTokenizer.from_pretrained("{model}")
54
+ model = AutoModelForCausalLM.from_pretrained("{model}", trust_remote_code=True)
55
 
56
  inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
57
  outputs = model.generate(inputs)
 
60
  """
61
  st.markdown(text)
62
  st.markdown(code)
 
63
 
64
  def generate_code(
65
  demo, gen_prompt, max_new_tokens=40, temperature=0.2, seed=0
 
77
  generated_text = ""
78
  return generated_text
79
 
 
 
 
 
 
 
 
 
 
 
 
80
  languages = load_languages()
81
 
82
+ st.header("Languages of The Stack πŸ“‘")
83
+ st.markdown("The Stack contains over 6TB of permissively-licensed source code files covering 358 programming languages. Select one to get started:")
84
  col1, col2 = st.columns([1, 1.5])
85
  with col1:
86
+ selected_language = st.selectbox("Programming Language", list(languages.keys()), label_visibility="collapsed", key=1)
87
 
88
  st.write(f"Here's how you can load the {selected_language.capitalize()} subset of The Stack:")
89
  code = how_to_load(selected_language)
90
+
91
+ with st.expander("More info about the dataset"):
92
  st.write(f"The dataset contains {languages[selected_language]['num_examples']} examples.")
93
  # we can add some stats about files
94
 
95
+ st.header("Models trained on The Stack πŸ€–")
96
+ st.write("Here we show models trained on the language you select as part of BigCode project.")
97
+ with st.expander(f"Models trained on {selected_language.capitalize()}"):
98
  load_model(languages[selected_language], selected_language)
99
 
100
  if languages[selected_language]["model"] and languages[selected_language]["gradio_demo"]:
 
106
  ).strip()
107
 
108
  if st.button("Generate code"):
 
 
109
  with st.spinner("Generating code..."):
110
  generated_text = generate_code(
111
  demo=languages[selected_language]["gradio_demo"],
utils/languages.json CHANGED
@@ -1,6 +1,6 @@
1
- {"python": {"num_examples": 10, "model": "bigcode/santacoder", "scores": {"HumanEval-pass@1": 10, "HumanEval-pass@10": 20, "HumanEval-pass@100": 40}, "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
2
- "java": {"num_examples": 10, "model": "bigcode/santacoder", "scores": { "HumanEval-pass@1": 10, "HumanEval-pass@10": 20, "HumanEval-pass@100": 40}, "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
3
- "javascript": {"num_examples": 10, "model": "bigcode/santacoder", "scores": { "HumanEval-pass@1": 10, "HumanEval-pass@10": 20, "HumanEval-pass@100": 40}, "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
4
  "typescript": {"num_examples": 10, "model": ""},
5
  "go": {"num_examples": 10, "model": ""},
6
  "php": {"num_examples": 10, "model": ""},
 
1
+ {"python": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
2
+ "java": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
3
+ "javascript": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
4
  "typescript": {"num_examples": 10, "model": ""},
5
  "go": {"num_examples": 10, "model": ""},
6
  "php": {"num_examples": 10, "model": ""},
utils/table_contents.md CHANGED
@@ -6,4 +6,4 @@
6
 
7
  3 - Demos for code generation
8
 
9
- If you trained a model on The Stack, let us know so we can feature it! πŸš€
 
6
 
7
  3 - Demos for code generation
8
 
9
+ If you trained a model on The Stack, let us know in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) so we can feature it! πŸš€