add link to github-code data
Browse files
app.py
CHANGED
@@ -6,6 +6,9 @@ import json
|
|
6 |
import pandas as pd
|
7 |
import requests
|
8 |
|
|
|
|
|
|
|
9 |
@st.cache(allow_output_mutation=True)
|
10 |
def load_tokenizer(model_ckpt):
|
11 |
return AutoTokenizer.from_pretrained(model_ckpt)
|
@@ -40,7 +43,7 @@ if selected_task == " ":
|
|
40 |
|
41 |
elif selected_task == "Pretraining datasets":
|
42 |
st.title("Pretraining datasets π")
|
43 |
-
st.markdown("Preview of some code files from Github repositories")
|
44 |
df = pd.read_csv("utils/data_preview.csv")
|
45 |
st.dataframe(df)
|
46 |
for model in selected_models:
|
@@ -57,7 +60,7 @@ elif selected_task == "Model architecture":
|
|
57 |
st.markdown(f"## {model}")
|
58 |
st.markdown(text)
|
59 |
if model == "InCoder":
|
60 |
-
st.image(
|
61 |
|
62 |
elif selected_task == "Model evaluation":
|
63 |
st.title("Code models evaluation π")
|
|
|
6 |
import pandas as pd
|
7 |
import requests
|
8 |
|
9 |
+
GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
|
10 |
+
INCODER_IMG = "https://huggingface.co/datasets/loubnabnl/repo-images/raw/main/incoder.png"
|
11 |
+
|
12 |
@st.cache(allow_output_mutation=True)
|
13 |
def load_tokenizer(model_ckpt):
|
14 |
return AutoTokenizer.from_pretrained(model_ckpt)
|
|
|
43 |
|
44 |
elif selected_task == "Pretraining datasets":
|
45 |
st.title("Pretraining datasets π")
|
46 |
+
st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
|
47 |
df = pd.read_csv("utils/data_preview.csv")
|
48 |
st.dataframe(df)
|
49 |
for model in selected_models:
|
|
|
60 |
st.markdown(f"## {model}")
|
61 |
st.markdown(text)
|
62 |
if model == "InCoder":
|
63 |
+
st.image(INCODER_IMG, caption="Figure 1: InCoder training", width=700)
|
64 |
|
65 |
elif selected_task == "Model evaluation":
|
66 |
st.title("Code models evaluation π")
|