Spaces:
Sleeping
Sleeping
Commit
·
84952b3
1
Parent(s):
d934fdc
display a sample
Browse files
app.py
CHANGED
@@ -15,12 +15,13 @@ st.sidebar.write(
|
|
15 |
|
16 |
st.title("How to understand large textual datasets?")
|
17 |
st.info(
|
18 |
-
"We randomly sampled 40,000 articles from the English subset 20231101.en of the Wikipedia dataset. We then took the first 500 words of each articles in order to generate an abstract that will be used for topic modeling."
|
19 |
)
|
20 |
|
21 |
|
22 |
df = pd.read_csv("data/data_sample_wikipedia.csv", index_col=[0])
|
23 |
df = df[["text", "url"]]
|
|
|
24 |
st.dataframe(df, use_container_width=True)
|
25 |
|
26 |
|
|
|
15 |
|
16 |
st.title("How to understand large textual datasets?")
|
17 |
st.info(
|
18 |
+
"We randomly sampled 40,000 articles from the English subset 20231101.en of the Wikipedia dataset. We then took the first 500 words of each articles in order to generate an abstract that will be used for topic modeling. Here is a sample:"
|
19 |
)
|
20 |
|
21 |
|
22 |
df = pd.read_csv("data/data_sample_wikipedia.csv", index_col=[0])
|
23 |
df = df[["text", "url"]]
|
24 |
+
df = df.head(100)
|
25 |
st.dataframe(df, use_container_width=True)
|
26 |
|
27 |
|