Spaces:
Runtime error
Runtime error
charlesdedampierre
committed on
Commit
•
1b0a5d8
1
Parent(s):
2cf2fe6
Upload 8 files
Browse files- README.md +1 -1
- app.py +39 -0
- data/data_sample.csv +0 -0
- data/topics_info.csv +0 -0
- images/logo.png +0 -0
- images/map.png +0 -0
- images/map_prompt.html +0 -0
- images/pipeline.png +0 -0
README.md
CHANGED
@@ -10,4 +10,4 @@ pinned: false
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
|
app.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import streamlit.components.v1 as components
|
4 |
+
|
5 |
+
# Streamlit demo page for Bunka: a sidebar with project links, a sample of
# the source dataset, the pre-rendered topic map, per-topic statistics and
# the pipeline diagram. NOTE: documentation here uses `#` comments only,
# because Streamlit "magic" renders bare string literals onto the page.

# --- Sidebar: branding and external links -------------------------------
st.sidebar.image("images/logo.png", use_column_width=True)
st.sidebar.write("Bunka Summarizes & Visualizes Information as Maps using LLMs.")
st.sidebar.title("Github Page")
st.sidebar.write(
    "Have a look at the following package on GitHub: https://github.com/charlesdedampierre/BunkaTopics"
)
st.sidebar.title("Dataset")
st.sidebar.write(
    "We used a subset of the OASST2 dataset: https://huggingface.co/datasets/OpenAssistant/oasst2"
)

# --- Main page: raw data sample -----------------------------------------
st.title("How to understand large textual datasets?")

df = pd.read_csv("data/data_sample.csv", index_col=[0])
df = df[["message_id", "text"]]
# Only the first 300 rows are displayed.
df = df.head(300)
st.dataframe(df, use_container_width=True)

# --- Main page: interactive topic map -----------------------------------
st.title("Inside the OASST2 dataset")
# Read the pre-rendered HTML map inside a context manager so the file
# handle is closed deterministically instead of being leaked.
with open("images/map_prompt.html", "r", encoding="utf-8") as element:
    map_html = element.read()

components.html(map_html, height=900, width=900)

# --- Main page: per-topic statistics ------------------------------------
st.title("Some insights by territory")
df_info = pd.read_csv("data/topics_info.csv", index_col=[0])
# Take an explicit copy: the "percent" column is overwritten below, and
# assigning into a column-selected frame can raise SettingWithCopyWarning.
df_info = df_info[["name", "size", "percent"]].copy()
# Show percentages as truncated whole numbers with a "%" suffix.
df_info["percent"] = df_info["percent"].map(lambda value: f"{int(value)}%")
df_info = df_info.reset_index(drop=True)

st.dataframe(df_info, use_container_width=True)

# --- Main page: pipeline diagram ----------------------------------------
st.title("Bunka Exploration Engine")
st.image(
    "images/pipeline.png",
    use_column_width=True,
)
|
data/data_sample.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/topics_info.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/logo.png
ADDED
images/map.png
ADDED
images/map_prompt.html
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/pipeline.png
ADDED