nyanko7 committed on
Commit
5c4ad21
0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files
Files changed (4) hide show
  1. .gitattributes +37 -0
  2. README.md +13 -0
  3. app.py +107 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ danbooru_all_tags.json filter=lfs diff=lfs merge=lfs -text
37
+ danbooru_id_url.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Danbooru Images
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: streamlit
7
+ sdk_version: 1.35.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ import json
5
+ import plotly.graph_objects as go
6
+
7
+ st.set_page_config(layout="wide")
8
+
9
@st.cache_resource
def load_and_preprocess_data():
    """Load the image metadata table and precompute every sort order.

    The parquet path is read from the ``PARQUET_FILE`` environment variable.
    The function is wrapped in ``st.cache_resource`` so the load and the
    sorting below are not repeated on every Streamlit rerun.

    Returns:
        A ``(df, sorted_indices)`` tuple. ``df`` is indexed by ``post_id``
        in descending order and each ``tags`` entry is converted to a
        ``set``. ``sorted_indices`` maps each "Sort by" label to the
        ``post_id`` index arranged in that order.

    Raises:
        RuntimeError: If ``PARQUET_FILE`` is unset or empty.
    """
    # Function-scope import: the module's import block does not bring in
    # ``os``, so the original ``os.getenv`` call failed with NameError.
    import os

    start_time = time.time()

    parquet_path = os.getenv('PARQUET_FILE')
    if not parquet_path:
        # Fail with an actionable message instead of handing None to pandas.
        raise RuntimeError(
            "The PARQUET_FILE environment variable must point to the parquet dataset."
        )

    df = pd.read_parquet(parquet_path)
    df = df.sort_values(by='post_id', ascending=False)
    # Sets make the per-row subset / intersection tag tests cheap later on.
    df["tags"] = df["tags"].apply(set)
    df = df.set_index('post_id')

    # Precompute each ordering once so that sorting at request time is only
    # an index lookup.
    sorted_indices = {
        'Post ID (Descending)': df.index,
        'Post ID (Ascending)': df.index[::-1],
        'Clip Score': df['clip_aesthetic'].sort_values(ascending=False).index,
        'Siglip Score': df['clip_aesthetic_2_5'].sort_values(ascending=False).index,
    }
    print(f"Data loaded and preprocessed: {time.time() - start_time:.2f} seconds")
    return df, sorted_indices
25
+
26
st.title('Danbooru Images')
data, sorted_indices = load_and_preprocess_data()

# Sidebar: score filters, page selection and sort order.
st.sidebar.header('Filter Options')
st.sidebar.write('Adjust the filter options to refine the results.')
score_range = st.sidebar.slider(
    'Select clip score range',
    min_value=0.0,
    max_value=10.0,
    value=(0.0, 10.0),
    step=0.1,
    help='Filter images based on their CLIP score range.',
)
score_range_v2 = st.sidebar.slider(
    'Select siglip score range',
    min_value=0.0,
    max_value=10.0,
    value=(6.0, 10.0),
    step=0.1,
    help='Filter images based on their SigLIP score range.',
)
page_number = st.sidebar.number_input(
    'Page',
    min_value=1,
    value=1,
    step=1,
    help='Navigate through the pages of filtered results.',
)
sort_option = st.sidebar.selectbox(
    'Sort by (slow)',
    options=['Post ID (Descending)', 'Post ID (Ascending)', 'Clip Score', 'Siglip Score'],
    help='Select sorting option for the results.',
)

# User input: space-separated tags, with a leading "-" marking an exclusion.
user_input_tags = st.text_input(
    'Enter tags (space-separated)',
    help='Filter images based on tags. Use "-" to exclude tags.',
)
# str.split() with no argument already trims whitespace and never yields
# empty tokens, so no further strip() is needed.
tag_tokens = user_input_tags.split()
selected_tags = {tag for tag in tag_tokens if not tag.startswith('-')}
# A bare "-" (e.g. while the user is still typing) names no tag. Skipping it
# keeps an empty string out of the exclusion set, which would otherwise make
# the set truthy and trigger the per-row tag filter for no effect.
undesired_tags = {tag[1:] for tag in tag_tokens if tag.startswith('-') and len(tag) > 1}
print(f"Selected tags: {selected_tags}, Undesired tags: {undesired_tags}")
42
+
43
# Function to filter data based on user input
def filter_data(df, score_range, score_range_v2, selected_tags, sort_option,
                undesired_tags=None):
    """Filter the image table by score ranges and tags, then order it.

    Args:
        df: Table with ``clip_aesthetic``, ``clip_aesthetic_2_5`` and
            ``tags`` columns; each ``tags`` entry must support set operations.
        score_range: ``(low, high)`` inclusive bounds for ``clip_aesthetic``.
        score_range_v2: ``(low, high)`` inclusive bounds for
            ``clip_aesthetic_2_5``.
        selected_tags: Set of tags that every returned row must contain.
        sort_option: Sort label. Anything other than
            ``"Post ID (Descending)"`` is looked up in the module-level
            ``sorted_indices`` mapping.
        undesired_tags: Set of tags that no returned row may contain. When
            omitted, the module-level ``undesired_tags`` is used so existing
            five-argument callers behave as before.

    Returns:
        The rows of ``df`` that pass every filter, in the requested order.
    """
    start_time = time.time()

    if undesired_tags is None:
        # Backward compatibility: the original body read the module-level set.
        undesired_tags = globals().get('undesired_tags') or set()

    # Series.between is inclusive on both ends by default, matching >= / <=.
    clip_ok = df['clip_aesthetic'].between(score_range[0], score_range[1])
    siglip_ok = df['clip_aesthetic_2_5'].between(score_range_v2[0], score_range_v2[1])
    filtered_data = df[clip_ok & siglip_ok]
    print(f"Data filtered based on scores: {time.time() - start_time:.2f} seconds")

    # Rows are left in the incoming order for the default sort option, so
    # only the other options need the precomputed index applied.
    if sort_option != "Post ID (Descending)":
        sorted_index = sorted_indices[sort_option]
        sorted_index = sorted_index[sorted_index.isin(filtered_data.index)]
        filtered_data = filtered_data.loc[sorted_index]
        print(f"Applying indcies: {time.time() - start_time:.2f} seconds")

    if selected_tags or undesired_tags:
        # Build an explicitly boolean mask. Series.apply on an empty frame
        # can return an object-dtype result, which DataFrame.__getitem__
        # would interpret as column labels and silently drop every column.
        tag_mask = pd.Series(
            [
                selected_tags.issubset(tags) and not undesired_tags.intersection(tags)
                for tags in filtered_data['tags']
            ],
            index=filtered_data.index,
            dtype=bool,
        )
        filtered_data = filtered_data[tag_mask]

    print(f"Data filtered: {time.time() - start_time:.2f} seconds")
    return filtered_data
66
+
67
# Apply the sidebar / tag filters and report how many images remain.
filtered_data = filter_data(data, score_range, score_range_v2, selected_tags, sort_option)
st.sidebar.write(f"Total filtered images: {len(filtered_data)}")

# Pagination: pick out the rows that belong to the requested page.
items_per_page = 30
start_idx = items_per_page * (page_number - 1)
end_idx = start_idx + items_per_page
current_data = filtered_data.iloc[start_idx:end_idx]

# Render the page as a grid: one strip of st.columns() per group of images.
columns_per_row = 5
for offset in range(0, len(current_data), columns_per_row):
    grid_row = current_data.iloc[offset:offset + columns_per_row]
    cells = st.columns(columns_per_row)
    # iterrows() yields (index label, row); the label is the post id that
    # the caption displays.
    for cell, (post_id, post) in zip(cells, grid_row.iterrows()):
        caption = (
            f"ID: {post_id}, CLIP: {post['clip_aesthetic']:.2f}, "
            f"SigLIP: {post['clip_aesthetic_2_5']:.2f}"
        )
        with cell:
            st.image(post['large_file_url'], caption=caption, use_column_width=True)
86
+
87
def histogram_slider(df, column1, column2):
    """Plot overlaid histograms of two columns of ``df`` in the sidebar.

    At most 5000 randomly sampled rows are plotted.

    Args:
        df: Table holding both columns.
        column1: Name of the first column to plot.
        column2: Name of the second column to plot.
    """
    sample_size = min(5000, len(df))
    sample_data = df.sample(sample_size)

    fig = go.Figure()
    # One semi-transparent trace per column, drawn over each other.
    for column in (column1, column2):
        trace = go.Histogram(x=sample_data[column], nbinsx=50, name=column, opacity=0.75)
        fig.add_trace(trace)

    layout_options = {
        'barmode': 'overlay',
        'bargap': 0.1,
        'height': 200,
        'xaxis': {'showticklabels': True},
        'yaxis': {'showticklabels': True},
        'margin': {'l': 0, 'r': 0, 't': 0, 'b': 0},
        'legend': {
            'orientation': 'h',
            'yanchor': 'bottom',
            'y': -0.4,
            'xanchor': 'center',
            'x': 0.5,
        },
    }
    fig.update_layout(**layout_options)

    st.sidebar.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})
103
+
104
# Score-distribution histogram for the filtered images, with timing output.
start_time = time.time()
histogram_slider(filtered_data, column1='clip_aesthetic', column2='clip_aesthetic_2_5')
histogram_elapsed = time.time() - start_time
print(f"Histogram displayed: {histogram_elapsed:.2f} seconds")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pyarrow>=16.0.0
2
+ pandas>=2.2.2
3
+ plotly