Commit
•
8683d51
1
Parent(s):
fb9df53
Upload 4 files
Browse files- analysis.py +69 -0
- app.py +91 -0
- data.pkl +3 -0
- requirements.txt +7 -0
analysis.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import json
|
4 |
+
import networkx as nx
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
|
7 |
+
# ---------------- DATA EXTRACTION ----------------
|
8 |
+
|
9 |
+
def parse_page(page_name):
    """Read a saved HTML page and pull the model / lora names out of it.

    Args:
        page_name: Path of the HTML file to read (decoded as UTF-8).

    Returns:
        Whatever ``extract_data_from_html`` returns for the page text:
        a ``(model_name, lora_names)`` tuple, or ``None`` when the page
        carries no embedded JSON payload.
    """
    with open(page_name, mode='r', encoding='utf-8') as page_file:
        page_source = page_file.read()

    parsed = extract_data_from_html(page_source)
    return parsed
|
14 |
+
|
15 |
+
def extract_data_from_html(html_content):
    """Extract model and lora names from the JSON embedded in a page.

    The payload is the body of the ``<script id="__NEXT_DATA__"
    type="application/json">`` element.

    Args:
        html_content: Full HTML text of the page.

    Returns:
        The ``(model_name, lora_names)`` tuple produced by
        ``get_model_and_resources``, or ``None`` when the page has no
        ``__NEXT_DATA__`` script element.

    Raises:
        json.JSONDecodeError: If the embedded payload is not valid JSON.
    """
    # DOTALL lets the payload span several lines, and the match ends at the
    # script's own closing tag instead of depending on which tag follows it.
    match = re.search(
        r'id="__NEXT_DATA__" type="application/json">(.*?)</script>',
        html_content,
        flags=re.DOTALL,
    )
    if match is None:
        return None

    data_dict = json.loads(match.group(1))
    return get_model_and_resources(data_dict)
|
25 |
+
|
26 |
+
def get_model_and_resources(data_dict):
    """Retrieve the model name and the lora names from parsed page JSON.

    The values are read from the ``meta`` mapping of the first query in
    ``props.pageProps.trpcState.json.queries``.

    Args:
        data_dict: The decoded ``__NEXT_DATA__`` JSON document.

    Returns:
        A ``(model_name, lora_names)`` tuple, where ``lora_names`` lists the
        ``name`` of every resource whose ``type`` is ``'lora'``, in order.

    Raises:
        KeyError: If an expected key (including ``Model`` or ``resources``)
            is missing, or a lora resource has no ``name``.
        IndexError: If the ``queries`` list is empty.
    """
    # Walk the nested structure once instead of repeating it per field.
    meta = data_dict['props']['pageProps']['trpcState']['json']['queries'][0]['state']['data']['meta']

    model_name = meta['Model']
    # .get(): a resource entry without a 'type' is simply not a lora; it
    # should not make the whole page unparseable.
    lora_names = [
        resource['name']
        for resource in meta['resources']
        if resource.get('type') == 'lora'
    ]
    return model_name, lora_names
|
31 |
+
|
32 |
+
# ---------------- GRAPH CONSTRUCTION ----------------
|
33 |
+
|
34 |
+
def build_graph(data, degree_threshold):
    """Assemble the model/lora graph and drop weakly connected nodes.

    Args:
        data: Mapping of page file name to a ``(model, loras)`` pair.
        degree_threshold: Nodes whose degree is strictly below this value
            are removed.

    Returns:
        An ``nx.Graph`` in which model nodes carry ``bipartite=0`` and lora
        nodes ``bipartite=1``. Each model-lora edge has a ``page`` attribute
        holding the page name up to its first ``'.'``.
    """
    graph = nx.Graph()

    for page_name, page_entry in data.items():
        model, loras = page_entry
        graph.add_node(model, bipartite=0)
        for lora in loras:
            graph.add_node(lora, bipartite=1)
            graph.add_edge(model, lora, page=page_name.split('.')[0])

    # Degrees are collected in a single pass before anything is removed, so
    # the pruning is not applied repeatedly to the shrinking graph.
    sparse_nodes = []
    for node, degree in graph.degree():
        if degree < degree_threshold:
            sparse_nodes.append(node)

    graph.remove_nodes_from(sparse_nodes)
    return graph
|
47 |
+
|
48 |
+
|
49 |
+
# ---------------- VISUALIZATION AND ANALYSIS ----------------
|
50 |
+
|
51 |
+
def visualize_bipartite(B):
    """Draw the model/lora graph in a two-column bipartite layout.

    Model nodes (``bipartite == 0``) are passed to the layout as one side
    and drawn in blue (``#1f78b4``); every other node is drawn in green
    (``#33a02c``). The figure is shown with ``plt.show()``.

    Args:
        B: Graph whose nodes all carry a ``bipartite`` attribute.

    Raises:
        KeyError: If a node has no ``bipartite`` attribute.
    """
    model_nodes = {n for n, d in B.nodes(data=True) if d['bipartite'] == 0}
    pos = nx.bipartite_layout(B, model_nodes)

    # One colour per node, in the graph's node order.
    node_colors = [
        '#1f78b4' if node in model_nodes else '#33a02c'
        for node in B.nodes()
    ]

    plt.figure(figsize=(10, 5))
    nx.draw(B, pos, with_labels=True, node_color=node_colors)
    plt.title("Bipartite Graph between Model Name and Lora Name")
    plt.show()
|
61 |
+
|
62 |
+
def most_connected_models(B, top_n=10):
    """Print the highest-degree model nodes together with their neighbours.

    Args:
        B: Graph whose nodes all carry a ``bipartite`` attribute; nodes with
            ``bipartite == 0`` are treated as models.
        top_n: Maximum number of models to print.
    """
    def degree_of(node):
        return B.degree(node)

    model_nodes = {
        node for node, attrs in B.nodes(data=True) if attrs['bipartite'] == 0
    }
    ranked = sorted(model_nodes, key=degree_of, reverse=True)
    top_models = ranked[:top_n]

    for name in top_models:
        connected = list(B.neighbors(name))
        print(f"Model: {name}, Connected Loras: {connected}")
|
app.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import networkx as nx
|
4 |
+
import os
|
5 |
+
import pickle
|
6 |
+
import tqdm
|
7 |
+
from analysis import build_graph, parse_page
|
8 |
+
|
9 |
+
def clean(results):
    """Return a new dict holding only the entries whose value is truthy.

    Entries whose value is ``None`` or an empty container are dropped.
    The input mapping is not modified.

    Args:
        results: Mapping of page name to parse result.

    Returns:
        A dict with the surviving ``key: value`` pairs, in the original order.
    """
    # Truthiness already excludes empty containers, so no len() check is needed.
    return {page: parsed for page, parsed in results.items() if parsed}
|
15 |
+
|
16 |
+
# Per-session initialisation: set the default degree threshold, then load the
# parsed page data (from the data.pkl cache, or by parsing every file under
# pages/ and writing that cache) and build the graph from it.
if "B_degree_threshold" not in st.session_state:
    st.session_state.B_degree_threshold = 10
if "B" not in st.session_state:
    if not os.path.exists('data.pkl'):
        import logging  # only needed while the cache is being (re)built

        page_folder = 'pages'
        pages = os.listdir(page_folder)
        results = {}

        for p in tqdm.tqdm(pages):
            try:
                results[p] = parse_page(os.path.join(page_folder, p))
            except Exception:
                # Best effort: one unreadable or unexpected page must not
                # abort the whole scan, but the failure is recorded instead
                # of being silently discarded.
                logging.getLogger(__name__).warning(
                    "Skipping page %s: could not be parsed", p, exc_info=True
                )

        with open('data.pkl', 'wb') as f:
            pickle.dump(results, f)

    else:
        # NOTE(security): unpickling can execute arbitrary code. Only load a
        # data.pkl that this app (or a trusted source) produced.
        with open('data.pkl', 'rb') as f:
            results = pickle.load(f)

    # Drop pages that yielded no data before building the graph.
    st.session_state.results = clean(results)
    st.session_state.B = build_graph(st.session_state.results, st.session_state.B_degree_threshold)
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
# Streamlit app
|
44 |
+
def _link_table_html(name_column, names, pages):
    """Build an HTML table pairing each name with a link to its image page.

    Args:
        name_column: Header text for the name column (a trusted constant).
        names: Node names to list; strings are HTML-escaped.
        pages: Page identifiers, parallel to ``names``; each is appended to
            ``https://civitai.com/images/`` to form the link.

    Returns:
        The table as an HTML string.
    """
    import html  # function-scope import: the module header does not import it

    links = []
    for page in pages:
        # The names and page ids come from scraped files, so escape them
        # before they are placed in markup rendered with unsafe_allow_html.
        url = html.escape('https://civitai.com/images/' + page, quote=True)
        links.append(f'<a href="{url}" target="_blank">{url}</a>')

    safe_names = [
        html.escape(name) if isinstance(name, str) else name for name in names
    ]
    df = pd.DataFrame({name_column: safe_names, "Image Link": links})
    # escape=False keeps the anchors clickable; dynamic text is escaped above.
    return df.to_html(escape=False, index=False)


# Streamlit app
def main():
    """Render the viewer: threshold slider, model selector and lora selector.

    Moving the sidebar slider away from the stored threshold rebuilds the
    graph in ``st.session_state.B``. For the selected model (and,
    separately, the selected lora) a table of its neighbours is shown, each
    with a link built from the ``page`` attribute of the connecting edge.
    """
    st.title("Model-Lora Relationship Viewer")

    # Sidebar for degree_threshold
    B_degree_threshold = st.sidebar.slider("Select Degree Threshold", 1, 100, 10)

    # Rebuild the graph only when the threshold actually changed.
    if B_degree_threshold != st.session_state.B_degree_threshold:
        st.session_state.B_degree_threshold = B_degree_threshold
        st.session_state.B = build_graph(st.session_state.results, B_degree_threshold)

    graph = st.session_state.B
    st.sidebar.write(f"There are {len(graph)} node analyzed.")

    # Split nodes into models (bipartite == 0) and everything else (loras).
    model_nodes = {n for n, d in graph.nodes(data=True) if d['bipartite'] == 0}
    lora_nodes = set(graph) - model_nodes

    # Highest-degree nodes first in both selectors.
    sorted_models = sorted(model_nodes, key=lambda x: graph.degree(x), reverse=True)
    sorted_loras = sorted(lora_nodes, key=lambda x: graph.degree(x), reverse=True)

    # Model selection
    selected_model = st.selectbox("Select Model (sorted by degree)", sorted_models)
    if selected_model:
        loras_for_model = list(graph.neighbors(selected_model))
        pages_for_model = [graph[selected_model][lora]['page'] for lora in loras_for_model]
        st.markdown(
            _link_table_html("Lora Names", loras_for_model, pages_for_model),
            unsafe_allow_html=True,
        )

    # Lora selection
    selected_lora = st.selectbox("Select Lora (sorted by degree)", sorted_loras)
    if selected_lora:
        models_for_lora = list(graph.neighbors(selected_lora))
        pages_for_lora = [graph[model][selected_lora]['page'] for model in models_for_lora]
        st.markdown(
            _link_table_html("Model Names", models_for_lora, pages_for_lora),
            unsafe_allow_html=True,
        )


if __name__ == "__main__":
    main()
|
data.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ab9484f2c5b8fa2cb8fdccc4aeb559da173a8e4de2c325fe8f7286544f8d528
|
3 |
+
size 2856767
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib==3.7.0
|
2 |
+
networkx==3.0
|
3 |
+
pandas==1.5.3
|
4 |
+
requests==2.28.2
|
5 |
+
retrying==1.3.4
|
6 |
+
streamlit==1.25.0
|
7 |
+
tqdm==4.64.1
|