en-gin-eer committed on
Commit
8683d51
1 Parent(s): fb9df53

Upload 4 files

Browse files
Files changed (4) hide show
  1. analysis.py +69 -0
  2. app.py +91 -0
  3. data.pkl +3 -0
  4. requirements.txt +7 -0
analysis.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import networkx as nx
5
+ import matplotlib.pyplot as plt
6
+
7
+ # ---------------- DATA EXTRACTION ----------------
8
+
9
def parse_page(page_name):
    """Read a saved HTML page and return the data extracted from it.

    Args:
        page_name: Path of the HTML file to read (decoded as UTF-8).

    Returns:
        Whatever ``extract_data_from_html`` returns for the file's content:
        a ``(model_name, lora_names)`` tuple, or ``None`` when the embedded
        JSON payload is not found.
    """
    with open(page_name, 'r', encoding='utf-8') as page_file:
        page_source = page_file.read()

    extracted = extract_data_from_html(page_source)
    return extracted
14
+
15
def extract_data_from_html(html_content):
    """Pull the embedded ``__NEXT_DATA__`` JSON out of a page and parse it.

    Args:
        html_content: Full HTML text of the page.

    Returns:
        The result of ``get_model_and_resources`` on the decoded JSON, or
        ``None`` when the ``__NEXT_DATA__`` script tag is not present.

    Raises:
        json.JSONDecodeError: If the captured text is not valid JSON.
    """
    found = re.search(
        r'id="__NEXT_DATA__" type="application/json">(.*?)</script><script defer',
        html_content,
    )
    if found is None:
        return None

    payload = json.loads(found.group(1))
    return get_model_and_resources(payload)
25
+
26
def get_model_and_resources(data_dict):
    """Return the model name and LoRA names stored in the page's JSON data.

    Args:
        data_dict: Decoded ``__NEXT_DATA__`` JSON. The metadata is read from
            ``props.pageProps.trpcState.json.queries[0].state.data.meta``.

    Returns:
        A ``(model_name, lora_names)`` tuple, where ``lora_names`` lists the
        ``name`` of every entry in ``meta['resources']`` whose ``type`` is
        ``'lora'``, in their original order.

    Raises:
        KeyError, IndexError, TypeError: If the expected structure is absent.
    """
    first_query = data_dict['props']['pageProps']['trpcState']['json']['queries'][0]
    meta = first_query['state']['data']['meta']

    model_name = meta['Model']

    lora_names = []
    for resource in meta['resources']:
        if resource['type'] == 'lora':
            lora_names.append(resource['name'])

    return model_name, lora_names
31
+
32
+ # ---------------- GRAPH CONSTRUCTION ----------------
33
+
34
def build_graph(data, degree_threshold):
    """Build the model/LoRA graph and drop weakly connected nodes.

    Args:
        data: Mapping of page file name to a ``(model, loras)`` pair.
        degree_threshold: Nodes whose degree is strictly below this value are
            removed after the graph has been assembled (single pass).

    Returns:
        An ``nx.Graph`` in which model nodes carry ``bipartite=0``, LoRA nodes
        carry ``bipartite=1``, and each edge stores the page file name up to
        its first ``'.'`` under the ``page`` attribute.
    """
    graph = nx.Graph()

    for page_name, (model, loras) in data.items():
        graph.add_node(model, bipartite=0)
        for lora in loras:
            graph.add_node(lora, bipartite=1)
            page_id = page_name.split('.')[0]
            graph.add_edge(model, lora, page=page_id)

    # Degrees are collected before any removal, so pruning one node does not
    # cascade into its neighbours.
    weakly_connected = [
        node for node, degree in graph.degree() if degree < degree_threshold
    ]
    graph.remove_nodes_from(weakly_connected)
    return graph
47
+
48
+
49
+ # ---------------- VISUALIZATION AND ANALYSIS ----------------
50
+
51
def visualize_bipartite(B):
    """Draw the bipartite graph with model nodes and other nodes in two colours.

    Nodes whose ``bipartite`` attribute equals 0 are treated as models and
    drawn in blue (``#1f78b4``); every other node is drawn in green
    (``#33a02c``). The figure is displayed with ``plt.show()``.

    Args:
        B: Graph whose nodes all carry a ``bipartite`` attribute.

    Raises:
        KeyError: If a node has no ``bipartite`` attribute.
    """
    model_nodes = {n for n, d in B.nodes(data=True) if d['bipartite'] == 0}
    # The layout places ``model_nodes`` on one side and the rest on the other.
    pos = nx.bipartite_layout(B, model_nodes)

    plt.figure(figsize=(10, 5))
    node_colors = [
        '#1f78b4' if node in model_nodes else '#33a02c' for node in B.nodes()
    ]
    nx.draw(B, pos, with_labels=True, node_color=node_colors)
    plt.title("Bipartite Graph between Model Name and Lora Name")
    plt.show()
61
+
62
def most_connected_models(B, top_n=10):
    """Print the highest-degree model nodes together with their neighbours.

    Args:
        B: Graph whose nodes all carry a ``bipartite`` attribute; nodes with
            ``bipartite == 0`` are the models.
        top_n: Maximum number of models to print.
    """
    models = {node for node, attrs in B.nodes(data=True) if attrs['bipartite'] == 0}
    ranked = sorted(models, key=lambda node: B.degree(node), reverse=True)

    for model in ranked[:top_n]:
        loras = list(B.neighbors(model))
        print(f"Model: {model}, Connected Loras: {loras}")
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import networkx as nx
4
+ import os
5
+ import pickle
6
+ import tqdm
7
+ from analysis import build_graph, parse_page
8
+
9
def clean(results):
    """Return a copy of ``results`` without empty or missing entries.

    Args:
        results: Mapping of page name to parse result (or ``None``).

    Returns:
        A new dict holding only the entries whose value is truthy and has a
        non-zero length, in the original key order.
    """
    return {
        key: value
        for key, value in results.items()
        if value and len(value) > 0
    }
15
+
16
# ---------------- SESSION STATE INITIALISATION ----------------
# Runs on every script execution, but only does work when the session state
# is missing: the default threshold, the parsed results and the graph.
if "B_degree_threshold" not in st.session_state:
    st.session_state.B_degree_threshold = 10
if "B" not in st.session_state:
    if not os.path.exists('data.pkl'):
        # No cache yet: parse every saved page and cache the results on disk.
        page_folder = 'pages'
        pages = os.listdir(page_folder)
        results = {}

        for p in tqdm.tqdm(pages):
            try:
                results[p] = parse_page(os.path.join(page_folder, p))
            except Exception as e:
                # Parsing is best-effort (a page that cannot be parsed is
                # skipped), but report it instead of hiding the failure.
                tqdm.tqdm.write(f"Skipping {p}: {type(e).__name__}: {e}")

        with open('data.pkl', 'wb') as f:
            pickle.dump(results, f)

    else:
        # NOTE(review): pickle.load can execute arbitrary code; data.pkl must
        # come from a trusted source.
        with open('data.pkl', 'rb') as f:
            results = pickle.load(f)

    st.session_state.results = clean(results)
    st.session_state.B = build_graph(st.session_state.results, st.session_state.B_degree_threshold)
40
+
41
+
42
+
43
+ # Streamlit app
44
def _render_link_table(name_column, names, pages):
    """Render an HTML table pairing each name with its Civitai image link.

    Args:
        name_column: Header of the first column.
        names: Node names to list; they are HTML-escaped because they come
            from scraped pages and the table is rendered as raw HTML.
        pages: Page identifiers, one per name, appended to the image URL.
    """
    import html  # local import keeps this block self-contained

    urls = ['https://civitai.com/images/' + page for page in pages]
    df = pd.DataFrame({
        name_column: [html.escape(str(name)) for name in names],
        "Image Link": [
            f'<a href="{html.escape(url)}" target="_blank">{html.escape(url)}</a>'
            for url in urls
        ],
    })
    # escape=False keeps the <a> tags intact; every cell was escaped above.
    st.markdown(df.to_html(escape=False, index=False), unsafe_allow_html=True)


# Streamlit app
def main():
    """Render the Model-Lora relationship viewer.

    Reads the degree threshold from a sidebar slider, rebuilds the graph in
    ``st.session_state.B`` when the threshold changed, and shows two select
    boxes (models and LoRAs, each sorted by descending degree) with a table
    of the selected node's neighbours and the image page behind each edge.
    """
    st.title("Model-Lora Relationship Viewer")

    # Sidebar for degree_threshold
    B_degree_threshold = st.sidebar.slider("Select Degree Threshold", 1, 100, 10)

    # Rebuild the graph only when the threshold actually changed.
    if B_degree_threshold != st.session_state.B_degree_threshold:
        st.session_state.B_degree_threshold = B_degree_threshold
        st.session_state.B = build_graph(st.session_state.results, B_degree_threshold)

    graph = st.session_state.B
    st.sidebar.write(f"There are {len(graph)} node analyzed.")

    # Split the nodes into models (bipartite == 0) and everything else.
    model_nodes = {n for n, d in graph.nodes(data=True) if d['bipartite'] == 0}
    lora_nodes = set(graph) - model_nodes

    # Most connected nodes first.
    sorted_models = sorted(model_nodes, key=lambda x: graph.degree(x), reverse=True)
    sorted_loras = sorted(lora_nodes, key=lambda x: graph.degree(x), reverse=True)

    # Model selection
    selected_model = st.selectbox("Select Model (sorted by degree)", sorted_models)
    if selected_model:
        loras_for_model = list(graph.neighbors(selected_model))
        pages_for_model = [graph[selected_model][lora]['page'] for lora in loras_for_model]
        _render_link_table("Lora Names", loras_for_model, pages_for_model)

    # Lora selection
    selected_lora = st.selectbox("Select Lora (sorted by degree)", sorted_loras)
    if selected_lora:
        models_for_lora = list(graph.neighbors(selected_lora))
        pages_for_lora = [graph[model][selected_lora]['page'] for model in models_for_lora]
        _render_link_table("Model Names", models_for_lora, pages_for_lora)


if __name__ == "__main__":
    main()
data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab9484f2c5b8fa2cb8fdccc4aeb559da173a8e4de2c325fe8f7286544f8d528
3
+ size 2856767
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ matplotlib==3.7.0
2
+ networkx==3.0
3
+ pandas==1.5.3
4
+ requests==2.28.2
5
+ retrying==1.3.4
6
+ streamlit==1.25.0
7
+ tqdm==4.64.1