Spaces:

HawkClaws
/

llm_stracture_diff

Sleeping

App Files Files Community

HawkClaws committed on May 22, 2024

Commit

d28e9af

verified ·

1 Parent(s): 5654dac

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -62

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import difflib
 import requests
 import os
@@ -8,60 +8,73 @@ import json
 FIREBASE_URL = os.getenv("FIREBASE_URL")
 def fetch_from_firebase(model_id):
     # Look up a stored structure for `model_id` with an HTTP GET against
     # {FIREBASE_URL}/model_structures/{model_id}.json.
     # Returns the decoded JSON body when the status code is 200, else None.
     # NOTE(review): `model_id` is interpolated into the URL path unmodified;
     # IDs containing "/" presumably become nested path segments - verify.
     # NOTE(review): no timeout is passed to requests.get - confirm that an
     # unbounded wait is acceptable here.
     response = requests.get(f"{FIREBASE_URL}/model_structures/{model_id}.json")
     if response.status_code == 200:
         return response.json()
     return None
 def save_to_firebase(model_id, structure):
     # Store `structure`, serialized with json.dumps, via an HTTP PUT to
     # {FIREBASE_URL}/model_structures/{model_id}.json.
     # Returns True only when the response status code is exactly 200.
     # NOTE(review): no timeout is passed to requests.put.
-    response = requests.put(f"{FIREBASE_URL}/model_structures/{model_id}.json", data=json.dumps(structure))
     return response.status_code == 200
-def get_model_structure(model_id):
     # Return a {parameter name: shape string} dict for `model_id`.
     # A truthy value already stored in Firebase is returned as-is; otherwise
     # the model is loaded and the freshly computed dict is saved and returned.
-    structure = fetch_from_firebase(model_id)
-    if structure:
-        return structure
     # Cache miss: load the full model on CPU in bfloat16 to read its weights.
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.bfloat16,
         device_map="cpu",
     )
     # Only the shape of each state_dict entry is kept, as a string.
     structure = {k: str(v.shape) for k, v in model.state_dict().items()}
     # The boolean result of save_to_firebase is not checked.
-    save_to_firebase(model_id, structure)
-    return structure
-def compare_structures(struct1, struct2):
     # Render each structure dict as "name: shape" lines (in dict order) and
     # return the difflib.ndiff result for the two line lists.
-    struct1_lines = [f"{k}: {v}" for k, v in struct1.items()]
-    struct2_lines = [f"{k}: {v}" for k, v in struct2.items()]
     diff = difflib.ndiff(struct1_lines, struct2_lines)
     return diff
 def display_diff(diff):
     # Turn ndiff-style lines into two HTML strings, one per side.
     # Returns (left_html, right_html, diff_found); diff_found is True when at
     # least one "- " or "+ " line was seen.
     # NOTE(review): line text is inserted into the HTML without escaping.
     left_lines = []
     right_lines = []
     diff_found = False
     for line in diff:
         # "- " lines: shown highlighted on the left; an empty entry is added
         # on the right so both lists grow by one row.
-        if line.startswith('- '):
-            left_lines.append(f'<span style="background-color: #ffdddd;">{line[2:]}</span>')
-            right_lines.append('')
             diff_found = True
         # "+ " lines: the mirror case, highlighted on the right.
-        elif line.startswith('+ '):
-            right_lines.append(f'<span style="background-color: #ddffdd;">{line[2:]}</span>')
-            left_lines.append('')
             diff_found = True
         # Two-space prefix: the line goes to both sides without highlighting.
-        elif line.startswith('  '):
             left_lines.append(line[2:])
             right_lines.append(line[2:])
         else:
             # Any other prefix is dropped (presumably ndiff's "? " hint lines).
             pass
     left_html = "<br>".join(left_lines)
     right_html = "<br>".join(right_lines)
     return left_html, right_html, diff_found
 # Set Streamlit page configuration to wide mode
 # (the comparison result is rendered in two side-by-side columns below).
 st.set_page_config(layout="wide")
@@ -79,50 +92,30 @@ st.markdown(
     }
     </style>
     """,
-    unsafe_allow_html=True
 )
 # Page body: two model-ID inputs and a button-driven comparison.
 st.title("Model Structure Comparison Tool")
 model_id1 = st.text_input("Enter the first HuggingFace Model ID")
 model_id2 = st.text_input("Enter the second HuggingFace Model ID")
 # The session-state flag records that "Compare Models" was clicked.
 # NOTE(review): the flag is set in the else-branch at the bottom and only
 # read at the top, so the comparison runs on a later script run - confirm a
 # rerun actually happens after the click.
-if "compare_button_clicked" not in st.session_state:
-    st.session_state.compare_button_clicked = False
-if st.session_state.compare_button_clicked:
-    with st.spinner('Comparing models and loading tokenizers...'):
-        if model_id1 and model_id2:
-            struct1 = get_model_structure(model_id1)
-            struct2 = get_model_structure(model_id2)
-            diff = compare_structures(struct1, struct2)
-            left_html, right_html, diff_found = display_diff(diff)
-            st.write("### Comparison Result")
-            if not diff_found:
-                st.success("The model structures are identical.")
-            col1, col2 = st.columns([1.5, 1.5])  # Adjust the ratio to make columns wider
-            with col1:
-                st.write("### Model 1")
-                st.markdown(left_html, unsafe_allow_html=True)
-            with col2:
-                st.write("### Model 2")
-                st.markdown(right_html, unsafe_allow_html=True)
-            # Tokenizer verification
             # Loads both tokenizers and prints each vocab_size; any exception
             # is reported through st.error instead of being raised.
-            try:
-                tokenizer1 = AutoTokenizer.from_pretrained(model_id1)
-                tokenizer2 = AutoTokenizer.from_pretrained(model_id2)
-                st.write(f"**{model_id1} Tokenizer Vocab Size**: {tokenizer1.vocab_size}")
-                st.write(f"**{model_id2} Tokenizer Vocab Size**: {tokenizer2.vocab_size}")
-            except Exception as e:
-                st.error(f"Error loading tokenizers: {e}")
-        else:
-            st.error("Please enter both model IDs.")
         # Reset the flag so the comparison does not repeat on the next run.
-        st.session_state.compare_button_clicked = False
-else:
-    if st.button("Compare Models"):
-        st.session_state.compare_button_clicked = True

 import streamlit as st
 import torch
+from transformers import AutoModelForCausalLM
 import difflib
 import requests
 import os
 FIREBASE_URL = os.getenv("FIREBASE_URL")
 def fetch_from_firebase(model_id):
     # Look up a stored structure for `model_id` with an HTTP GET against
     # {FIREBASE_URL}/model_structures/{model_id}.json.
     # Returns the decoded JSON body when the status code is 200, else None.
     # NOTE(review): `model_id` is interpolated into the URL path unmodified;
     # IDs containing "/" presumably become nested path segments - verify.
     # NOTE(review): no timeout is passed to requests.get - confirm that an
     # unbounded wait is acceptable here.
     response = requests.get(f"{FIREBASE_URL}/model_structures/{model_id}.json")
     if response.status_code == 200:
         return response.json()
     return None
 def save_to_firebase(model_id, structure):
     # Store `structure`, serialized with json.dumps, via an HTTP PUT to
     # {FIREBASE_URL}/model_structures/{model_id}.json.
     # Returns True only when the response status code is exactly 200.
     # NOTE(review): no timeout is passed to requests.put.
+    response = requests.put(
+        f"{FIREBASE_URL}/model_structures/{model_id}.json", data=json.dumps(structure)
+    )
     return response.status_code == 200
+def get_model_structure(model_id) -> list[str]:
     # Return the model's structure as a list of "name: shape" strings.
     # A truthy value already stored in Firebase is returned as-is; otherwise
     # the model is loaded and the freshly built list is saved and returned.
     # NOTE(review): the fetched value is not checked to be a list of strings.
+    struct_lines = fetch_from_firebase(model_id)
+    if struct_lines:
+        return struct_lines
     # Cache miss: load the full model on CPU in bfloat16 to read its weights.
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.bfloat16,
         device_map="cpu",
     )
     # Only the shape of each state_dict entry is kept, as a string.
     structure = {k: str(v.shape) for k, v in model.state_dict().items()}
     # Flatten to display lines before saving, so the stored value is a list.
+    struct_lines = [f"{k}: {v}" for k, v in structure.items()]
     # The boolean result of save_to_firebase is not checked.
+    save_to_firebase(model_id, struct_lines)
+    return struct_lines
+def compare_structures(struct1_lines: list[str], struct2_lines: list[str]):
     # Return the difflib.ndiff result for two lists of "name: shape" lines.
     # The inputs are passed to ndiff unchanged; the two commented-out lines
     # below show the dict-to-lines formatting that is no longer done here.
+    # struct1_lines = [f"{k}: {v}" for k, v in struct1.items()]
+    # struct2_lines = [f"{k}: {v}" for k, v in struct2.items()]
     diff = difflib.ndiff(struct1_lines, struct2_lines)
     return diff
 def display_diff(diff):
     # Turn ndiff-style lines into two HTML strings, one per side.
     # Returns (left_html, right_html, diff_found); diff_found is True when at
     # least one "- " or "+ " line was seen.
     # NOTE(review): line text is inserted into the HTML without escaping.
     left_lines = []
     right_lines = []
     diff_found = False
     for line in diff:
         # "- " lines: shown highlighted on the left; an empty entry is added
         # on the right so both lists grow by one row.
+        if line.startswith("- "):
+            left_lines.append(
+                f'<span style="background-color: #ffdddd;">{line[2:]}</span>'
+            )
+            right_lines.append("")
             diff_found = True
         # "+ " lines: the mirror case, highlighted on the right.
+        elif line.startswith("+ "):
+            right_lines.append(
+                f'<span style="background-color: #ddffdd;">{line[2:]}</span>'
+            )
+            left_lines.append("")
             diff_found = True
         # Two-space prefix: the line goes to both sides without highlighting.
+        elif line.startswith("  "):
             left_lines.append(line[2:])
             right_lines.append(line[2:])
         else:
             # Any other prefix is dropped (presumably ndiff's "? " hint lines).
             pass
     left_html = "<br>".join(left_lines)
     right_html = "<br>".join(right_lines)
     return left_html, right_html, diff_found
 # Set Streamlit page configuration to wide mode
 # (the comparison result is rendered in two side-by-side columns below).
 st.set_page_config(layout="wide")
     }
     </style>
     """,
+    unsafe_allow_html=True,
 )
 # Page body: two model-ID inputs; the comparison runs whenever both are
 # non-empty, with no button or session-state flag involved.
 st.title("Model Structure Comparison Tool")
 model_id1 = st.text_input("Enter the first HuggingFace Model ID")
 model_id2 = st.text_input("Enter the second HuggingFace Model ID")
+if model_id1 and model_id2:
     # Each structure is a list of "name: shape" lines (cached or computed).
+    struct1 = get_model_structure(model_id1)
+    struct2 = get_model_structure(model_id2)
+    diff = compare_structures(struct1, struct2)
+    left_html, right_html, diff_found = display_diff(diff)
+    st.write("### Comparison Result")
+    if not diff_found:
+        st.success("The model structures are identical.")
     # Both sides are rendered as raw HTML in two equal-width columns.
+    col1, col2 = st.columns([1.5, 1.5])  # Adjust the ratio to make columns wider
+    with col1:
+        st.write("### Model 1")
+        st.markdown(left_html, unsafe_allow_html=True)
+    with col2:
+        st.write("### Model 2")
+        st.markdown(right_html, unsafe_allow_html=True)