Spaces:

HawkClaws
/

llm_stracture_diff

Sleeping

App Files Files Community

HawkClaws committed on May 22, 2024

Commit

cdee752

verified ·

1 Parent(s): 74debf3

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -2

app.py CHANGED Viewed

@@ -103,9 +103,11 @@ model_id2 = st.text_input("Enter the second HuggingFace Model ID")
 if st.button("Compare Models"):
     with st.spinner('Comparing models and loading tokenizers...'):
         if model_id1 and model_id2:
             struct1 = get_model_structure(model_id1)
             struct2 = get_model_structure(model_id2)
             diff = compare_structures(struct1, struct2)
             left_html, right_html, diff_found = display_diff(diff)
@@ -116,19 +118,26 @@ if st.button("Compare Models"):
             col1, col2 = st.columns([1.5, 1.5])  # Adjust the ratio to make columns wider
             with col1:
-                st.write("### Model 1")
                 st.markdown(left_html, unsafe_allow_html=True)
             with col2:
-                st.write("### Model 2")
                 st.markdown(right_html, unsafe_allow_html=True)
             # Tokenizer verification
             try:
                 vocab_size1 = get_tokenizer_vocab_size(model_id1)
                 vocab_size2 = get_tokenizer_vocab_size(model_id2)
                 st.write(f"**{model_id1} Tokenizer Vocab Size**: {vocab_size1}")
                 st.write(f"**{model_id2} Tokenizer Vocab Size**: {vocab_size2}")
             except Exception as e:
                 st.error(f"Error loading tokenizers: {e}")
         else:

 if st.button("Compare Models"):
     with st.spinner('Comparing models and loading tokenizers...'):
         if model_id1 and model_id2:
+            # Get model structures
             struct1 = get_model_structure(model_id1)
             struct2 = get_model_structure(model_id2)
+            # Compare model structures
             diff = compare_structures(struct1, struct2)
             left_html, right_html, diff_found = display_diff(diff)
             col1, col2 = st.columns([1.5, 1.5])  # Adjust the ratio to make columns wider
             with col1:
+                st.write(f"### Model 1: {model_id1}")
                 st.markdown(left_html, unsafe_allow_html=True)
             with col2:
+                st.write(f"### Model 2: {model_id2}")
                 st.markdown(right_html, unsafe_allow_html=True)
             # Tokenizer verification
             try:
                 vocab_size1 = get_tokenizer_vocab_size(model_id1)
                 vocab_size2 = get_tokenizer_vocab_size(model_id2)
+                if vocab_size1 == vocab_size2:
+                    st.success("The tokenizer vocab sizes are identical.")
+                else:
+                    st.warning("The tokenizer vocab sizes are different.")
                 st.write(f"**{model_id1} Tokenizer Vocab Size**: {vocab_size1}")
                 st.write(f"**{model_id2} Tokenizer Vocab Size**: {vocab_size2}")
             except Exception as e:
                 st.error(f"Error loading tokenizers: {e}")
         else: