Spaces:

Mattral
/

Excel-Match-Analysis

Sleeping

App Files Files Community

Mattral committed on Apr 26, 2024

Commit

e2e62c4

verified ·

1 Parent(s): 3c68af2

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -38

app.py CHANGED Viewed

@@ -63,36 +63,6 @@ def find_exact_matches(df1, df2, column_name):
-def find_similar_texts2(df1, df2, column_name, exact_matches, threshold=0.3):
-    # Find rows with similar texts in the specified column, excluding exact matches
-    similar_texts = []
-    exact_match_indices = set(exact_matches.index.tolist())
-    # Concatenate texts from both dataframes
-    all_texts = df1[column_name].astype(str).tolist() + df2[column_name].astype(str).tolist()
-    # Compute TF-IDF vectors
-    vectorizer = TfidfVectorizer()
-    tfidf_matrix = vectorizer.fit_transform(all_texts)
-    # Compute cosine similarity matrix
-    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
-    # Iterate over pairs of rows to find similar texts
-    for i, row1 in df1.iterrows():
-        for j, row2 in df2.iterrows():
-            if i not in exact_match_indices and j not in exact_match_indices:
-                similarity = similarity_matrix[i, len(df1) + j]
-                if similarity =1:  # EXact matches
-                    # Calculate Levenshtein distance between strings
-                    distance = levenshtein_distance(row1[column_name], row2[column_name])
-                    max_length = max(len(row1[column_name]), len(row2[column_name]))
-                    similarity_score = 1 - (distance / max_length)
-                    if similarity_score >= threshold:
-                        similar_texts.append((i, j, row1[column_name], row2[column_name]))
-    return similar_texts2
 def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.3):
     # Find rows with similar texts in the specified column, excluding exact matches
@@ -142,7 +112,6 @@ def main():
         warehouse_columns = warehouse_df.columns.tolist()
         industry_columns = industry_df.columns.tolist()
         # Select columns using dropdowns
         st.header("Select Columns")
         warehouse_column = st.selectbox("Choose column from warehouse item stocks:", warehouse_columns)
@@ -155,7 +124,6 @@ def main():
             # Find similar texts
             similar_texts = find_similar_texts(warehouse_df, industry_df, warehouse_column, exact_matches)
-            similar_texts2 = find_similar_texts(warehouse_df, industry_df, warehouse_column, exact_matches)
             # Display results
             st.header("Exact Matches")
@@ -169,12 +137,6 @@ def main():
                 st.write(f"Industry: {text_pair[3]}")
                 st.write
-            st.header("Exactly Same Texts")
-            for text_pair in similar_texts2:
-                st.write(f"Row {text_pair[0]} in warehouse item stocks is the same as Row {text_pair[1]} in industry item stocks:")
-                st.write(f"Warehouse: {text_pair[2]}")
-                st.write(f"Industry: {text_pair[3]}")
-                st.write
 if __name__ == "__main__":

 def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.3):
     # Find rows with similar texts in the specified column, excluding exact matches
         warehouse_columns = warehouse_df.columns.tolist()
         industry_columns = industry_df.columns.tolist()
         # Select columns using dropdowns
         st.header("Select Columns")
         warehouse_column = st.selectbox("Choose column from warehouse item stocks:", warehouse_columns)
             # Find similar texts
             similar_texts = find_similar_texts(warehouse_df, industry_df, warehouse_column, exact_matches)
             # Display results
             st.header("Exact Matches")
                 st.write(f"Industry: {text_pair[3]}")
                 st.write
 if __name__ == "__main__":