Spaces:
Sleeping
Sleeping
import pandas as pd | |
import streamlit as st | |
from difflib import SequenceMatcher | |
def read_csv_or_excel(file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The format is chosen from the extension of ``file.name``. The comparison
    is case-insensitive, so names such as ``STOCK.CSV`` or ``Items.XLSX`` are
    accepted as well.

    Args:
        file: Object with a ``name`` attribute that ``pd.read_csv`` /
            ``pd.read_excel`` can read from (e.g. the value returned by
            ``st.file_uploader``).

    Returns:
        The DataFrame produced by ``pd.read_csv`` (``.csv``) or
        ``pd.read_excel`` (``.xlsx`` / ``.xls``).

    Raises:
        ValueError: If the file name has none of the supported extensions.
    """
    # Normalise once so the extension checks below ignore letter case.
    filename = file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(file)
    if filename.endswith(('.xlsx', '.xls')):
        return pd.read_excel(file)
    raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
def find_exact_matches(df1, df2, column_name):
    """Return the inner join of ``df1`` and ``df2`` on ``column_name``.

    Only rows whose ``column_name`` value occurs in both frames are kept;
    the join itself is delegated to ``pd.merge``.
    """
    return pd.merge(left=df1, right=df2, how='inner', on=column_name)
def _non_missing_cells(df, column_name):
    """Return ``(index, value, str(value))`` for each non-missing cell of a column."""
    column = df[column_name]
    present = column[column.notna()]
    return [(index, value, str(value)) for index, value in present.items()]


def find_similar_texts(df1, df2, column_name, threshold=0.8):
    """Find pairs of rows whose ``column_name`` values are textually similar.

    Every non-missing cell of ``df1[column_name]`` is compared with every
    non-missing cell of ``df2[column_name]`` using
    ``difflib.SequenceMatcher(...).ratio()`` on their ``str()`` forms.

    Missing cells (``None`` / ``NaN``) are skipped: stringified they would
    read "None" / "nan" and be reported as perfect matches of each other.
    Values are taken straight from the column instead of from ``iterrows()``
    rows, so they are not upcast to a common row dtype before comparison.

    Args:
        df1: First DataFrame; must contain ``column_name``.
        df2: Second DataFrame; must contain ``column_name``.
        column_name: Label of the column compared in both frames.
        threshold: Minimum similarity ratio for a pair to be reported.

    Returns:
        List of ``(index1, index2, value1, value2)`` tuples, ordered by
        position in ``df1`` and then by position in ``df2``.
    """
    # Extract and stringify each column once instead of once per pair.
    left_cells = _non_missing_cells(df1, column_name)
    right_cells = _non_missing_cells(df2, column_name)

    similar_texts = []
    for index1, value1, text1 in left_cells:
        for index2, value2, text2 in right_cells:
            matcher = SequenceMatcher(None, text1, text2)
            # real_quick_ratio() and quick_ratio() are cheap upper bounds on
            # ratio(), so a pair failing either one cannot reach the
            # threshold and the expensive ratio() call is skipped.
            if (
                matcher.real_quick_ratio() >= threshold
                and matcher.quick_ratio() >= threshold
                and matcher.ratio() >= threshold
            ):
                similar_texts.append((index1, index2, value1, value2))
    return similar_texts
def main():
    """Render the Streamlit page that compares two uploaded item-stock files.

    The page asks for a warehouse file and an industry file, lets the user
    pick one column from each, and then shows:

    * the inner join of the two files where the chosen warehouse column
      equals the chosen industry column, and
    * every pair of rows whose chosen cells are reported as similar by
      ``find_similar_texts``.

    Nothing beyond the uploaders is rendered until both files are present.
    A ``ValueError`` raised while reading a file (for example an unsupported
    extension) is shown with ``st.error`` instead of propagating.
    """
    st.title("Item Comparison App")

    # Upload files
    st.header("Upload Files")
    warehouse_file = st.file_uploader("Upload Warehouse Item Stocks (CSV or Excel)")
    industry_file = st.file_uploader("Upload Industry Item Stocks (CSV or Excel)")
    if warehouse_file is None or industry_file is None:
        return

    # Read files
    try:
        warehouse_df = read_csv_or_excel(warehouse_file)
        industry_df = read_csv_or_excel(industry_file)
    except ValueError as exc:
        st.error(str(exc))
        return

    # Select columns using dropdowns
    st.header("Select Columns")
    warehouse_column = st.selectbox(
        "Choose column from warehouse item stocks:", warehouse_df.columns.tolist()
    )
    industry_column = st.selectbox(
        "Choose column from industry item stocks:", industry_df.columns.tolist()
    )

    # Find exact matches. The two files need not share a column name, so the
    # join pairs the warehouse selection with the industry selection.
    if warehouse_column == industry_column:
        exact_matches = find_exact_matches(warehouse_df, industry_df, warehouse_column)
    else:
        exact_matches = pd.merge(
            warehouse_df,
            industry_df,
            left_on=warehouse_column,
            right_on=industry_column,
            how='inner',
        )

    # Find similar texts. find_similar_texts reads the same label from both
    # frames, so hand it a one-column view of the industry selection renamed
    # to the warehouse label; the industry row index is preserved.
    industry_texts = industry_df[[industry_column]].rename(
        columns={industry_column: warehouse_column}
    )
    similar_texts = find_similar_texts(warehouse_df, industry_texts, warehouse_column)

    # Display results
    st.header("Exact Matches")
    st.write(exact_matches)
    st.header("Similar Texts")
    for warehouse_row, industry_row, warehouse_text, industry_text in similar_texts:
        st.write(f"Row {warehouse_row} in warehouse item stocks is similar to Row {industry_row} in industry item stocks:")
        st.write(f"Warehouse: {warehouse_text}")
        st.write(f"Industry: {industry_text}")
        st.write("")


# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()