Spaces:

Jayesh13
/

Protein_repeat_comparator

Sleeping

File size: 3,621 Bytes

5cd21b9
 
 
b57c1d3
 
26343ed
b57c1d3
 
 
2114be5
b57c1d3
 
 
 
 
 
2c0f1ac
 
5223b02
128ce67
 
 
b533841
 
5a2a00f
a0d5a6c
5296403
5223b02
5296403
5223b02
 
 
128ce67
 
5223b02
5296403
128ce67
5223b02
5296403
 
 
128ce67
2114be5
5296403
128ce67
5223b02
2114be5
 
 
 
 
 
5223b02
5296403
b533841
 
2c0f1ac
5223b02
 
5a2a00f
 
5223b02
 
5296403
 
a0d5a6c
5223b02
2114be5
 
 
 
 
5a2a00f
2114be5
5a2a00f
2114be5
 
 
 
 
5a2a00f
 
 
 
2114be5
a0d5a6c
128ce67
5a2a00f
a0d5a6c
5223b02
a0d5a6c
2114be5
a0d5a6c
5a2a00f
a0d5a6c
 
 
5296403
26343ed
 
50c6839

import importlib
import importlib.util
import os
import subprocess
import sys

# Runtime dependency bootstrap.
#
# This script installs its own third-party requirements before importing them.
# Only packages that cannot be located are installed, so repeated executions of
# the script do not spawn a pip process once everything is present.  pip is run
# through the interpreter that is executing this script (``sys.executable``)
# so the packages land in the environment that will import them, and the
# command is passed as an argument list rather than through a shell.
_REQUIRED_PACKAGES = ("streamlit", "pandas", "xlsxwriter", "openpyxl")
_missing_packages = [
    package
    for package in _REQUIRED_PACKAGES
    if importlib.util.find_spec(package) is None
]
if _missing_packages:
    # check=False: like the previous os.system call, a failed install is not
    # fatal here; the imports below raise ImportError if a package is absent.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *_missing_packages],
        check=False,
    )
    # Let the import system notice packages installed while the process runs.
    importlib.invalidate_caches()

import streamlit as st
import pandas as pd
from io import BytesIO

# Page chrome: browser-tab title, centered layout, heading and usage hint.
st.set_page_config(layout="centered", page_title="Protein Repeat Comparator")
st.title("🧬 Protein Repeat Comparator")
st.write(
    "Upload two Excel files with protein data. "
    "Frequency values should start from the first row (header)."
)

# One .xlsx uploader per input file, rendered in order (first, then second).
uploaded_file1, uploaded_file2 = [
    st.file_uploader(prompt, type=["xlsx"])
    for prompt in ("Upload First Excel File", "Upload Second Excel File")
]

def color_pct(val):
    """Return the CSS rule used to colour one "%age Change" cell.

    The "Infinity" marker and positive numbers are green, negative numbers are
    red, and anything else (zero, other types) gets no styling.
    """
    if isinstance(val, str) and val == "Infinity":
        return 'color: green'
    if isinstance(val, (int, float)):
        if val > 0:
            return 'color: green'
        if val < 0:
            return 'color: red'
    return ''


def _find_repeat_differences(df1, df2, id_col, name_col, repeat_columns):
    """Collect every repeat frequency that differs between the two tables.

    Rows are paired on the ``(id_col, name_col)`` values.  When the second
    table contains several rows with the same pair, the first one is used.
    Rows of ``df1`` without a partner in ``df2`` are skipped, as are cells
    where either frequency is missing.

    Args:
        df1: Table from the first file; its rows drive the comparison.
        df2: Table from the second file; must contain ``id_col``, ``name_col``
            and every column listed in ``repeat_columns``.
        id_col: Name of the identifier column.
        name_col: Name of the protein-name column.
        repeat_columns: Names of the frequency columns to compare.

    Returns:
        A list of dicts, one per differing cell, with the id, the name, the
        repeat column, both frequencies, the absolute difference and the
        percentage change relative to the first file ("Infinity" when the
        first frequency is 0).
    """
    # Index the second table once instead of filtering it for every row of the
    # first table: (id, name) -> position of the first row carrying that pair.
    first_match = {}
    for position, key in enumerate(zip(df2[id_col], df2[name_col])):
        # Missing key values never compare equal, so they can never be matched.
        if pd.isna(key[0]) or pd.isna(key[1]):
            continue
        first_match.setdefault(key, position)

    differences = []
    for _, row1 in df1.iterrows():
        entry_id = row1[id_col]
        protein_name = row1[name_col]
        if pd.isna(entry_id) or pd.isna(protein_name):
            continue

        position = first_match.get((entry_id, protein_name))
        if position is None:
            continue
        row2 = df2.iloc[position]

        for repeat_col in repeat_columns:
            freq1 = row1[repeat_col]
            freq2 = row2[repeat_col]

            if pd.isna(freq1) or pd.isna(freq2):
                continue
            if freq1 == freq2:
                continue

            if freq1 == 0:
                # A relative change from zero is undefined; use a text marker.
                pct_change = "Infinity"
            else:
                pct_change = round(((freq2 - freq1) / freq1) * 100, 2)

            differences.append({
                id_col: entry_id,
                name_col: protein_name,
                "Repeat": repeat_col,
                "Frequency File 1": freq1,
                "Frequency File 2": freq2,
                "Difference": abs(freq1 - freq2),
                "%age Change": pct_change
            })
    return differences


# Run the comparison once both files have been uploaded.
if uploaded_file1 and uploaded_file2:
    try:
        df1 = pd.read_excel(uploaded_file1, header=0)
        df2 = pd.read_excel(uploaded_file2, header=0)

        # Normalise headers to strings so both files are addressed the same way.
        df1.columns = df1.columns.astype(str)
        df2.columns = df2.columns.astype(str)

        # The layout is positional: column 0 = id, column 1 = name, rest = repeats.
        if len(df1.columns) < 2:
            raise ValueError(
                "The first file must contain an ID column and a protein name "
                "column, followed by the repeat columns."
            )

        id_col = df1.columns[0]
        name_col = df1.columns[1]
        repeat_columns = df1.columns[2:]

        # Fail with a readable message instead of a bare KeyError when the
        # second file does not share the key columns of the first one.
        missing_keys = [col for col in (id_col, name_col) if col not in df2.columns]
        if missing_keys:
            raise ValueError(
                "The second file is missing the column(s): "
                + ", ".join(missing_keys)
            )

        # Repeat columns absent from the second file cannot be compared.
        skipped_repeats = [col for col in repeat_columns if col not in df2.columns]
        if skipped_repeats:
            st.warning(
                "Repeat column(s) not found in the second file and skipped: "
                + ", ".join(skipped_repeats)
            )
        shared_repeats = [col for col in repeat_columns if col in df2.columns]

        differences = _find_repeat_differences(
            df1, df2, id_col, name_col, shared_repeats
        )

        if differences:
            result_df = pd.DataFrame(differences)
            result_df = result_df.sort_values(by="Difference", ascending=False)

            # Show DataFrame in Streamlit app
            st.subheader("🔍 View Changed Repeats")
            st.dataframe(result_df, use_container_width=True)

            # Apply styling.  Styler.applymap was renamed to Styler.map in
            # pandas 2.1; prefer the new name and fall back on older versions.
            styler = result_df.style
            if hasattr(styler, "map"):
                styled_df = styler.map(color_pct, subset=["%age Change"])
            else:
                styled_df = styler.applymap(color_pct, subset=["%age Change"])

            # Save styled output to an in-memory workbook for download
            output = BytesIO()
            with pd.ExcelWriter(output, engine='openpyxl') as writer:
                styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats")
            output.seek(0)

            st.download_button(
                label="📥 Download Excel File",
                data=output,
                file_name="changed_repeats_with_percentage.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
        else:
            st.info("No changes in repeat frequencies were found.")

    except Exception as e:
        # Top-level UI boundary: report any failure to the user in the page.
        st.error(f"⚠ Error: {e}")