Spaces:
Sleeping
Sleeping
File size: 3,621 Bytes
5cd21b9 b57c1d3 26343ed b57c1d3 2114be5 b57c1d3 2c0f1ac 5223b02 128ce67 b533841 5a2a00f a0d5a6c 5296403 5223b02 5296403 5223b02 128ce67 5223b02 5296403 128ce67 5223b02 5296403 128ce67 2114be5 5296403 128ce67 5223b02 2114be5 5223b02 5296403 b533841 2c0f1ac 5223b02 5a2a00f 5223b02 5296403 a0d5a6c 5223b02 2114be5 5a2a00f 2114be5 5a2a00f 2114be5 5a2a00f 2114be5 a0d5a6c 128ce67 5a2a00f a0d5a6c 5223b02 a0d5a6c 2114be5 a0d5a6c 5a2a00f a0d5a6c 5296403 26343ed 50c6839 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import importlib.util
import os
import subprocess
import sys

# Packages this app needs at runtime. Streamlit re-executes the whole script
# on every widget interaction, so installation is attempted only when a
# package is actually missing instead of shelling out to pip on every rerun.
_REQUIRED_PACKAGES = ("streamlit", "pandas", "xlsxwriter", "openpyxl")
_missing_packages = [
    pkg for pkg in _REQUIRED_PACKAGES if importlib.util.find_spec(pkg) is None
]
if _missing_packages:
    # Use the running interpreter's pip (no shell) so the packages land in the
    # environment that will import them. check=False keeps the original
    # best-effort behavior: a failed install surfaces as an ImportError below.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *_missing_packages],
        check=False,
    )
    importlib.invalidate_caches()

# These imports must stay below the install guard above.
import streamlit as st
import pandas as pd
from io import BytesIO

# Page chrome. The title's emoji prefix was mis-encoded (and had swallowed the
# following space); it is restored to the DNA emoji here.
st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
st.title("🧬 Protein Repeat Comparator")
st.write("Upload two Excel files with protein data. Frequency values should start from the first row (header).")

# Both uploads are required before any comparison runs.
uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
def color_pct(val):
    """Return the CSS color rule for one "%age Change" cell.

    The "Infinity" marker (used when the first-file frequency is 0) and
    positive numbers are colored green, negative numbers red; anything else
    gets no styling.
    """
    if isinstance(val, str) and val == "Infinity":
        return 'color: green'
    if isinstance(val, (int, float)):
        if val > 0:
            return 'color: green'
        if val < 0:
            return 'color: red'
    return ''


def _collect_repeat_differences(df1, df2, id_col, name_col, repeat_columns):
    """Return one record per repeat whose frequency differs between the files.

    Each row of ``df1`` is paired with the first row of ``df2`` that has the
    same values in ``id_col`` and ``name_col``; rows without a partner are
    skipped. For every column in ``repeat_columns`` the two frequencies are
    compared, ignoring cells that are missing in either file.

    Returns:
        A list of dicts with the id, name, repeat column, both frequencies,
        the absolute difference and the percentage change relative to the
        first file ("Infinity" when the first-file frequency is 0).
    """
    differences = []
    for _, row1 in df1.iterrows():
        entry_id = row1[id_col]
        protein_name = row1[name_col]
        row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
        if row2_match.empty:
            continue
        # Only the first matching row of the second file is compared.
        row2 = row2_match.iloc[0]
        for repeat_col in repeat_columns:
            freq1 = row1[repeat_col]
            freq2 = row2[repeat_col]
            if pd.isna(freq1) or pd.isna(freq2) or freq1 == freq2:
                continue
            if freq1 == 0:
                # A relative change from zero is undefined; report a marker.
                pct_change = "Infinity"
            else:
                pct_change = round(((freq2 - freq1) / freq1) * 100, 2)
            differences.append({
                id_col: entry_id,
                name_col: protein_name,
                "Repeat": repeat_col,
                "Frequency File 1": freq1,
                "Frequency File 2": freq2,
                "Difference": abs(freq1 - freq2),
                "%age Change": pct_change,
            })
    return differences


if uploaded_file1 and uploaded_file2:
    try:
        df1 = pd.read_excel(uploaded_file1, header=0)
        df2 = pd.read_excel(uploaded_file2, header=0)
        # Headers may be read as numbers; normalize so both files use string names.
        df1.columns = df1.columns.astype(str)
        df2.columns = df2.columns.astype(str)

        # The first two columns of file 1 identify a protein; the rest are repeats.
        if len(df1.columns) < 2:
            raise ValueError(
                "The first file must contain at least an ID column and a name column."
            )
        id_col = df1.columns[0]
        name_col = df1.columns[1]

        missing_keys = [col for col in (id_col, name_col) if col not in df2.columns]
        if missing_keys:
            raise ValueError(
                "The second file is missing required column(s): "
                + ", ".join(missing_keys)
            )

        # Compare only repeats present in both files instead of aborting the
        # whole comparison with a KeyError on the first absent column.
        candidate_columns = list(df1.columns[2:])
        repeat_columns = [col for col in candidate_columns if col in df2.columns]
        skipped_columns = [col for col in candidate_columns if col not in df2.columns]
        if skipped_columns:
            st.warning(
                "Skipped repeat column(s) not found in the second file: "
                + ", ".join(skipped_columns)
            )

        differences = _collect_repeat_differences(
            df1, df2, id_col, name_col, repeat_columns
        )

        if differences:
            result_df = pd.DataFrame(differences)
            result_df = result_df.sort_values(by="Difference", ascending=False)

            # Show DataFrame in Streamlit app
            # NOTE(review): the emoji prefix in this label looks mis-encoded;
            # the intended glyph could not be determined, so it is kept as-is.
            st.subheader("π View Changed Repeats")
            st.dataframe(result_df, use_container_width=True)

            # Apply styling. Styler.applymap was renamed to Styler.map in
            # pandas 2.1; prefer the new name and fall back on older versions.
            styler = result_df.style
            if hasattr(styler, "map"):
                styled_df = styler.map(color_pct, subset=["%age Change"])
            else:
                styled_df = styler.applymap(color_pct, subset=["%age Change"])

            # Save styled output to an in-memory workbook for download
            output = BytesIO()
            with pd.ExcelWriter(output, engine='openpyxl') as writer:
                styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats")
            output.seek(0)

            # NOTE(review): emoji prefix appears mis-encoded here as well — confirm.
            st.download_button(
                label="π₯ Download Excel File",
                data=output,
                file_name="changed_repeats_with_percentage.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
        else:
            st.info("No changes in repeat frequencies were found.")
    except Exception as e:
        # Top-level UI boundary: report any failure to the user instead of
        # crashing the app. NOTE(review): emoji prefix appears mis-encoded.
        st.error(f"β Error: {e}")