Jayesh13 committed on
Commit
7d03065
·
verified ·
1 Parent(s): ef8ec9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -20
app.py CHANGED
@@ -54,32 +54,31 @@ def process_csv(file):
54
 
55
  return homorepeats, sequence_data
56
 
57
- # Function to generate and download Excel workbook with file names as separators
 
 
 
 
 
58
  def create_excel(sequences_data, homorepeats, filenames):
59
  output = BytesIO()
60
  workbook = xlsxwriter.Workbook(output, {'in_memory': True})
61
- worksheet = workbook.add_worksheet()
62
-
63
- row = 0
64
 
65
- # Iterate through sequences data grouped by filenames
66
  for file_index, file_data in enumerate(sequences_data):
67
  filename = filenames[file_index]
68
-
69
- # Write filename as a separator row
70
- worksheet.write(row, 0, f"File: {filename}")
71
- row += 1
72
 
73
  # Write the header for the current file
74
- worksheet.write(row, 0, "Entry ID")
75
- worksheet.write(row, 1, "Protein Name")
76
  col = 2
77
  for repeat in sorted(homorepeats):
78
- worksheet.write(row, col, repeat)
79
  col += 1
80
- row += 1
81
 
82
  # Write data for each sequence in the current file
 
83
  for entry_id, protein_name, freq in file_data:
84
  worksheet.write(row, 0, entry_id)
85
  worksheet.write(row, 1, protein_name)
@@ -89,9 +88,6 @@ def create_excel(sequences_data, homorepeats, filenames):
89
  col += 1
90
  row += 1
91
 
92
- # Add an empty row as a separator between files
93
- row += 1
94
-
95
  workbook.close()
96
  output.seek(0)
97
  return output
@@ -99,8 +95,8 @@ def create_excel(sequences_data, homorepeats, filenames):
99
  # Streamlit UI components
100
  st.title("Protein Homorepeat Analysis")
101
 
102
- # Step 1: Upload CSV Files
103
- uploaded_files = st.file_uploader("Upload CSV files", accept_multiple_files=True, type=["csv"])
104
 
105
  # Step 2: Process files and display results
106
  if uploaded_files:
@@ -109,7 +105,8 @@ if uploaded_files:
109
  filenames = []
110
 
111
  for file in uploaded_files:
112
- homorepeats, sequence_data = process_csv(file)
 
113
  if homorepeats is not None:
114
  all_homorepeats.update(homorepeats)
115
  all_sequences_data.append(sequence_data)
@@ -141,4 +138,21 @@ if uploaded_files:
141
  rows.append(row)
142
 
143
  result_df = pd.DataFrame(rows)
144
- st.dataframe(result_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  return homorepeats, sequence_data
56
 
57
+ import pandas as pd
58
+ import streamlit as st
59
+ from io import BytesIO
60
+ import xlsxwriter
61
+
62
+ # Function to generate and download Excel workbook with separate sheets for each input file
63
  def create_excel(sequences_data, homorepeats, filenames):
64
  output = BytesIO()
65
  workbook = xlsxwriter.Workbook(output, {'in_memory': True})
 
 
 
66
 
67
+ # Iterate through sequences data grouped by filenames and create separate sheets
68
  for file_index, file_data in enumerate(sequences_data):
69
  filename = filenames[file_index]
70
+ worksheet = workbook.add_worksheet(filename[:31]) # Limit sheet name to 31 characters
 
 
 
71
 
72
  # Write the header for the current file
73
+ worksheet.write(0, 0, "Entry ID")
74
+ worksheet.write(0, 1, "Protein Name")
75
  col = 2
76
  for repeat in sorted(homorepeats):
77
+ worksheet.write(0, col, repeat)
78
  col += 1
 
79
 
80
  # Write data for each sequence in the current file
81
+ row = 1
82
  for entry_id, protein_name, freq in file_data:
83
  worksheet.write(row, 0, entry_id)
84
  worksheet.write(row, 1, protein_name)
 
88
  col += 1
89
  row += 1
90
 
 
 
 
91
  workbook.close()
92
  output.seek(0)
93
  return output
 
95
  # Streamlit UI components
96
  st.title("Protein Homorepeat Analysis")
97
 
98
+ # Step 1: Upload Excel Files
99
+ uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
100
 
101
  # Step 2: Process files and display results
102
  if uploaded_files:
 
105
  filenames = []
106
 
107
  for file in uploaded_files:
108
+ excel_data = pd.ExcelFile(file)
109
+ homorepeats, sequence_data = process_excel(excel_data) # Modify your process_csv function to process_excel
110
  if homorepeats is not None:
111
  all_homorepeats.update(homorepeats)
112
  all_sequences_data.append(sequence_data)
 
138
  rows.append(row)
139
 
140
  result_df = pd.DataFrame(rows)
141
+ st.dataframe(result_df)
142
+
143
+ # Function to process the Excel file
144
def process_excel(excel_data):
    """Collect per-protein homorepeat frequencies from every sheet of a workbook.

    Args:
        excel_data: An object exposing ``sheet_names`` and ``parse(sheet_name)``
            (the caller passes a ``pandas.ExcelFile``). Each usable sheet is
            expected to have ``Entry ID`` and ``Protein Name`` columns; every
            column from the third one onward is treated as a repeat column.

    Returns:
        A ``(homorepeats, sequence_data)`` tuple. ``homorepeats`` is the set of
        repeat column labels seen on sheets that contributed at least one row.
        ``sequence_data`` is a list of ``(entry_id, protein_name, freq)``
        tuples, where ``freq`` maps each repeat column label to that row's
        cell value.

    Note:
        Sheets that lack either identifying column are skipped instead of
        raising ``KeyError``, so a workbook with an extra notes sheet does not
        abort processing.

        NOTE(review): in the diff this definition sits below the module-level
        code that calls it; it has to be defined above that call site,
        otherwise the call raises ``NameError`` when a file is uploaded.
    """
    required_columns = ("Entry ID", "Protein Name")
    homorepeats = set()
    sequence_data = []

    for sheet_name in excel_data.sheet_names:
        df = excel_data.parse(sheet_name)

        # A sheet without the identifying columns cannot be read as a result
        # table; skip it rather than fail on its first row.
        if any(column not in df.columns for column in required_columns):
            continue

        # Repeats are taken positionally: everything after the first two columns.
        repeat_columns = list(df.columns[2:])
        records = df.to_dict("records")

        for record in records:
            freq = {repeat: record[repeat] for repeat in repeat_columns}
            sequence_data.append(
                (record["Entry ID"], record["Protein Name"], freq)
            )

        # The repeat labels are identical for every row of a sheet, so register
        # them once, and only for sheets that actually contributed rows.
        if records:
            homorepeats.update(repeat_columns)

    return homorepeats, sequence_data