jiehou committed on
Commit
9f27665
·
verified ·
1 Parent(s): d30c23c

Upload example_data_loader.py

Browse files
Files changed (1) hide show
  1. example_data_loader.py +137 -0
example_data_loader.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Example Data Loader for RNA Motif Comparison Tool
3
+ Provides functionality to load example PDB files from data folder
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+ import streamlit as st
9
+
10
def get_example_pdbs(data_folder="data"):
    """
    Get the example PDB files available in the data folder.

    Only regular files located directly inside ``data_folder`` are considered
    (no recursion). The ``.pdb`` extension is matched case-insensitively, so
    ``.pdb``, ``.PDB`` and mixed-case variants such as ``.Pdb`` are all found.

    Args:
        data_folder: Path to folder containing example PDB files

    Returns:
        Dictionary with filename as key and the file's path (``data_folder``
        joined with the filename, as a string) as value, ordered by filename.
        Empty if the folder is missing, is not a directory, or cannot be read.
    """
    data_path = Path(data_folder)

    # A missing folder, or a path that points at a regular file, simply means
    # there are no examples to offer.
    if not data_path.is_dir():
        return {}

    try:
        pdb_files = sorted(
            (
                entry
                for entry in data_path.iterdir()
                # Directories that merely look like "*.pdb" are not loadable.
                if entry.name.lower().endswith(".pdb") and entry.is_file()
            ),
            key=lambda entry: entry.name,
        )
    except OSError:
        # Unreadable folder: behave as if it held no examples.
        return {}

    return {entry.name: str(entry) for entry in pdb_files}
35
+
36
+
37
def create_example_selector(data_folder="data"):
    """
    Render Streamlit widgets that let the user pick example PDB files.

    Shows a warning when the folder yields no examples; otherwise shows an
    info banner with the number of files found, a multiselect listing the
    filenames in sorted order, and a success message once at least one file
    has been chosen.

    Args:
        data_folder: Path to folder containing example PDB files

    Returns:
        List of paths for the selected files (empty when nothing is
        available or nothing is selected)
    """
    examples = get_example_pdbs(data_folder)

    if examples:
        st.info(f"📁 Found {len(examples)} example PDB files in '{data_folder}/' folder")

        # Offer the filenames; the widget returns the chosen subset
        chosen = st.multiselect(
            "Select example PDB files to load",
            options=sorted(examples),
            help="Choose one or more example structures"
        )

        # Translate the chosen filenames back into their paths
        selected_paths = list(map(examples.__getitem__, chosen))

        if selected_paths:
            st.success(f"✅ Selected {len(selected_paths)} example file(s)")

        return selected_paths

    st.warning(f"⚠️ No example PDB files found in '{data_folder}/' folder")
    return []
69
+
70
+
71
def load_example_as_uploaded_file(file_path):
    """
    Load a PDB file and wrap it in a stand-in for Streamlit's UploadedFile.

    The file is read once, in binary mode, when this function is called; the
    returned object serves that in-memory snapshot afterwards.

    Args:
        file_path: Path to the PDB file

    Returns:
        Object exposing ``name`` (base filename), ``path`` (the path as
        given), ``size`` (content length in bytes) and the ``getbuffer()``,
        ``getvalue()`` and ``read()`` methods, each of which returns the
        complete file content as bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    class MockUploadedFile:
        """In-memory snapshot of a file on disk with an upload-like interface."""

        def __init__(self, path):
            self.name = os.path.basename(path)
            self.path = path
            with open(path, 'rb') as f:
                self._content = f.read()
            # Exposed because code handling real uploads commonly reads .size
            self.size = len(self._content)

        def getbuffer(self):
            return self._content

        def getvalue(self):
            # Same data as getbuffer(); provided so code written against
            # BytesIO-style uploads works with example files too.
            return self._content

        def read(self):
            # No read cursor is kept: every call returns the full content.
            return self._content

    return MockUploadedFile(file_path)
95
+
96
+
97
def get_example_info(data_folder="data"):
    """
    Get summary information about the example PDB files.

    Each file is scanned once, line by line, without loading it fully into
    memory. Files are decoded as UTF-8 with undecodable bytes replaced, so a
    stray non-UTF-8 byte does not prevent the file from being summarised.

    Args:
        data_folder: Path to folder containing example PDB files

    Returns:
        Dictionary keyed by filename. On success the value holds:
            'path':   path of the file
            'atoms':  number of lines starting with 'ATOM' or 'HETATM'
            'header': text after column 10 of the first 'HEADER' line,
                      stripped, or "" when there is no such line
            'lines':  total number of lines in the file
        If the file cannot be opened or read, the value holds 'path' and
        'error' (the error message) instead.
    """
    info = {}

    for name, path in get_example_pdbs(data_folder).items():
        atom_count = 0
        line_count = 0
        header = None  # None means "no HEADER line seen yet"

        try:
            with open(path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    line_count += 1
                    if line.startswith(('ATOM', 'HETATM')):
                        atom_count += 1
                    elif header is None and line.startswith('HEADER'):
                        # Only the first HEADER record is used
                        header = line[10:].strip()
        except OSError as e:
            info[name] = {
                'path': path,
                'error': str(e)
            }
            continue

        info[name] = {
            'path': path,
            'atoms': atom_count,
            'header': header or "",
            'lines': line_count
        }

    return info