Spaces:
Sleeping
Sleeping
devjas1
committed on
Commit
·
2a2cf15
1
Parent(s):
81ec5ec
(FEAT)[Add validate_spectrum_modality Utility]:
Browse files
- New function to check if spectrum data matches selected modality (raman/ftir).
- Validates wavenumber ranges, coverage, and typical modality properties.
- Returns validation status and details of any mismatches.
- utils/preprocessing.py +63 -0
utils/preprocessing.py
CHANGED
|
@@ -113,6 +113,69 @@ def validate_spectrum_range(x: np.ndarray, modality: str = "raman") -> bool:
|
|
| 113 |
return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
|
| 114 |
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
def preprocess_spectrum(
|
| 117 |
x: np.ndarray,
|
| 118 |
y: np.ndarray,
|
|
|
|
| 113 |
return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
|
| 114 |
|
| 115 |
|
| 116 |
+
def validate_spectrum_modality(
    x_data: np.ndarray, y_data: np.ndarray, selected_modality: str
) -> Tuple[bool, list[str]]:
    """
    Validate that spectrum characteristics match the selected modality.

    The check is purely heuristic and looks only at the wavenumber axis:
    its extremes are compared against the (min, max) pair stored for the
    modality in ``MODALITY_RANGES``, with a multiplicative 20% tolerance
    on each bound, and the span covered must be at least 30% of the
    expected span. A couple of modality-specific sanity checks follow.

    Args:
        x_data: Wavenumber array (cm⁻¹)
        y_data: Intensity array. Accepted for interface compatibility;
            the current checks do not inspect it.
        selected_modality: Selected modality ('raman' or 'ftir')

    Returns:
        Tuple of (is_valid, list_of_issues). ``is_valid`` is True only
        when no issue was recorded. An unknown modality or an empty
        wavenumber array yields ``False`` with a single explanatory issue.
    """
    x_data = np.asarray(x_data)
    issues: list[str] = []

    if selected_modality not in MODALITY_RANGES:
        issues.append(f"Unknown modality: {selected_modality}")
        return False, issues

    # np.min / np.max raise ValueError on empty input; report it as a
    # validation issue instead so callers always get the tuple back.
    if x_data.size == 0:
        issues.append("No wavenumber data provided; cannot validate modality")
        return False, issues

    expected_min, expected_max = MODALITY_RANGES[selected_modality]
    actual_min, actual_max = np.min(x_data), np.max(x_data)

    # Check wavenumber range
    if actual_min < expected_min * 0.8:  # Allow 20% tolerance
        issues.append(
            f"Minimum wavenumber ({actual_min:.0f} cm⁻¹) is below typical {selected_modality.upper()} range (>{expected_min} cm⁻¹)"
        )

    if actual_max > expected_max * 1.2:  # Allow 20% tolerance
        issues.append(
            f"Maximum wavenumber ({actual_max:.0f} cm⁻¹) is above typical {selected_modality.upper()} range (<{expected_max} cm⁻¹)"
        )

    # Check for reasonable data range coverage
    data_range = actual_max - actual_min
    expected_range = expected_max - expected_min
    if data_range < expected_range * 0.3:  # Should cover at least 30% of expected range
        issues.append(
            f"Data range ({data_range:.0f} cm⁻¹) seems narrow for {selected_modality.upper()} spectroscopy"
        )

    # FTIR-specific checks
    if selected_modality == "ftir":
        # Data starting above 1000 cm⁻¹ misses most of the fingerprint region
        if actual_min > 1000:
            issues.append(
                "FTIR data should typically include fingerprint region (400-1500 cm⁻¹)"
            )

    # Raman-specific checks
    if selected_modality == "raman":
        # Data ending below 1000 cm⁻¹ is unusually short for a Raman spectrum
        if actual_max < 1000:
            issues.append(
                "Raman data typically extends to higher wavenumbers (>1000 cm⁻¹)"
            )

    return not issues, issues
|
| 177 |
+
|
| 178 |
+
|
| 179 |
def preprocess_spectrum(
|
| 180 |
x: np.ndarray,
|
| 181 |
y: np.ndarray,
|