import openai
import json
import streamlit as st
from config import LLM_PROVIDERS, AVAILABLE_MODELS, get_default_provider_and_model
from soil_calculations import SoilCalculations
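
# The shapes sketched below are assumptions inferred from how config.py is used in
# this module (base_url lookup in __init__, supports_images/name lookups further
# down); the authoritative definitions live in config.py. Hypothetical entries,
# for illustration only:
#
#   LLM_PROVIDERS = {
#       "openrouter": {"base_url": "https://openrouter.ai/api/v1"},
#   }
#   AVAILABLE_MODELS = {
#       "anthropic/claude-3.5-sonnet": {"name": "Claude-3.5 Sonnet", "supports_images": True},
#   }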

class LLMClient:
    def __init__(self, model=None, api_key=None, provider=None):
        # Get defaults if not provided
        if not provider or not model:
            default_provider, default_model = get_default_provider_and_model()
            self.provider = provider or default_provider
            self.model = model or default_model
        else:
            self.provider = provider
            self.model = model

        self.api_key = api_key

        # Only create client if we have API key and provider
        if not self.api_key or not self.provider:
            self.client = None
            self.calculator = SoilCalculations()
            return

        # Get provider configuration
        provider_config = LLM_PROVIDERS.get(self.provider, {})
        base_url = provider_config.get("base_url", "https://openrouter.ai/api/v1")

        self.client = openai.OpenAI(
            base_url=base_url,
            api_key=self.api_key,
        )
        self.calculator = SoilCalculations()
    def _supports_images(self) -> bool:
        """Check if the current model supports image inputs."""
        model_info = AVAILABLE_MODELS.get(self.model, {})
        return model_info.get('supports_images', False)
    def analyze_soil_boring_log(self, text_content=None, image_base64=None):
        """Analyze a soil boring log using the configured LLM."""
        # Standardize units in text content before analysis
        if text_content:
            text_content, unit_conversions = self.calculator.standardize_units(text_content)
            if unit_conversions:
                st.info(f"🔄 Converted units: {', '.join([f'{k} → {v}' for k, v in unit_conversions.items()])}")
        system_prompt = """You are an expert geotechnical engineer specializing in soil boring log interpretation.
IMPORTANT: You must respond with ONLY valid JSON data. Do not include any text before or after the JSON.
SAMPLE TYPE IDENTIFICATION (CRITICAL - FOLLOW EXACT ORDER):
**STEP 1 - FIRST COLUMN STRATIFICATION SYMBOLS (ABSOLUTE HIGHEST PRIORITY):**
ALWAYS look at the FIRST COLUMN of each layer for stratification symbols:
- **SS-1, SS-2, SS-18, SS18, SS-5** → SS (Split Spoon) sample
- **ST-1, ST-2, ST-5, ST5, ST-12** → ST (Shelby Tube) sample
- **SS1, SS2, SS3** (without dash) → SS sample
- **ST1, ST2, ST3** (without dash) → ST sample
- **Look for pattern: [SS|ST][-]?[0-9]+** in first column
**EXAMPLES of First Column Recognition:**
```
SS-18 | Brown clay, N=8 → sample_type="SS" (SS-18 in first column)
ST-5 | Gray clay, Su=45 kPa → sample_type="ST" (ST-5 in first column)
SS12 | Sandy clay, SPT test → sample_type="SS" (SS12 in first column)
ST3 | Soft clay, unconfined → sample_type="ST" (ST3 in first column)
```
**STEP 2 - If NO first column symbols, then check description keywords:**
- SS indicators: "split spoon", "SPT", "standard penetration", "disturbed"
- ST indicators: "shelby", "tube", "undisturbed", "UT", "unconfined compression"
**STEP 3 - If still unclear, use strength parameter type:**
- SPT-N values present → likely SS sample
- Su values from unconfined test → likely ST sample
CRITICAL SOIL CLASSIFICATION RULES (MANDATORY):
**SAND LAYER CLASSIFICATION REQUIREMENTS:**
1. **Sand layers MUST have sieve analysis evidence** - Look for:
   - "Sieve #200: X% passing" or "#200 passing: X%"
   - "Fines content: X%" (same as sieve #200)
   - "Particle size analysis" or "gradation test"
   - "% passing 0.075mm" (equivalent to #200 sieve)
2. **Classification Rules**:
   - Sieve #200 >50% passing → CLAY (fine-grained)
   - Sieve #200 <50% passing → SAND/GRAVEL (coarse-grained)
3. **NO SIEVE ANALYSIS = ASSUME CLAY (MANDATORY)**:
   - If no sieve analysis data found → ALWAYS classify as CLAY
   - Include note: "Assumed clay - no sieve analysis data available"
   - Set sieve_200_passing: null (not a number)
**CRITICAL**: Never classify as sand/silt without explicit sieve analysis evidence
**CRITICAL**: Always look for sieve #200 data before classifying as sand
CRITICAL SS/ST SAMPLE RULES (MUST FOLLOW):
FOR SS (Split Spoon) SAMPLES:
1. ALWAYS use RAW N-VALUE (not N-corrected, N-correction, or adjusted N)
2. Look for: "N = 15", "SPT-N = 8", "raw N = 20", "field N = 12"
3. IGNORE: "N-corrected = 25", "N-correction = 18", "adjusted N = 30"
4. For clay: Use SPT-N parameter (will be converted to Su using Su=5*N)
5. For sand/silt: Use SPT-N parameter (will be converted to friction angle)
6. NEVER use unconfined compression Su values for SS samples - ONLY use N values
FOR ST (Shelby Tube) SAMPLES:
1. ALWAYS USE DIRECT Su values from unconfined compression test
2. If ST sample has Su value (e.g., "Su = 25 kPa"), use that EXACT value
3. NEVER convert SPT-N to Su for ST samples when direct Su is available
4. Priority: Direct Su measurement > any other value
EXTRACTION PRIORITY FOR SS SAMPLES:
1. Raw N, Field N, Measured N (highest priority)
2. N-value without "corrected" or "correction" terms
3. General SPT-N value (lowest priority)
4. NEVER use Su from unconfined compression for SS samples
CRITICAL UNIT CONVERSION REQUIREMENTS (MUST APPLY):
**MANDATORY SU UNIT CONVERSION - READ FROM IMAGE/FILE:**
When extracting Su values from images or text, you MUST convert to kPa BEFORE using the value:
1. **ksc or kg/cm²**: Su_kPa = Su_ksc × 98.0
   Example: "Su = 2.5 ksc" → strength_value: 245 (not 2.5)
2. **t/m² (tonnes/m²)**: Su_kPa = Su_tonnes × 9.81
   Example: "Su = 3.0 t/m²" → strength_value: 29.43 (not 3.0)
3. **psi**: Su_kPa = Su_psi × 6.895
   Example: "Su = 50 psi" → strength_value: 344.75 (not 50)
4. **psf**: Su_kPa = Su_psf × 0.048
   Example: "Su = 1000 psf" → strength_value: 48 (not 1000)
5. **kPa**: Use directly (no conversion needed)
   Example: "Su = 75 kPa" → strength_value: 75
6. **MPa**: Su_kPa = Su_MPa × 1000
   Example: "Su = 0.1 MPa" → strength_value: 100 (not 0.1)
**IMPORTANT**: Always include original unit in description for verification
**SPT-N values**: Keep as-is (no unit conversion needed)
CRITICAL SU-WATER CONTENT VALIDATION (MANDATORY):
**EXTRACT WATER CONTENT WHEN AVAILABLE:**
Always extract water content (w%) when mentioned in the description:
- "water content = 25%" → water_content: 25
- "w = 30%" → water_content: 30
- "moisture content 35%" → water_content: 35
**VALIDATE SU-WATER CONTENT CORRELATION:**
For clay layers, Su and water content should correlate reasonably:
- Very soft clay: Su < 25 kPa, w% > 40%
- Soft clay: Su 25-50 kPa, w% 30-40%
- Medium clay: Su 50-100 kPa, w% 20-30%
- Stiff clay: Su 100-200 kPa, w% 15-25%
- Very stiff clay: Su 200-400 kPa, w% 10-20%
- Hard clay: Su > 400 kPa, w% < 15%
**CRITICAL UNIT CHECK SCENARIOS:**
- If Su > 1000 kPa with w% > 20%: CHECK if Su is in wrong units (psi, psf?)
- If Su < 5 kPa with w% < 15%: CHECK if Su is in wrong units (MPa, bar?)
- If correlation seems very off: VERIFY unit conversion was applied correctly
CRITICAL OUTPUT FORMAT (MANDATORY):
You MUST respond with ONLY a valid JSON object. Do not include:
- Explanatory text before or after the JSON
- Markdown formatting (```json ```)
- Comments or notes
- Multiple JSON objects
Start your response directly with { and end with }
LAYER GROUPING REQUIREMENTS:
1. MAXIMUM 7 LAYERS TOTAL - Group similar adjacent layers to achieve this limit
2. CLAY AND SAND MUST BE SEPARATE - Never combine clay layers with sand layers
3. Group adjacent layers with similar properties (same soil type and similar consistency)
4. Prioritize engineering significance over minor variations
Analyze the provided soil boring log and extract the following information in this exact JSON format:
{
  "project_info": {
    "project_name": "string",
    "boring_id": "string",
    "location": "string",
    "date": "string",
    "depth_total": 10.0
  },
  "soil_layers": [
    {
      "layer_id": 1,
      "depth_from": 0.0,
      "depth_to": 2.5,
      "soil_type": "clay",
      "description": "Brown silty clay, ST sample, Su = 25 kPa",
      "sample_type": "ST",
      "strength_parameter": "Su",
      "strength_value": 25,
      "sieve_200_passing": 65,
      "water_content": 35.5,
      "color": "brown",
      "moisture": "moist",
      "consistency": "soft",
      "su_source": "Unconfined Compression Test"
    }
  ],
  "water_table": {
    "depth": 3.0,
    "date_encountered": "2024-01-01"
  },
  "notes": "Additional observations"
}
EXAMPLES OF CORRECT PROCESSING WITH UNIT CONVERSION AND SOIL CLASSIFICATION:
**SS SAMPLE EXAMPLES:**
1. "SS-18: Clay layer, N = 8, Su = 45 kPa from unconfined test"
   → Use: sample_type="SS", strength_parameter="SPT-N", strength_value=8
   → IGNORE the Su=45 kPa value for SS samples
2. "SS18: Soft clay, field N = 6, N-corrected = 10"
   → Use: sample_type="SS", strength_parameter="SPT-N", strength_value=6 (raw N)
   → IGNORE N-corrected value
**ST SAMPLE EXAMPLES WITH UNIT CONVERSION:**
1. "ST-5: Stiff clay, Su = 85 kPa from unconfined compression"
   → Use: sample_type="ST", strength_parameter="Su", strength_value=85
2. "ST-12: Medium clay, Su = 2.5 ksc from unconfined test"
   → Convert: 2.5 × 98 = 245 kPa
   → Use: sample_type="ST", strength_parameter="Su", strength_value=245
3. "ST sample: Clay, unconfined strength = 3.0 t/m²"
   → Convert: 3.0 × 9.81 = 29.43 kPa
   → Use: sample_type="ST", strength_parameter="Su", strength_value=29.43
**SOIL CLASSIFICATION EXAMPLES:**
1. "Brown silty clay, no sieve analysis data"
   → soil_type="clay", sieve_200_passing=null
   → Note: "Assumed clay - no sieve analysis data available"
2. "Sandy clay, sieve #200: 75% passing"
   → soil_type="clay", sieve_200_passing=75
   → Classification: Clay (>50% passing)
3. "Medium sand, gradation test shows 25% passing #200"
   → soil_type="sand", sieve_200_passing=25
   → Classification: Sand (<50% passing)
4. "Dense sand layer" (NO sieve data mentioned)
   → soil_type="clay", sieve_200_passing=null
   → Note: "Assumed clay - no sieve analysis data available"
   → NEVER classify as sand without sieve data
CRITICAL LAYER GROUPING RULES:
1. MAXIMUM 7 LAYERS - If you identify more than 7 distinct zones, group adjacent similar layers
2. SEPARATE CLAY/SAND - Never group clay with sand, silt, or gravel layers
3. Group similar adjacent layers:
   - Combine "soft clay" + "soft clay" into one "soft clay" layer
   - Combine "medium sand" + "medium sand" into one "medium sand" layer
   - Combine layers with similar strength values (within 30% difference)
4. Maintain engineering significance:
   - Keep layers with significantly different strength parameters separate
   - Preserve important transitions (e.g., clay to sand interface)
   - Maintain water table interfaces as layer boundaries when significant
TECHNICAL RULES:
1. All numeric values must be numbers, not strings
2. For soil_type, use basic terms: "clay", "sand", "silt", "gravel" - do NOT include consistency
3. Include sample_type field: "SS" (Split Spoon) or "ST" (Shelby Tube)
4. Include sieve_200_passing field when available (percentage passing sieve #200)
5. Include water_content field when available (percentage water content for clay consistency checks)
6. Include su_source field: "Unconfined Compression Test" for direct measurements, or "Calculated from SPT-N" for conversions
7. Strength parameters:
   - SS samples: ALWAYS use "SPT-N" with RAW N-value (will be converted based on soil type)
   - ST samples with clay: Use "Su" with DIRECT value in kPa from unconfined compression test
   - For sand/gravel: Always use "SPT-N" with N-value
   - NEVER use Su for SS samples, NEVER calculate Su from SPT-N for ST samples that have direct Su
8. Put consistency separately in "consistency" field: "soft", "medium", "stiff", "loose", "dense", etc.
9. Ensure continuous depths (no gaps or overlaps)
10. All depths in meters, strength values as numbers
11. Return ONLY the JSON object, no additional text
GROUPING EXAMPLES:
- Original: [0-2m soft clay, 2-4m soft clay, 4-6m medium sand, 6-8m medium sand]
- Grouped: [0-4m soft clay, 4-8m medium sand] (4 layers reduced to 2)
STRENGTH PARAMETER EXAMPLES:
- SS sample: "Clay, N = 8 blows, Su = 40 kPa unconfined" → Use SPT-N = 8 (IGNORE Su for SS)
- ST sample: "Clay, Su = 45 kPa from unconfined test" → Use Su = 45 (DIRECT measurement)
- SS sample: "Clay, field N = 12, N-corrected = 18" → Use SPT-N = 12 (raw N, IGNORE corrected)"""

        messages = [{"role": "system", "content": system_prompt}]

        # Check if model supports images
        supports_images = self._supports_images()

        if text_content:
            messages.append({
                "role": "user",
                "content": f"Please analyze this soil boring log text:\n\n{text_content}"
            })

        if image_base64 and supports_images:
            messages.append({
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Please analyze this soil boring log image:"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        }
                    }
                ]
            })
        elif image_base64 and not supports_images:
            # Model doesn't support images; notify the user and continue with text only
            model_name = AVAILABLE_MODELS.get(self.model, {}).get('name', self.model)
            st.warning(f"⚠️ {model_name} doesn't support image analysis. Using text content only.")
            if not text_content:
                st.error("❌ No text content available for analysis. Please ensure your document has extractable text or use a model that supports images.")
                return {"error": "No text content available and model doesn't support images"}

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=2000,
                temperature=0.1
            )

            content = response.choices[0].message.content

            # Try to extract JSON from response
            try:
                # Try different JSON extraction methods
                json_str = content.strip()

                # Remove markdown code blocks if present
                if "```json" in json_str:
                    json_start = json_str.find("```json") + 7
                    json_end = json_str.find("```", json_start)
                    json_str = json_str[json_start:json_end].strip()
                elif "```" in json_str:
                    # Remove any code blocks
                    json_start = json_str.find("```") + 3
                    json_end = json_str.rfind("```")
                    if json_end > json_start:
                        json_str = json_str[json_start:json_end].strip()

                # Find JSON object boundaries
                if not json_str.startswith("{"):
                    start_idx = json_str.find("{")
                    if start_idx != -1:
                        json_str = json_str[start_idx:]

                if not json_str.endswith("}"):
                    end_idx = json_str.rfind("}")
                    if end_idx != -1:
                        json_str = json_str[:end_idx + 1]

                # Parse JSON
                result = json.loads(json_str)

                # Validate required structure
                if "soil_layers" not in result:
                    result["soil_layers"] = []
                if "project_info" not in result:
                    result["project_info"] = {}

                # Validate and enhance soil classification
                result = self.calculator.validate_soil_classification(result)

                # Enhance layers with calculated parameters
                if result["soil_layers"]:
                    result["soil_layers"] = self.calculator.enhance_soil_layers(result["soil_layers"])

                # Process with SS/ST classification
                result = self.calculator.process_with_ss_st_classification(result)

                # Enforce 7-layer limit and clay/sand separation
                result["soil_layers"] = self._enforce_layer_grouping_rules(result["soil_layers"])

                return result

            except json.JSONDecodeError as e:
                st.error(f"Failed to parse LLM response as JSON: {str(e)}")
                # Try to create a basic structure from the response
                return self._fallback_parse(content)

        except Exception as e:
            error_msg = str(e)

            # Check for model availability error
            if "not a valid model ID" in error_msg:
                st.error(f"❌ Model '{self.model}' is not available on OpenRouter")
                st.info("💡 Try switching to a different model in the sidebar (Claude-3.5 Sonnet or GPT-4 Turbo are recommended)")
                return {"error": f"Model not available: {self.model}"}
            else:
                st.error(f"Error calling LLM API: {error_msg}")
                return {"error": error_msg}
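
    # --- Illustrative helper (not called by the pipeline) ---------------------
    # The system prompt above asks the model to convert Su values to kPa before
    # returning them. This sketch mirrors those same factors in Python, e.g. for a
    # post-hoc sanity check on extracted values. The unit labels are assumptions
    # about how units might be written in a log; the authoritative conversion
    # logic lives in SoilCalculations.standardize_units().
    @staticmethod
    def _su_to_kpa(value: float, unit: str) -> float:
        """Convert an undrained shear strength value to kPa (illustrative sketch)."""
        factors = {
            "kpa": 1.0,       # already kPa
            "ksc": 98.0,      # kg/cm² (ksc) -> kPa
            "kg/cm2": 98.0,
            "t/m2": 9.81,     # tonnes/m² -> kPa
            "psi": 6.895,
            "psf": 0.048,
            "mpa": 1000.0,
        }
        key = unit.strip().lower().replace("²", "2")
        if key not in factors:
            raise ValueError(f"Unsupported Su unit: {unit}")
        return value * factors[key]
        # e.g. _su_to_kpa(2.5, "ksc") == 245.0, matching the prompt's worked example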

    def _fallback_parse(self, content):
        """Fallback parser when JSON parsing fails."""
        try:
            import re

            # Try to extract basic information using regex
            layers = []

            # Look for depth patterns like "0-2m", "2-5m", etc.
            depth_pattern = r'(\d+(?:\.\d+)?)\s*-\s*(\d+(?:\.\d+)?)m?\s*[:|]?\s*([^,\n]+)'
            matches = re.findall(depth_pattern, content, re.IGNORECASE)

            for i, match in enumerate(matches):
                depth_from = float(match[0])
                depth_to = float(match[1])
                description = match[2].strip()

                # Extract soil type from description
                soil_type = "unknown"
                if "clay" in description.lower():
                    if "soft" in description.lower():
                        soil_type = "soft clay"
                    elif "stiff" in description.lower():
                        soil_type = "stiff clay"
                    else:
                        soil_type = "medium clay"
                elif "sand" in description.lower():
                    if "loose" in description.lower():
                        soil_type = "loose sand"
                    elif "dense" in description.lower():
                        soil_type = "dense sand"
                    else:
                        soil_type = "medium dense sand"

                layers.append({
                    "layer_id": i + 1,
                    "depth_from": depth_from,
                    "depth_to": depth_to,
                    "soil_type": soil_type,
                    "description": description,
                    "strength_parameter": "Su" if "clay" in soil_type else "SPT-N",
                    "strength_value": 50,  # Default value
                    "color": "unknown",
                    "moisture": "unknown",
                    "consistency": "unknown"
                })

            return {
                "project_info": {
                    "project_name": "Unknown",
                    "boring_id": "Unknown",
                    "location": "Unknown",
                    "date": "Unknown",
                    "depth_total": max([layer["depth_to"] for layer in layers]) if layers else 0
                },
                "soil_layers": layers,
                "water_table": {"depth": None, "date_encountered": None},
                "notes": "Parsed using fallback method - original response: " + content[:200] + "..."
            }

        except Exception as e:
            return {"error": f"Fallback parsing failed: {str(e)}", "raw_response": content}

    def _enforce_layer_grouping_rules(self, layers):
        """Enforce the 7-layer maximum and clay/sand separation rules."""
        if not layers or len(layers) <= 7:
            return layers

        st.info(f"🔄 Grouping layers: {len(layers)} layers found, grouping to meet 7-layer limit")

        # Group similar adjacent layers to reduce count to 7 or fewer
        grouped_layers = []
        i = 0

        while i < len(layers) and len(grouped_layers) < 7:
            current_layer = layers[i].copy()

            # Check if we can group with next layers
            if i < len(layers) - 1 and len(grouped_layers) < 6:  # Leave room for at least one more layer
                next_layer = layers[i + 1]

                # Group if same soil type and similar consistency (but never clay with sand)
                can_group = (
                    current_layer.get('soil_type') == next_layer.get('soil_type') and
                    current_layer.get('consistency') == next_layer.get('consistency') and
                    not (current_layer.get('soil_type') == 'clay' and next_layer.get('soil_type') == 'sand') and
                    not (current_layer.get('soil_type') == 'sand' and next_layer.get('soil_type') == 'clay')
                )

                if can_group:
                    # Merge the layers
                    current_layer['depth_to'] = next_layer.get('depth_to', current_layer['depth_to'])
                    current_layer['description'] = f"Grouped: {current_layer.get('description', '')} + {next_layer.get('description', '')}"

                    # Average strength values
                    curr_strength = current_layer.get('strength_value', 0) or 0
                    next_strength = next_layer.get('strength_value', 0) or 0
                    if curr_strength and next_strength:
                        current_layer['strength_value'] = (curr_strength + next_strength) / 2
                    elif next_strength:
                        current_layer['strength_value'] = next_strength

                    # Skip next layer since it's been merged
                    i += 2
                else:
                    i += 1
            else:
                i += 1

            grouped_layers.append(current_layer)

        # If still too many layers, group remaining similar layers into existing ones
        if i < len(layers):
            for remaining_layer in layers[i:]:
                # Find a compatible layer to merge with
                merged = False
                for existing_layer in grouped_layers:
                    if (existing_layer.get('soil_type') == remaining_layer.get('soil_type') and
                            existing_layer.get('consistency') == remaining_layer.get('consistency')):
                        existing_layer['depth_to'] = max(existing_layer['depth_to'], remaining_layer.get('depth_to', 0))
                        existing_layer['description'] += f" + {remaining_layer.get('description', '')}"
                        merged = True
                        break

                if not merged and len(grouped_layers) < 7:
                    grouped_layers.append(remaining_layer)

        # Update layer IDs
        for idx, layer in enumerate(grouped_layers):
            layer['layer_id'] = idx + 1

        # Add note about grouping
        if len(grouped_layers) < len(layers):
            st.success(f"✅ Grouped {len(layers)} layers into {len(grouped_layers)} layers (7-layer limit)")

        return grouped_layers[:7]  # Ensure maximum 7 layers
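
    # Example of the merging behaviour above (hypothetical input): given nine
    # adjacent layers that are all {"soil_type": "clay", "consistency": "soft"},
    # the while-loop merges them pairwise (1+2, 3+4, ...), so nine layers collapse
    # to five in a single pass. If the 7-layer cap is reached before every input
    # layer has been consumed, the trailing loop folds the leftovers into a
    # compatible grouped layer so the result never exceeds seven entries.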

    def refine_soil_layers(self, soil_data, user_feedback):
        """Refine soil layer interpretation based on user feedback."""
        system_prompt = """You are an expert geotechnical engineer. The user has provided feedback on the initial soil boring log analysis.
Please refine the soil layer interpretation based on their input and return the updated JSON in the same format."""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Original analysis: {json.dumps(soil_data, indent=2)}"},
            {"role": "user", "content": f"User feedback: {user_feedback}"}
        ]

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=2000,
                temperature=0.1
            )

            content = response.choices[0].message.content

            try:
                if "```json" in content:
                    json_start = content.find("```json") + 7
                    json_end = content.find("```", json_start)
                    json_str = content[json_start:json_end].strip()
                else:
                    json_str = content

                return json.loads(json_str)
            except json.JSONDecodeError:
                return {"error": "Invalid JSON response", "raw_response": content}

        except Exception as e:
            return {"error": str(e)}
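

# --- Example usage (illustrative sketch) --------------------------------------
# Outside the Streamlit app, the client can be exercised directly. The
# environment-variable name below is an assumption made for this sketch; the
# provider and model fall back to the defaults defined in config.py, and the
# sample log text is fabricated purely for illustration.
if __name__ == "__main__":
    import os

    api_key = os.environ.get("OPENROUTER_API_KEY")  # hypothetical variable name
    if api_key:
        client = LLMClient(api_key=api_key)
        sample_log = (
            "0-2m: Soft brown clay, ST-1, Su = 2.5 ksc\n"
            "2-6m: Stiff gray clay, SS-2, N = 18"
        )
        result = client.analyze_soil_boring_log(text_content=sample_log)
        print(json.dumps(result, indent=2))
    else:
        print("Set an OpenRouter API key to run this example.")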