mtyrrell committed on
Commit
1582855
·
1 Parent(s): bae4d78

updated main

Browse files
Files changed (1) hide show
  1. app/main.py +182 -0
app/main.py CHANGED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EUDR INGESTOR
2
+
3
+ import gradio as gr
4
+ import os
5
+ import logging
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from gradio_client import Client, handle_file
9
+ import pandas as pd
10
+
11
+ # Local imports
12
+ from .utils import getconfig
13
+
14
# Load settings from the params.cfg configuration file
config = getconfig("params.cfg")

# Set up INFO-level logging with a timestamp / level / message layout
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# WHISP API endpoint, read from the [whisp] config section; the URL below is
# used when the option is not set
WHISP_API_URL = config.get(
    'whisp',
    'WHISP_API_URL',
    fallback="https://giz-chatfed-whisp.hf.space/",
)
22
+
23
def get_value(df, colname, default="No disponible"):
    """Fetch a value from a WhispAPI-style Column/Value dataframe.

    Args:
        df: DataFrame expected to hold a "Column" column with statistic names
            and a "Value" column with the corresponding values.
        colname: Name to look up in the "Column" column.
        default: Value returned when the lookup fails. Defaults to the
            placeholder "No disponible".

    Returns:
        The first "Value" entry whose "Column" equals ``colname``; ``default``
        if the dataframe lacks the expected columns, no row matches, or the
        matched cell is null (None/NaN).
    """
    if "Column" in df.columns and "Value" in df.columns:
        match = df.loc[df["Column"] == colname, "Value"]
        if not match.empty:
            value = match.values[0]
            # A null cell carries no information; treat it like a missing row
            # so "nan"/"None" never ends up in the formatted output.
            # is_scalar guards pd.isna against list-like cell contents.
            if pd.api.types.is_scalar(value) and pd.isna(value):
                return default
            return value
    return default
30
+
31
def _with_hectares(value):
    """Append the hectare unit unless *value* is the missing-data placeholder."""
    if isinstance(value, str) and value == "No disponible":
        return value
    return f"{value} hectáreas"


def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable (Spanish) text for RAG context.

    Args:
        df: Column/Value dataframe as consumed by ``get_value``.

    Returns:
        A multi-line context string with country, administrative region, area,
        risk assessments and deforestation figures. If anything fails while
        building it, a one-line error message string is returned instead of
        raising.
    """
    try:
        # Country code mapping
        country_codes = {
            'HND': 'Honduras', 'GTM': 'Guatemala', 'ECU': 'Ecuador',
            'COL': 'Colombia', 'PER': 'Peru', 'BRA': 'Brasil',
            'BOL': 'Bolivia', 'CRI': 'Costa Rica', 'PAN': 'Panamá',
            'NIC': 'Nicaragua'
        }

        country_raw = get_value(df, "Country")
        # Unknown codes (and the missing-data placeholder) pass through as-is
        country = country_codes.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_raw = get_value(df, "Area")

        # Format area: more decimals for small plots, thousands separator for large
        try:
            area_num = float(area_raw)
            if area_num < 1:
                area_text = f"{area_num:.3f} hectáreas"
            elif area_num < 100:
                area_text = f"{area_num:.2f} hectáreas"
            else:
                area_text = f"{area_num:,.1f} hectáreas"
        except (TypeError, ValueError):
            # Non-numeric area (e.g. the placeholder): show it verbatim
            area_text = str(area_raw) if area_raw != "Not available" else "No disponible"

        # Risk assessments
        risk_pcrop = get_value(df, "risk_pcrop")
        risk_acrop = get_value(df, "risk_acrop")
        risk_timber = get_value(df, "risk_timber")
        def_after_2020_raw = get_value(df, "TMF_def_after_2020")
        def_before_2020_raw = get_value(df, "TMF_def_before_2020")

        # Only attach the unit when there is an actual figure to report
        def_before_text = _with_hectares(def_before_2020_raw)
        def_after_text = _with_hectares(def_after_2020_raw)

        # Format for RAG context
        context = f"""=== ANÁLISIS GEOGRÁFICO WHISP API ===
País: {country}
Región administrativa: {admin_level}
Área total: {area_text}

EVALUACIÓN DE RIESGO DE DEFORESTACIÓN:
- Cultivos permanentes (Café, cacao, aceite de palma): {risk_pcrop}
- Cultivos anuales (Soja, maíz, arroz): {risk_acrop}
- Extracción de madera: {risk_timber}

DATOS DE DEFORESTACIÓN:
- Deforestación antes de 2020: {def_before_text}
- Deforestación después de 2020: {def_after_text}

Fuente: Forest Data Partnership (FDaP) WhispAPI
Fecha de análisis: {datetime.now().isoformat()}"""

        return context

    except Exception as e:
        # Best-effort: report the failure as text rather than raising
        return f"Error en el análisis geográfico: {str(e)}"
88
+
89
def process_geojson_whisp(file_content: bytes, filename: str) -> tuple[str, dict]:
    """Process GeoJSON content through the WHISP API and return formatted context.

    The bytes are written to a temporary ``.geojson`` file, which is sent to
    the ``/get_statistics`` endpoint of the service at ``WHISP_API_URL``. The
    temporary file is always removed afterwards.

    Args:
        file_content: Raw bytes of the GeoJSON file.
        filename: Original file name (not used by this function).

    Returns:
        A ``(formatted_context, metadata)`` tuple: the text produced by
        ``format_whisp_statistics`` and a dict with the analysis type,
        country, admin level, area and risk levels.

    Raises:
        Exception: If any step fails; the original error is chained as the
            cause.
    """
    import tempfile

    try:
        # delete=False so the file survives close() and can be uploaded by path
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.geojson')
        tmp_file_path = tmp_file.name

        try:
            # Write inside the try/finally so a failed write cannot leak the file
            with tmp_file:
                tmp_file.write(file_content)

            # Call WHISP API
            client = Client(WHISP_API_URL)
            result = client.predict(
                file=handle_file(tmp_file_path),
                api_name="/get_statistics"
            )

            # Convert result to DataFrame
            df = pd.DataFrame(result['data'], columns=result['headers'])

            # Format for RAG context
            formatted_context = format_whisp_statistics(df)

            metadata = {
                "analysis_type": "whisp_geojson",
                "country": get_value(df, "Country"),
                "admin_level": get_value(df, "Admin_Level_1"),
                "area": get_value(df, "Area"),
                "risk_levels": {
                    "pcrop": get_value(df, "risk_pcrop"),
                    "acrop": get_value(df, "risk_acrop"),
                    "timber": get_value(df, "risk_timber")
                }
            }

            return formatted_context, metadata

        finally:
            # Clean up temporary file
            os.unlink(tmp_file_path)

    except Exception as e:
        logger.error(f"WHISP API error: {str(e)}")
        # Chain the cause so the original traceback is preserved
        raise Exception(f"Failed to process GeoJSON through WHISP API: {str(e)}") from e
133
+
134
def ingest(file):
    """Main ingestion function: process a GeoJSON upload and return WHISP context.

    Args:
        file: The uploaded file, either as a filesystem path (``str`` or
            ``os.PathLike``) or as an object exposing the path in ``.name``.
            ``None`` means nothing was uploaded.

    Returns:
        The formatted WHISP analysis context string, or the message
        "No file uploaded" when ``file`` is ``None``.

    Raises:
        Exception: If the extension is not ``.geojson``/``.json``, the file
            cannot be read, or WHISP processing fails; the original error is
            chained as the cause.
    """
    if file is None:
        # Single string, matching the one output every other path returns
        return "No file uploaded"

    try:
        # Accept both a plain path and a file wrapper carrying the path in .name
        file_path = file if isinstance(file, (str, os.PathLike)) else file.name
        filename = os.path.basename(file_path)

        # Check file extension before reading anything from disk
        file_extension = os.path.splitext(filename)[1].lower()
        if file_extension not in ['.geojson', '.json']:
            raise ValueError(f"Unsupported file type: {file_extension}. Only GeoJSON files are supported.")

        with open(file_path, 'rb') as f:
            file_content = f.read()

        # Process through WHISP API; the metadata is not needed here
        context, _metadata = process_geojson_whisp(file_content, filename)

        logger.info(f"Successfully processed GeoJSON {filename} through WHISP API")

        return context

    except Exception as e:
        logger.error(f"GeoJSON processing failed: {str(e)}")
        raise Exception(f"Processing failed: {str(e)}") from e
160
+
161
if __name__ == "__main__":
    # Input: a single file upload restricted to GeoJSON/JSON extensions
    geojson_upload = gr.File(
        label="GeoJSON Upload",
        file_types=[".geojson", ".json"],
    )

    # Output: a text area holding the formatted analysis context
    analysis_box = gr.Textbox(
        label="WHISP Analysis Context",
        lines=15,
        show_copy_button=True,
    )

    ui = gr.Interface(
        fn=ingest,
        inputs=geojson_upload,
        outputs=analysis_box,
        title="EUDR Ingestion Module - WHISP API",
        description="Processes GeoJSON files through WHISP API and returns geographic analysis context for RAG pipelines.",
        api_name="ingest",
    )

    # Listen on all interfaces, port 7860, with errors shown in the UI
    ui.launch(server_name="0.0.0.0", server_port=7860, show_error=True)