Pulastya B commited on
Commit
7af9e82
·
1 Parent(s): 6c9c47f

Add HuggingFace storage integration - users can now persist datasets, models, and plots to their own HuggingFace account

Browse files
FILE_STORAGE_GUIDE.md ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File Storage Architecture - Implementation Guide
2
+
3
+ ## Overview
4
+
5
+ This document outlines the complete file storage architecture for persisting user files (plots, CSVs, reports, models) across sessions.
6
+
7
+ ## Architecture
8
+
9
+ ```
10
+ ┌─────────────────────────────────────────────────────────────────────────┐
11
+ │ STORAGE ARCHITECTURE │
12
+ ├─────────────────────────────────────────────────────────────────────────┤
13
+ │ │
14
+ │ Frontend (React) │
15
+ │ ┌─────────────────────────────────────────────────────────────────┐ │
16
+ │ │ • PlotRenderer.tsx - Renders Plotly charts from JSON │ │
17
+ │ │ • Assets panel - Shows user files from Supabase │ │
18
+ │ │ • Download buttons - Uses presigned R2 URLs │ │
19
+ │ └─────────────────────────────────────────────────────────────────┘ │
20
+ │ │ │
21
+ │ ▼ │
22
+ │ Backend (FastAPI) │
23
+ │ ┌─────────────────────────────────────────────────────────────────┐ │
24
+ │ │ /api/files - List user files │ │
25
+ │ │ /api/files/{id} - Get file with download URL │ │
26
+ │ │ /api/files/stats/{user_id} - Storage statistics │ │
27
+ │ └─────────────────────────────────────────────────────────────────┘ │
28
+ │ │ │
29
+ │ ┌───────────────┴───────────────┐ │
30
+ │ ▼ ▼ │
31
+ │ Supabase (Metadata) Cloudflare R2 (Files) │
32
+ │ ┌─────────────────┐ ┌─────────────────────┐ │
33
+ │ │ user_files │ │ /users/{user_id}/ │ │
34
+ │ │ - id │ ──────────► │ /plots/*.json.gz │ │
35
+ │ │ - user_id │ │ /data/*.csv.gz │ │
36
+ │ │ - r2_key │ │ /reports/*.html │ │
37
+ │ │ - expires_at │ │ /models/*.pkl.gz │ │
38
+ │ └─────────────────┘ └─────────────────────┘ │
39
+ │ │
40
+ └─────────────────────────────────────────────────────────────────────────┘
41
+ ```
42
+
43
+ ## Setup Steps
44
+
45
+ ### 1. Cloudflare R2 Setup
46
+
47
+ 1. Go to [Cloudflare Dashboard](https://dash.cloudflare.com)
48
+ 2. Navigate to R2 → Create Bucket → Name it `ds-agent-files`
49
+ 3. Go to R2 → Manage R2 API Tokens → Create API Token
50
+ 4. Note down:
51
+ - Account ID (from URL or overview page)
52
+ - Access Key ID
53
+ - Secret Access Key
54
+
55
+ ### 2. Environment Variables
56
+
57
+ Add to your `.env` file:
58
+
59
+ ```bash
60
+ # Cloudflare R2
61
+ R2_ACCOUNT_ID=your_account_id
62
+ R2_ACCESS_KEY_ID=your_access_key
63
+ R2_SECRET_ACCESS_KEY=your_secret_key
64
+ R2_BUCKET_NAME=ds-agent-files
65
+ R2_PUBLIC_URL= # Optional: custom domain
66
+
67
+ # Supabase (existing)
68
+ SUPABASE_URL=your_supabase_url
69
+ SUPABASE_SERVICE_KEY=your_service_key
70
+ ```
71
+
72
+ ### 3. Supabase Table
73
+
74
+ Run this SQL in Supabase SQL Editor:
75
+
76
+ ```sql
77
+ CREATE TABLE user_files (
78
+ id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
79
+ user_id UUID REFERENCES auth.users(id) ON DELETE CASCADE,
80
+ session_id TEXT,
81
+ file_type TEXT NOT NULL CHECK (file_type IN ('plot', 'csv', 'report', 'model')),
82
+ file_name TEXT NOT NULL,
83
+ r2_key TEXT NOT NULL UNIQUE,
84
+ size_bytes BIGINT,
85
+ mime_type TEXT,
86
+ metadata JSONB DEFAULT '{}',
87
+ created_at TIMESTAMPTZ DEFAULT NOW(),
88
+ expires_at TIMESTAMPTZ DEFAULT (NOW() + INTERVAL '7 days'),
89
+ is_deleted BOOLEAN DEFAULT FALSE
90
+ );
91
+
92
+ -- Indexes
93
+ CREATE INDEX idx_user_files_user_id ON user_files(user_id);
94
+ CREATE INDEX idx_user_files_session ON user_files(session_id);
95
+ CREATE INDEX idx_user_files_expires ON user_files(expires_at) WHERE NOT is_deleted;
96
+
97
+ -- RLS Policies
98
+ ALTER TABLE user_files ENABLE ROW LEVEL SECURITY;
99
+
100
+ CREATE POLICY "Users can view own files" ON user_files
101
+ FOR SELECT USING (auth.uid() = user_id);
102
+
103
+ CREATE POLICY "Users can insert own files" ON user_files
104
+ FOR INSERT WITH CHECK (auth.uid() = user_id);
105
+
106
+ CREATE POLICY "Users can delete own files" ON user_files
107
+ FOR DELETE USING (auth.uid() = user_id);
108
+ ```
109
+
110
+ ### 4. Python Dependencies
111
+
112
+ Add to `requirements.txt`:
113
+
114
+ ```
115
+ boto3>=1.28.0
116
+ ```
117
+
118
+ ## Usage in Orchestrator
119
+
120
+ When generating files in the orchestrator, save them to R2:
121
+
122
+ ```python
123
+ from src.storage.r2_storage import store_plotly_figure, store_dataframe_csv
124
+ from src.storage.user_files_service import get_files_service, FileType
125
+
126
+ # Store a Plotly figure
127
+ def save_plot(user_id: str, session_id: str, fig, plot_name: str):
128
+ r2_key, size = store_plotly_figure(user_id, fig, plot_name)
129
+
130
+ # Record in Supabase
131
+ files_service = get_files_service()
132
+ files_service.create_file_record(
133
+ user_id=user_id,
134
+ file_type=FileType.PLOT,
135
+ file_name=plot_name,
136
+ r2_key=r2_key,
137
+ size_bytes=size,
138
+ session_id=session_id,
139
+ mime_type='application/json',
140
+ metadata={'plot_type': 'plotly'}
141
+ )
142
+
143
+ return r2_key
144
+
145
+ # Store a CSV
146
+ def save_csv(user_id: str, session_id: str, df, filename: str):
147
+ r2_key, compressed_size, original_size = store_dataframe_csv(
148
+ user_id, df, filename, "Processed dataset"
149
+ )
150
+
151
+ files_service = get_files_service()
152
+ files_service.create_file_record(
153
+ user_id=user_id,
154
+ file_type=FileType.CSV,
155
+ file_name=filename,
156
+ r2_key=r2_key,
157
+ size_bytes=compressed_size,
158
+ session_id=session_id,
159
+ mime_type='text/csv',
160
+ metadata={
161
+ 'original_size': original_size,
162
+ 'compression_ratio': f"{(1 - compressed_size/original_size)*100:.1f}%"
163
+ }
164
+ )
165
+
166
+ return r2_key
167
+ ```
168
+
169
+ ## Storage Efficiency
170
+
171
+ ### Plot Storage (Before vs After)
172
+
173
+ | Format | Size | Load Time |
174
+ |--------|------|-----------|
175
+ | Plotly HTML | 200KB - 2MB | 2-5 seconds |
176
+ | Plotly JSON (gzip) | 5KB - 20KB | <0.5 seconds |
177
+
178
+ **95% reduction in storage!**
179
+
180
+ ### CSV Compression
181
+
182
+ | Original Size | Compressed (gzip) | Ratio |
183
+ |---------------|-------------------|-------|
184
+ | 10MB | 1-2MB | 80-90% |
185
+ | 100MB | 10-20MB | 80-90% |
186
+ | 1GB | 100-200MB | 80-90% |
187
+
188
+ ## Cleanup Strategy
189
+
190
+ ### Automatic Expiration
191
+
192
+ Files expire after 7 days by default. Run this cleanup job daily:
193
+
194
+ ```python
195
+ from src.storage.r2_storage import get_r2_service
196
+ from src.storage.user_files_service import get_files_service
197
+
198
+ def cleanup_expired_files():
199
+ files_service = get_files_service()
200
+ r2_service = get_r2_service()
201
+
202
+ # Get expired files from Supabase
203
+ expired = files_service.get_expired_files()
204
+
205
+ for file in expired:
206
+ # Delete from R2
207
+ r2_service.delete_file(file.r2_key)
208
+ # Delete from Supabase
209
+ files_service.hard_delete_file(file.id)
210
+
211
+ return len(expired)
212
+ ```
213
+
214
+ ### User Download Prompt
215
+
216
+ When files are about to expire (1 day left), show a notification:
217
+
218
+ ```typescript
219
+ // Frontend
220
+ const expiringFiles = files.filter(f =>
221
+ new Date(f.expires_at) < new Date(Date.now() + 24 * 60 * 60 * 1000)
222
+ );
223
+
224
+ if (expiringFiles.length > 0) {
225
+ showNotification(
226
+ `${expiringFiles.length} files expiring soon! Download them now.`
227
+ );
228
+ }
229
+ ```
230
+
231
+ ## Cost Estimates
232
+
233
+ ### Cloudflare R2 (10GB free, then $0.015/GB)
234
+
235
+ | Users | Files/User | Avg Size | Total Storage | Monthly Cost |
236
+ |-------|------------|----------|---------------|--------------|
237
+ | 100 | 50 | 500KB | 2.5GB | FREE |
238
+ | 1,000 | 50 | 500KB | 25GB | $0.23 |
239
+ | 10,000 | 50 | 500KB | 250GB | $3.60 |
240
+
241
+ **Zero egress fees = users can download unlimited files for free!**
242
+
243
+ ## Next Steps
244
+
245
+ 1. ✅ Created R2StorageService (`src/storage/r2_storage.py`)
246
+ 2. ✅ Created UserFilesService (`src/storage/user_files_service.py`)
247
+ 3. ✅ Added API endpoints to `app.py`
248
+ 4. ✅ Created PlotRenderer component
249
+ 5. ⏳ TODO: Integrate with orchestrator to save files during workflow
250
+ 6. ⏳ TODO: Update frontend Assets panel to fetch from API
251
+ 7. ⏳ TODO: Add expiration notifications
252
+ 8. ⏳ TODO: Set up daily cleanup cron job
FRRONTEEEND/components/AuthPage.tsx CHANGED
@@ -33,6 +33,7 @@ const steps = [
33
  { id: "personal", title: "Personal Info" },
34
  { id: "goals", title: "Data Science Goals" },
35
  { id: "professional", title: "Professional" },
 
36
  ];
37
 
38
  interface FormData {
@@ -46,6 +47,7 @@ interface FormData {
46
  profession: string;
47
  experience: string;
48
  industry: string;
 
49
  }
50
 
51
  const fadeInUp = {
@@ -84,6 +86,7 @@ export const AuthPage: React.FC<AuthPageProps> = ({ onSuccess, onSkip }) => {
84
  profession: "",
85
  experience: "",
86
  industry: "",
 
87
  });
88
 
89
  // If user is already authenticated (OAuth), pre-fill email and switch to signup mode for onboarding
@@ -209,6 +212,7 @@ export const AuthPage: React.FC<AuthPageProps> = ({ onSuccess, onSkip }) => {
209
  profession: formData.profession,
210
  experience: formData.experience,
211
  industry: formData.industry,
 
212
  onboarding_completed: true
213
  };
214
 
@@ -303,6 +307,9 @@ export const AuthPage: React.FC<AuthPageProps> = ({ onSuccess, onSkip }) => {
303
  return formData.primaryGoal !== "";
304
  case 2:
305
  return formData.profession.trim() !== "" && formData.industry !== "";
 
 
 
306
  default:
307
  return true;
308
  }
@@ -873,6 +880,119 @@ export const AuthPage: React.FC<AuthPageProps> = ({ onSuccess, onSkip }) => {
873
  </CardContent>
874
  </>
875
  )}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
876
  </motion.div>
877
  </AnimatePresence>
878
 
 
33
  { id: "personal", title: "Personal Info" },
34
  { id: "goals", title: "Data Science Goals" },
35
  { id: "professional", title: "Professional" },
36
+ { id: "integrations", title: "Connect Storage" },
37
  ];
38
 
39
  interface FormData {
 
47
  profession: string;
48
  experience: string;
49
  industry: string;
50
+ huggingfaceToken: string;
51
  }
52
 
53
  const fadeInUp = {
 
86
  profession: "",
87
  experience: "",
88
  industry: "",
89
+ huggingfaceToken: "",
90
  });
91
 
92
  // If user is already authenticated (OAuth), pre-fill email and switch to signup mode for onboarding
 
212
  profession: formData.profession,
213
  experience: formData.experience,
214
  industry: formData.industry,
215
+ huggingface_token: formData.huggingfaceToken || null,
216
  onboarding_completed: true
217
  };
218
 
 
307
  return formData.primaryGoal !== "";
308
  case 2:
309
  return formData.profession.trim() !== "" && formData.industry !== "";
310
+ case 3:
311
+ // HuggingFace token is optional, always valid
312
+ return true;
313
  default:
314
  return true;
315
  }
 
880
  </CardContent>
881
  </>
882
  )}
883
+
884
+ {currentStep === 3 && (
885
+ <>
886
+ <CardHeader>
887
+ <div className="flex justify-center mb-2">
888
+ <div className="w-12 h-12 bg-yellow-500/10 rounded-full flex items-center justify-center">
889
+ <svg className="w-6 h-6 text-yellow-400" viewBox="0 0 24 24" fill="currentColor">
890
+ <path d="M12 0C5.373 0 0 5.373 0 12s5.373 12 12 12 12-5.373 12-12S18.627 0 12 0zm0 2a9.95 9.95 0 017.07 2.929A9.95 9.95 0 0122 12a9.95 9.95 0 01-2.929 7.071A9.95 9.95 0 0112 22a9.95 9.95 0 01-7.071-2.929A9.95 9.95 0 012 12a9.95 9.95 0 012.929-7.071A9.95 9.95 0 0112 2zm0 3.5a6.5 6.5 0 100 13 6.5 6.5 0 000-13z"/>
891
+ </svg>
892
+ </div>
893
+ </div>
894
+ <CardTitle className="text-white text-center">Connect HuggingFace</CardTitle>
895
+ <CardDescription className="text-white/50 text-center">
896
+ Store your datasets, models & reports securely on HuggingFace
897
+ </CardDescription>
898
+ </CardHeader>
899
+ <CardContent className="space-y-4">
900
+ <motion.div
901
+ variants={fadeInUp}
902
+ className="p-4 bg-gradient-to-r from-yellow-500/10 to-orange-500/10 border border-yellow-500/20 rounded-xl"
903
+ >
904
+ <h4 className="text-sm font-semibold text-yellow-300 mb-2">🚀 Why connect HuggingFace?</h4>
905
+ <ul className="text-xs text-white/60 space-y-1.5">
906
+ <li className="flex items-start gap-2">
907
+ <span className="text-green-400 mt-0.5">✓</span>
908
+ <span><strong className="text-white/80">Persist your work</strong> - Datasets, models & plots saved permanently</span>
909
+ </li>
910
+ <li className="flex items-start gap-2">
911
+ <span className="text-green-400 mt-0.5">✓</span>
912
+ <span><strong className="text-white/80">One-click deployment</strong> - Deploy models as APIs instantly</span>
913
+ </li>
914
+ <li className="flex items-start gap-2">
915
+ <span className="text-green-400 mt-0.5">✓</span>
916
+ <span><strong className="text-white/80">Version control</strong> - Git-based versioning for free</span>
917
+ </li>
918
+ <li className="flex items-start gap-2">
919
+ <span className="text-green-400 mt-0.5">✓</span>
920
+ <span><strong className="text-white/80">You own your data</strong> - Everything stored in YOUR account</span>
921
+ </li>
922
+ </ul>
923
+ </motion.div>
924
+
925
+ <motion.div variants={fadeInUp} className="space-y-2">
926
+ <Label htmlFor="hfToken" className="text-white/70 flex items-center gap-2">
927
+ HuggingFace Access Token
928
+ <span className="text-xs text-white/40">(Optional - can add later)</span>
929
+ </Label>
930
+ <div className="relative">
931
+ <Input
932
+ id="hfToken"
933
+ type={showPassword ? "text" : "password"}
934
+ placeholder="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
935
+ value={formData.huggingfaceToken}
936
+ onChange={(e) => updateFormData("huggingfaceToken", e.target.value)}
937
+ className="bg-white/5 border-white/10 text-white placeholder:text-white/30 focus:border-yellow-500/50 pr-10 font-mono text-sm"
938
+ />
939
+ <button
940
+ type="button"
941
+ onClick={() => setShowPassword(!showPassword)}
942
+ className="absolute right-3 top-1/2 -translate-y-1/2 text-white/40 hover:text-white/60"
943
+ >
944
+ {showPassword ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
945
+ </button>
946
+ </div>
947
+ <p className="text-xs text-white/40">
948
+ Get your token from{" "}
949
+ <a
950
+ href="https://huggingface.co/settings/tokens"
951
+ target="_blank"
952
+ rel="noopener noreferrer"
953
+ className="text-yellow-400 hover:text-yellow-300 underline"
954
+ >
955
+ huggingface.co/settings/tokens
956
+ </a>
957
+ {" "}(needs write permissions)
958
+ </p>
959
+ </motion.div>
960
+
961
+ <motion.div
962
+ variants={fadeInUp}
963
+ className="p-3 bg-white/5 border border-white/10 rounded-lg"
964
+ >
965
+ <p className="text-xs text-white/50">
966
+ 🔒 <strong className="text-white/70">Security:</strong> Your token is encrypted and stored securely.
967
+ We only use it to save files to your HuggingFace account. You can revoke it anytime.
968
+ </p>
969
+ </motion.div>
970
+
971
+ <AnimatePresence>
972
+ {error && (
973
+ <motion.div
974
+ initial={{ opacity: 0, y: -10 }}
975
+ animate={{ opacity: 1, y: 0 }}
976
+ exit={{ opacity: 0 }}
977
+ className="p-3 bg-red-500/10 border border-red-500/20 rounded-lg text-red-400 text-sm"
978
+ >
979
+ {error}
980
+ </motion.div>
981
+ )}
982
+ {success && (
983
+ <motion.div
984
+ initial={{ opacity: 0, y: -10 }}
985
+ animate={{ opacity: 1, y: 0 }}
986
+ exit={{ opacity: 0 }}
987
+ className="p-3 bg-green-500/10 border border-green-500/20 rounded-lg text-green-400 text-sm"
988
+ >
989
+ {success}
990
+ </motion.div>
991
+ )}
992
+ </AnimatePresence>
993
+ </CardContent>
994
+ </>
995
+ )}
996
  </motion.div>
997
  </AnimatePresence>
998
 
FRRONTEEEND/components/PlotRenderer.tsx ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useEffect, useRef, useState } from 'react';
2
+ import { Loader2, AlertCircle, Download, Maximize2, Minimize2 } from 'lucide-react';
3
+
4
+ interface PlotData {
5
+ type: 'plotly' | 'chartjs';
6
+ name: string;
7
+ data: any;
8
+ created_at: string;
9
+ }
10
+
11
+ interface PlotRendererProps {
12
+ plotData?: PlotData;
13
+ plotUrl?: string; // Fallback for legacy HTML plots
14
+ title: string;
15
+ onClose?: () => void;
16
+ }
17
+
18
+ // Lazy load Plotly to reduce bundle size
19
+ const loadPlotly = (): Promise<any> => {
20
+ return new Promise((resolve, reject) => {
21
+ if ((window as any).Plotly) {
22
+ resolve((window as any).Plotly);
23
+ return;
24
+ }
25
+
26
+ const script = document.createElement('script');
27
+ script.src = 'https://cdn.plot.ly/plotly-2.27.0.min.js';
28
+ script.async = true;
29
+ script.onload = () => resolve((window as any).Plotly);
30
+ script.onerror = () => reject(new Error('Failed to load Plotly'));
31
+ document.head.appendChild(script);
32
+ });
33
+ };
34
+
35
+ export const PlotRenderer: React.FC<PlotRendererProps> = ({
36
+ plotData,
37
+ plotUrl,
38
+ title,
39
+ onClose
40
+ }) => {
41
+ const containerRef = useRef<HTMLDivElement>(null);
42
+ const [loading, setLoading] = useState(true);
43
+ const [error, setError] = useState<string | null>(null);
44
+ const [isFullscreen, setIsFullscreen] = useState(false);
45
+
46
+ useEffect(() => {
47
+ if (!plotData && !plotUrl) {
48
+ setError('No plot data provided');
49
+ setLoading(false);
50
+ return;
51
+ }
52
+
53
+ const renderPlot = async () => {
54
+ try {
55
+ setLoading(true);
56
+ setError(null);
57
+
58
+ if (plotData && plotData.type === 'plotly') {
59
+ // Render Plotly chart from JSON data
60
+ const Plotly = await loadPlotly();
61
+
62
+ if (containerRef.current) {
63
+ // Extract data and layout from the plot data
64
+ const { data, layout, config } = plotData.data;
65
+
66
+ // Apply dark theme
67
+ const darkLayout = {
68
+ ...layout,
69
+ paper_bgcolor: 'rgba(0,0,0,0)',
70
+ plot_bgcolor: 'rgba(0,0,0,0)',
71
+ font: { color: '#ffffff' },
72
+ xaxis: {
73
+ ...layout?.xaxis,
74
+ gridcolor: 'rgba(255,255,255,0.1)',
75
+ linecolor: 'rgba(255,255,255,0.2)'
76
+ },
77
+ yaxis: {
78
+ ...layout?.yaxis,
79
+ gridcolor: 'rgba(255,255,255,0.1)',
80
+ linecolor: 'rgba(255,255,255,0.2)'
81
+ },
82
+ margin: { t: 40, r: 20, b: 40, l: 60 }
83
+ };
84
+
85
+ const darkConfig = {
86
+ ...config,
87
+ responsive: true,
88
+ displayModeBar: true,
89
+ displaylogo: false,
90
+ modeBarButtonsToRemove: ['lasso2d', 'select2d']
91
+ };
92
+
93
+ Plotly.newPlot(containerRef.current, data, darkLayout, darkConfig);
94
+ }
95
+ }
96
+
97
+ setLoading(false);
98
+ } catch (err) {
99
+ console.error('Error rendering plot:', err);
100
+ setError(err instanceof Error ? err.message : 'Failed to render plot');
101
+ setLoading(false);
102
+ }
103
+ };
104
+
105
+ renderPlot();
106
+
107
+ // Cleanup
108
+ return () => {
109
+ if (containerRef.current && (window as any).Plotly) {
110
+ (window as any).Plotly.purge(containerRef.current);
111
+ }
112
+ };
113
+ }, [plotData, plotUrl]);
114
+
115
+ // Handle window resize
116
+ useEffect(() => {
117
+ const handleResize = () => {
118
+ if (containerRef.current && (window as any).Plotly && plotData) {
119
+ (window as any).Plotly.Plots.resize(containerRef.current);
120
+ }
121
+ };
122
+
123
+ window.addEventListener('resize', handleResize);
124
+ return () => window.removeEventListener('resize', handleResize);
125
+ }, [plotData]);
126
+
127
+ const handleDownload = () => {
128
+ if (containerRef.current && (window as any).Plotly) {
129
+ (window as any).Plotly.downloadImage(containerRef.current, {
130
+ format: 'png',
131
+ width: 1200,
132
+ height: 800,
133
+ filename: title.replace(/\s+/g, '_')
134
+ });
135
+ }
136
+ };
137
+
138
+ const toggleFullscreen = () => {
139
+ setIsFullscreen(!isFullscreen);
140
+ };
141
+
142
+ // If we only have a URL (legacy HTML plot), use iframe
143
+ if (!plotData && plotUrl) {
144
+ return (
145
+ <div className={`relative ${isFullscreen ? 'fixed inset-0 z-50 bg-black' : 'w-full h-full'}`}>
146
+ <div className="absolute top-2 right-2 flex gap-2 z-10">
147
+ <button
148
+ onClick={toggleFullscreen}
149
+ className="p-2 rounded-lg bg-white/10 hover:bg-white/20 transition-colors"
150
+ >
151
+ {isFullscreen ? <Minimize2 className="w-4 h-4" /> : <Maximize2 className="w-4 h-4" />}
152
+ </button>
153
+ </div>
154
+ <iframe
155
+ src={plotUrl}
156
+ className="w-full h-full border-0"
157
+ title={title}
158
+ sandbox="allow-scripts allow-same-origin"
159
+ />
160
+ </div>
161
+ );
162
+ }
163
+
164
+ return (
165
+ <div className={`relative ${isFullscreen ? 'fixed inset-0 z-50 bg-[#0a0a0a]' : 'w-full h-full'}`}>
166
+ {/* Controls */}
167
+ <div className="absolute top-2 right-2 flex gap-2 z-10">
168
+ <button
169
+ onClick={handleDownload}
170
+ className="p-2 rounded-lg bg-white/10 hover:bg-white/20 transition-colors"
171
+ title="Download as PNG"
172
+ >
173
+ <Download className="w-4 h-4" />
174
+ </button>
175
+ <button
176
+ onClick={toggleFullscreen}
177
+ className="p-2 rounded-lg bg-white/10 hover:bg-white/20 transition-colors"
178
+ title={isFullscreen ? 'Exit fullscreen' : 'Fullscreen'}
179
+ >
180
+ {isFullscreen ? <Minimize2 className="w-4 h-4" /> : <Maximize2 className="w-4 h-4" />}
181
+ </button>
182
+ </div>
183
+
184
+ {/* Loading state */}
185
+ {loading && (
186
+ <div className="absolute inset-0 flex items-center justify-center bg-black/50">
187
+ <div className="flex items-center gap-3 text-white/60">
188
+ <Loader2 className="w-6 h-6 animate-spin" />
189
+ <span>Loading visualization...</span>
190
+ </div>
191
+ </div>
192
+ )}
193
+
194
+ {/* Error state */}
195
+ {error && (
196
+ <div className="absolute inset-0 flex items-center justify-center">
197
+ <div className="flex items-center gap-3 text-red-400">
198
+ <AlertCircle className="w-6 h-6" />
199
+ <span>{error}</span>
200
+ </div>
201
+ </div>
202
+ )}
203
+
204
+ {/* Plot container */}
205
+ <div
206
+ ref={containerRef}
207
+ className="w-full h-full min-h-[400px]"
208
+ style={{ visibility: loading ? 'hidden' : 'visible' }}
209
+ />
210
+ </div>
211
+ );
212
+ };
213
+
214
+ export default PlotRenderer;
FRRONTEEEND/lib/supabase.ts CHANGED
@@ -219,6 +219,8 @@ export interface UserProfile {
219
  profession?: string;
220
  experience?: string;
221
  industry?: string;
 
 
222
  onboarding_completed: boolean;
223
  created_at?: string;
224
  updated_at?: string;
@@ -273,3 +275,52 @@ export const getUserProfile = async (userId: string) => {
273
  }
274
  };
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  profession?: string;
220
  experience?: string;
221
  industry?: string;
222
+ huggingface_token?: string; // Encrypted HF token for storage integration
223
+ huggingface_username?: string;
224
  onboarding_completed: boolean;
225
  created_at?: string;
226
  updated_at?: string;
 
275
  }
276
  };
277
 
278
+ // Update HuggingFace token for a user
279
+ export const updateHuggingFaceToken = async (userId: string, hfToken: string, hfUsername?: string) => {
280
+ try {
281
+ const { data, error } = await supabase
282
+ .from('user_profiles')
283
+ .update({
284
+ huggingface_token: hfToken,
285
+ huggingface_username: hfUsername,
286
+ updated_at: new Date().toISOString()
287
+ })
288
+ .eq('user_id', userId)
289
+ .select()
290
+ .single();
291
+
292
+ if (error) {
293
+ console.error('Failed to update HF token:', error);
294
+ return null;
295
+ }
296
+ return data;
297
+ } catch (err) {
298
+ console.error('HF token update error:', err);
299
+ return null;
300
+ }
301
+ };
302
+
303
+ // Get HuggingFace token for a user (returns masked token for security)
304
+ export const getHuggingFaceStatus = async (userId: string) => {
305
+ try {
306
+ const { data, error } = await supabase
307
+ .from('user_profiles')
308
+ .select('huggingface_token, huggingface_username')
309
+ .eq('user_id', userId)
310
+ .single();
311
+
312
+ if (error) {
313
+ return { connected: false };
314
+ }
315
+
316
+ return {
317
+ connected: !!data?.huggingface_token,
318
+ username: data?.huggingface_username,
319
+ tokenMasked: data?.huggingface_token ? `hf_****${data.huggingface_token.slice(-4)}` : null
320
+ };
321
+ } catch (err) {
322
+ console.error('HF status fetch error:', err);
323
+ return { connected: false };
324
+ }
325
+ };
326
+
requirements.txt CHANGED
@@ -90,6 +90,15 @@ google-cloud-storage==2.14.0 # For GCS artifact storage
90
  google-auth==2.25.2
91
  google-generativeai==0.3.2 # For Gemini LLM support
92
 
 
 
 
 
 
 
 
 
 
93
  # Testing
94
  pytest==7.4.3
95
  pytest-mock==3.12.0
 
90
  google-auth==2.25.2
91
  google-generativeai==0.3.2 # For Gemini LLM support
92
 
93
+ # Cloudflare R2 Storage (S3-compatible)
94
+ boto3>=1.28.0 # For R2 file storage
95
+
96
+ # HuggingFace Storage Integration
97
+ huggingface_hub>=0.20.0 # For storing user artifacts on HuggingFace
98
+
99
+ # Supabase Backend
100
+ supabase>=2.0.0 # For user file metadata
101
+
102
  # Testing
103
  pytest==7.4.3
104
  pytest-mock==3.12.0
src/api/app.py CHANGED
@@ -1053,6 +1053,238 @@ async def chat(request: ChatRequest) -> JSONResponse:
1053
  )
1054
 
1055
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1056
  # Error handlers
1057
  @app.exception_handler(HTTPException)
1058
  async def http_exception_handler(request, exc):
 
1053
  )
1054
 
1055
 
1056
+ # ==================== FILE STORAGE API ====================
1057
+ # These endpoints handle persistent file storage with R2 + Supabase
1058
+
1059
+ class FileMetadataResponse(BaseModel):
1060
+ """Response model for file metadata."""
1061
+ id: str
1062
+ file_type: str
1063
+ file_name: str
1064
+ size_bytes: int
1065
+ created_at: str
1066
+ expires_at: str
1067
+ download_url: Optional[str] = None
1068
+ metadata: Dict[str, Any] = {}
1069
+
1070
+ class UserFilesResponse(BaseModel):
1071
+ """Response model for user files list."""
1072
+ success: bool
1073
+ files: List[FileMetadataResponse]
1074
+ total_count: int
1075
+ total_size_mb: float
1076
+
1077
+ @app.get("/api/files")
1078
+ async def get_user_files(
1079
+ user_id: str,
1080
+ file_type: Optional[str] = None,
1081
+ session_id: Optional[str] = None
1082
+ ):
1083
+ """
1084
+ Get all files for a user.
1085
+
1086
+ Query params:
1087
+ - user_id: User ID (required)
1088
+ - file_type: Filter by type (plot, csv, report, model)
1089
+ - session_id: Filter by chat session
1090
+ """
1091
+ try:
1092
+ from src.storage.user_files_service import get_files_service, FileType
1093
+ from src.storage.r2_storage import get_r2_service
1094
+
1095
+ files_service = get_files_service()
1096
+ r2_service = get_r2_service()
1097
+
1098
+ # Convert file_type string to enum if provided
1099
+ file_type_enum = None
1100
+ if file_type:
1101
+ file_type_enum = FileType(file_type)
1102
+
1103
+ files = files_service.get_user_files(
1104
+ user_id=user_id,
1105
+ file_type=file_type_enum,
1106
+ session_id=session_id
1107
+ )
1108
+
1109
+ # Generate download URLs
1110
+ file_responses = []
1111
+ total_size = 0
1112
+ for f in files:
1113
+ download_url = None
1114
+ if f.file_type == FileType.CSV:
1115
+ download_url = r2_service.get_csv_download_url(f.r2_key)
1116
+ elif f.file_type in [FileType.REPORT, FileType.PLOT]:
1117
+ download_url = r2_service.get_report_url(f.r2_key)
1118
+
1119
+ file_responses.append(FileMetadataResponse(
1120
+ id=f.id,
1121
+ file_type=f.file_type.value,
1122
+ file_name=f.file_name,
1123
+ size_bytes=f.size_bytes,
1124
+ created_at=f.created_at.isoformat(),
1125
+ expires_at=f.expires_at.isoformat(),
1126
+ download_url=download_url,
1127
+ metadata=f.metadata
1128
+ ))
1129
+ total_size += f.size_bytes
1130
+
1131
+ return UserFilesResponse(
1132
+ success=True,
1133
+ files=file_responses,
1134
+ total_count=len(files),
1135
+ total_size_mb=round(total_size / (1024 * 1024), 2)
1136
+ )
1137
+
1138
+ except ImportError:
1139
+ # Storage services not configured
1140
+ return UserFilesResponse(
1141
+ success=True,
1142
+ files=[],
1143
+ total_count=0,
1144
+ total_size_mb=0
1145
+ )
1146
+ except Exception as e:
1147
+ logger.error(f"Error fetching user files: {e}")
1148
+ raise HTTPException(status_code=500, detail=str(e))
1149
+
1150
+ @app.get("/api/files/{file_id}")
1151
+ async def get_file(file_id: str):
1152
+ """Get a specific file by ID with download URL."""
1153
+ try:
1154
+ from src.storage.user_files_service import get_files_service, FileType
1155
+ from src.storage.r2_storage import get_r2_service
1156
+
1157
+ files_service = get_files_service()
1158
+ r2_service = get_r2_service()
1159
+
1160
+ file = files_service.get_file_by_id(file_id)
1161
+ if not file:
1162
+ raise HTTPException(status_code=404, detail="File not found")
1163
+
1164
+ # Generate appropriate URL
1165
+ download_url = None
1166
+ if file.file_type == FileType.CSV:
1167
+ download_url = r2_service.get_csv_download_url(file.r2_key)
1168
+ elif file.file_type == FileType.PLOT:
1169
+ # For plots, return the plot data directly
1170
+ plot_data = r2_service.get_plot_data(file.r2_key)
1171
+ return {
1172
+ "success": True,
1173
+ "file": {
1174
+ "id": file.id,
1175
+ "file_type": file.file_type.value,
1176
+ "file_name": file.file_name,
1177
+ "metadata": file.metadata
1178
+ },
1179
+ "plot_data": plot_data
1180
+ }
1181
+ else:
1182
+ download_url = r2_service.get_report_url(file.r2_key)
1183
+
1184
+ return {
1185
+ "success": True,
1186
+ "file": FileMetadataResponse(
1187
+ id=file.id,
1188
+ file_type=file.file_type.value,
1189
+ file_name=file.file_name,
1190
+ size_bytes=file.size_bytes,
1191
+ created_at=file.created_at.isoformat(),
1192
+ expires_at=file.expires_at.isoformat(),
1193
+ download_url=download_url,
1194
+ metadata=file.metadata
1195
+ )
1196
+ }
1197
+
1198
+ except HTTPException:
1199
+ raise
1200
+ except Exception as e:
1201
+ logger.error(f"Error fetching file: {e}")
1202
+ raise HTTPException(status_code=500, detail=str(e))
1203
+
1204
+ @app.delete("/api/files/{file_id}")
1205
+ async def delete_file(file_id: str, user_id: str):
1206
+ """Delete a file (both from R2 and Supabase)."""
1207
+ try:
1208
+ from src.storage.user_files_service import get_files_service
1209
+ from src.storage.r2_storage import get_r2_service
1210
+
1211
+ files_service = get_files_service()
1212
+ r2_service = get_r2_service()
1213
+
1214
+ file = files_service.get_file_by_id(file_id)
1215
+ if not file:
1216
+ raise HTTPException(status_code=404, detail="File not found")
1217
+
1218
+ # Verify ownership
1219
+ if file.user_id != user_id:
1220
+ raise HTTPException(status_code=403, detail="Not authorized")
1221
+
1222
+ # Delete from R2
1223
+ r2_service.delete_file(file.r2_key)
1224
+
1225
+ # Delete from Supabase
1226
+ files_service.hard_delete_file(file_id)
1227
+
1228
+ return {"success": True, "message": "File deleted"}
1229
+
1230
+ except HTTPException:
1231
+ raise
1232
+ except Exception as e:
1233
+ logger.error(f"Error deleting file: {e}")
1234
+ raise HTTPException(status_code=500, detail=str(e))
1235
+
1236
+ @app.get("/api/files/stats/{user_id}")
1237
+ async def get_storage_stats(user_id: str):
1238
+ """Get storage statistics for a user."""
1239
+ try:
1240
+ from src.storage.user_files_service import get_files_service
1241
+
1242
+ files_service = get_files_service()
1243
+ stats = files_service.get_user_storage_stats(user_id)
1244
+
1245
+ return {
1246
+ "success": True,
1247
+ "stats": stats
1248
+ }
1249
+
1250
+ except Exception as e:
1251
+ logger.error(f"Error getting stats: {e}")
1252
+ return {
1253
+ "success": True,
1254
+ "stats": {
1255
+ "total_files": 0,
1256
+ "total_size_bytes": 0,
1257
+ "total_size_mb": 0,
1258
+ "by_type": {}
1259
+ }
1260
+ }
1261
+
1262
+ @app.post("/api/files/extend/{file_id}")
1263
+ async def extend_file_expiration(file_id: str, user_id: str, days: int = 7):
1264
+ """Extend a file's expiration date."""
1265
+ try:
1266
+ from src.storage.user_files_service import get_files_service
1267
+
1268
+ files_service = get_files_service()
1269
+
1270
+ file = files_service.get_file_by_id(file_id)
1271
+ if not file:
1272
+ raise HTTPException(status_code=404, detail="File not found")
1273
+
1274
+ if file.user_id != user_id:
1275
+ raise HTTPException(status_code=403, detail="Not authorized")
1276
+
1277
+ success = files_service.extend_expiration(file_id, days)
1278
+
1279
+ return {"success": success}
1280
+
1281
+ except HTTPException:
1282
+ raise
1283
+ except Exception as e:
1284
+ logger.error(f"Error extending expiration: {e}")
1285
+ raise HTTPException(status_code=500, detail=str(e))
1286
+
1287
+
1288
  # Error handlers
1289
  @app.exception_handler(HTTPException)
1290
  async def http_exception_handler(request, exc):
src/storage/huggingface_storage.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HuggingFace Storage Service
3
+
4
+ Stores user artifacts (datasets, models, plots, reports) directly to the user's
5
+ HuggingFace account, enabling:
6
+ 1. Persistent storage at no cost
7
+ 2. Easy model deployment
8
+ 3. User ownership of data
9
+ 4. Version control via Git
10
+ """
11
+
12
+ import os
13
+ import json
14
+ import gzip
15
+ import tempfile
16
+ from pathlib import Path
17
+ from typing import Optional, Dict, Any, List, BinaryIO, Union
18
+ from datetime import datetime
19
+ import logging
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Optional: huggingface_hub for HF operations
24
+ try:
25
+ from huggingface_hub import HfApi, HfFolder, create_repo, upload_file, upload_folder
26
+ from huggingface_hub.utils import RepositoryNotFoundError
27
+ HF_AVAILABLE = True
28
+ except ImportError:
29
+ HF_AVAILABLE = False
30
+ logger.warning("huggingface_hub not installed. Install with: pip install huggingface_hub")
31
+
32
+
33
+ class HuggingFaceStorage:
34
+ """
35
+ Manages file storage on HuggingFace for user artifacts.
36
+
37
+ Storage structure on HuggingFace:
38
+ - Datasets repo: {username}/ds-agent-data
39
+ - /datasets/{session_id}/cleaned_data.csv.gz
40
+ - /datasets/{session_id}/encoded_data.csv.gz
41
+
42
+ - Models repo: {username}/ds-agent-models
43
+ - /models/{session_id}/{model_name}.pkl
44
+ - /models/{session_id}/model_config.json
45
+
46
+ - Spaces repo (for reports/plots): {username}/ds-agent-outputs
47
+ - /plots/{session_id}/correlation_heatmap.json
48
+ - /reports/{session_id}/eda_report.html.gz
49
+ """
50
+
51
+ def __init__(self, hf_token: Optional[str] = None):
52
+ """
53
+ Initialize HuggingFace storage.
54
+
55
+ Args:
56
+ hf_token: HuggingFace API token with write permissions
57
+ """
58
+ if not HF_AVAILABLE:
59
+ raise ImportError("huggingface_hub is required. Install with: pip install huggingface_hub")
60
+
61
+ self.token = hf_token or os.environ.get("HF_TOKEN")
62
+ if not self.token:
63
+ raise ValueError("HuggingFace token is required")
64
+
65
+ self.api = HfApi(token=self.token)
66
+ self._username: Optional[str] = None
67
+
68
+ # Repo names
69
+ self.DATA_REPO_SUFFIX = "ds-agent-data"
70
+ self.MODELS_REPO_SUFFIX = "ds-agent-models"
71
+ self.OUTPUTS_REPO_SUFFIX = "ds-agent-outputs"
72
+
73
+ @property
74
+ def username(self) -> str:
75
+ """Get the authenticated user's username."""
76
+ if self._username is None:
77
+ user_info = self.api.whoami()
78
+ self._username = user_info["name"]
79
+ return self._username
80
+
81
+ def _get_repo_id(self, repo_type: str) -> str:
82
+ """Get the full repo ID for a given type."""
83
+ suffix_map = {
84
+ "data": self.DATA_REPO_SUFFIX,
85
+ "models": self.MODELS_REPO_SUFFIX,
86
+ "outputs": self.OUTPUTS_REPO_SUFFIX
87
+ }
88
+ suffix = suffix_map.get(repo_type, self.OUTPUTS_REPO_SUFFIX)
89
+ return f"{self.username}/{suffix}"
90
+
91
+ def _ensure_repo_exists(self, repo_type: str, repo_kind: str = "dataset") -> str:
92
+ """
93
+ Ensure the repository exists, create if not.
94
+
95
+ Args:
96
+ repo_type: "data", "models", or "outputs"
97
+ repo_kind: "dataset", "model", or "space"
98
+
99
+ Returns:
100
+ The repo ID
101
+ """
102
+ repo_id = self._get_repo_id(repo_type)
103
+
104
+ try:
105
+ self.api.repo_info(repo_id=repo_id, repo_type=repo_kind)
106
+ logger.info(f"Repo {repo_id} exists")
107
+ except RepositoryNotFoundError:
108
+ logger.info(f"Creating repo {repo_id}")
109
+ create_repo(
110
+ repo_id=repo_id,
111
+ repo_type=repo_kind,
112
+ private=True, # Default to private
113
+ token=self.token
114
+ )
115
+
116
+ return repo_id
117
+
118
+ def upload_dataset(
119
+ self,
120
+ file_path: str,
121
+ session_id: str,
122
+ file_name: Optional[str] = None,
123
+ compress: bool = True,
124
+ metadata: Optional[Dict[str, Any]] = None
125
+ ) -> Dict[str, Any]:
126
+ """
127
+ Upload a dataset (CSV, Parquet) to user's HuggingFace.
128
+
129
+ Args:
130
+ file_path: Local path to the file
131
+ session_id: Session ID for organizing files
132
+ file_name: Optional custom filename
133
+ compress: Whether to gzip compress the file
134
+ metadata: Optional metadata to store alongside
135
+
136
+ Returns:
137
+ Dict with upload info (url, path, size, etc.)
138
+ """
139
+ repo_id = self._ensure_repo_exists("data", "dataset")
140
+
141
+ original_path = Path(file_path)
142
+ file_name = file_name or original_path.name
143
+
144
+ # Compress if requested and not already compressed
145
+ if compress and not file_name.endswith('.gz'):
146
+ with tempfile.NamedTemporaryFile(suffix='.gz', delete=False) as tmp:
147
+ with open(file_path, 'rb') as f_in:
148
+ with gzip.open(tmp.name, 'wb') as f_out:
149
+ f_out.write(f_in.read())
150
+ upload_path = tmp.name
151
+ file_name = f"{file_name}.gz"
152
+ else:
153
+ upload_path = file_path
154
+
155
+ # Upload to HuggingFace
156
+ path_in_repo = f"datasets/{session_id}/{file_name}"
157
+
158
+ try:
159
+ result = upload_file(
160
+ path_or_fileobj=upload_path,
161
+ path_in_repo=path_in_repo,
162
+ repo_id=repo_id,
163
+ repo_type="dataset",
164
+ token=self.token,
165
+ commit_message=f"Add dataset: {file_name}"
166
+ )
167
+
168
+ # Upload metadata if provided
169
+ if metadata:
170
+ metadata_path = f"datasets/{session_id}/{file_name}.meta.json"
171
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp:
172
+ json.dump({
173
+ **metadata,
174
+ "uploaded_at": datetime.now().isoformat(),
175
+ "original_name": original_path.name,
176
+ "compressed": compress
177
+ }, tmp)
178
+ tmp.flush()
179
+
180
+ upload_file(
181
+ path_or_fileobj=tmp.name,
182
+ path_in_repo=metadata_path,
183
+ repo_id=repo_id,
184
+ repo_type="dataset",
185
+ token=self.token,
186
+ commit_message=f"Add metadata for {file_name}"
187
+ )
188
+
189
+ file_size = os.path.getsize(upload_path)
190
+
191
+ return {
192
+ "success": True,
193
+ "repo_id": repo_id,
194
+ "path": path_in_repo,
195
+ "url": f"https://huggingface.co/datasets/{repo_id}/blob/main/{path_in_repo}",
196
+ "download_url": f"https://huggingface.co/datasets/{repo_id}/resolve/main/{path_in_repo}",
197
+ "size_bytes": file_size,
198
+ "compressed": compress
199
+ }
200
+
201
+ except Exception as e:
202
+ logger.error(f"Failed to upload dataset: {e}")
203
+ return {
204
+ "success": False,
205
+ "error": str(e)
206
+ }
207
+ finally:
208
+ # Clean up temp file if we created one
209
+ if compress and upload_path != file_path:
210
+ try:
211
+ os.unlink(upload_path)
212
+ except:
213
+ pass
214
+
215
+ def upload_model(
216
+ self,
217
+ model_path: str,
218
+ session_id: str,
219
+ model_name: str,
220
+ model_type: str = "sklearn",
221
+ metrics: Optional[Dict[str, float]] = None,
222
+ feature_names: Optional[List[str]] = None,
223
+ target_column: Optional[str] = None
224
+ ) -> Dict[str, Any]:
225
+ """
226
+ Upload a trained model to user's HuggingFace.
227
+
228
+ Args:
229
+ model_path: Local path to the model file (.pkl, .joblib, .pt, etc.)
230
+ session_id: Session ID
231
+ model_name: Name for the model
232
+ model_type: Type of model (sklearn, xgboost, pytorch, etc.)
233
+ metrics: Model performance metrics
234
+ feature_names: List of feature names the model expects
235
+ target_column: Target column name
236
+
237
+ Returns:
238
+ Dict with upload info
239
+ """
240
+ repo_id = self._ensure_repo_exists("models", "model")
241
+
242
+ path_in_repo = f"models/{session_id}/{model_name}"
243
+ model_file_name = Path(model_path).name
244
+
245
+ try:
246
+ # Upload the model file
247
+ upload_file(
248
+ path_or_fileobj=model_path,
249
+ path_in_repo=f"{path_in_repo}/{model_file_name}",
250
+ repo_id=repo_id,
251
+ repo_type="model",
252
+ token=self.token,
253
+ commit_message=f"Add model: {model_name}"
254
+ )
255
+
256
+ # Create and upload model card
257
+ model_card = self._generate_model_card(
258
+ model_name=model_name,
259
+ model_type=model_type,
260
+ metrics=metrics,
261
+ feature_names=feature_names,
262
+ target_column=target_column
263
+ )
264
+
265
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as tmp:
266
+ tmp.write(model_card)
267
+ tmp.flush()
268
+
269
+ upload_file(
270
+ path_or_fileobj=tmp.name,
271
+ path_in_repo=f"{path_in_repo}/README.md",
272
+ repo_id=repo_id,
273
+ repo_type="model",
274
+ token=self.token,
275
+ commit_message=f"Add model card for {model_name}"
276
+ )
277
+
278
+ # Upload config
279
+ config = {
280
+ "model_name": model_name,
281
+ "model_type": model_type,
282
+ "model_file": model_file_name,
283
+ "metrics": metrics or {},
284
+ "feature_names": feature_names or [],
285
+ "target_column": target_column,
286
+ "created_at": datetime.now().isoformat(),
287
+ "session_id": session_id
288
+ }
289
+
290
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp:
291
+ json.dump(config, tmp, indent=2)
292
+ tmp.flush()
293
+
294
+ upload_file(
295
+ path_or_fileobj=tmp.name,
296
+ path_in_repo=f"{path_in_repo}/config.json",
297
+ repo_id=repo_id,
298
+ repo_type="model",
299
+ token=self.token,
300
+ commit_message=f"Add config for {model_name}"
301
+ )
302
+
303
+ return {
304
+ "success": True,
305
+ "repo_id": repo_id,
306
+ "path": path_in_repo,
307
+ "url": f"https://huggingface.co/{repo_id}/tree/main/{path_in_repo}",
308
+ "model_type": model_type,
309
+ "metrics": metrics
310
+ }
311
+
312
+ except Exception as e:
313
+ logger.error(f"Failed to upload model: {e}")
314
+ return {
315
+ "success": False,
316
+ "error": str(e)
317
+ }
318
+
319
+ def upload_plot(
320
+ self,
321
+ plot_data: Union[str, Dict],
322
+ session_id: str,
323
+ plot_name: str,
324
+ plot_type: str = "plotly"
325
+ ) -> Dict[str, Any]:
326
+ """
327
+ Upload plot data (as JSON) to user's HuggingFace.
328
+
329
+ For Plotly charts, we store the JSON data and render client-side,
330
+ which is much smaller than storing full HTML.
331
+
332
+ Args:
333
+ plot_data: Either JSON string or dict of plot data
334
+ session_id: Session ID
335
+ plot_name: Name for the plot
336
+ plot_type: Type of plot (plotly, matplotlib, etc.)
337
+
338
+ Returns:
339
+ Dict with upload info
340
+ """
341
+ repo_id = self._ensure_repo_exists("outputs", "dataset")
342
+
343
+ # Ensure we have JSON string
344
+ if isinstance(plot_data, dict):
345
+ plot_json = json.dumps(plot_data)
346
+ else:
347
+ plot_json = plot_data
348
+
349
+ path_in_repo = f"plots/{session_id}/{plot_name}.json"
350
+
351
+ try:
352
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp:
353
+ tmp.write(plot_json)
354
+ tmp.flush()
355
+
356
+ upload_file(
357
+ path_or_fileobj=tmp.name,
358
+ path_in_repo=path_in_repo,
359
+ repo_id=repo_id,
360
+ repo_type="dataset",
361
+ token=self.token,
362
+ commit_message=f"Add plot: {plot_name}"
363
+ )
364
+
365
+ return {
366
+ "success": True,
367
+ "repo_id": repo_id,
368
+ "path": path_in_repo,
369
+ "url": f"https://huggingface.co/datasets/{repo_id}/blob/main/{path_in_repo}",
370
+ "download_url": f"https://huggingface.co/datasets/{repo_id}/resolve/main/{path_in_repo}",
371
+ "plot_type": plot_type,
372
+ "size_bytes": len(plot_json.encode())
373
+ }
374
+
375
+ except Exception as e:
376
+ logger.error(f"Failed to upload plot: {e}")
377
+ return {
378
+ "success": False,
379
+ "error": str(e)
380
+ }
381
+
382
+ def upload_report(
383
+ self,
384
+ report_path: str,
385
+ session_id: str,
386
+ report_name: str,
387
+ compress: bool = True
388
+ ) -> Dict[str, Any]:
389
+ """
390
+ Upload an HTML report to user's HuggingFace.
391
+
392
+ Args:
393
+ report_path: Local path to the HTML report
394
+ session_id: Session ID
395
+ report_name: Name for the report
396
+ compress: Whether to gzip compress
397
+
398
+ Returns:
399
+ Dict with upload info
400
+ """
401
+ repo_id = self._ensure_repo_exists("outputs", "dataset")
402
+
403
+ file_name = f"{report_name}.html"
404
+
405
+ # Compress if requested
406
+ if compress:
407
+ with tempfile.NamedTemporaryFile(suffix='.html.gz', delete=False) as tmp:
408
+ with open(report_path, 'rb') as f_in:
409
+ with gzip.open(tmp.name, 'wb') as f_out:
410
+ f_out.write(f_in.read())
411
+ upload_path = tmp.name
412
+ file_name = f"{file_name}.gz"
413
+ else:
414
+ upload_path = report_path
415
+
416
+ path_in_repo = f"reports/{session_id}/{file_name}"
417
+
418
+ try:
419
+ upload_file(
420
+ path_or_fileobj=upload_path,
421
+ path_in_repo=path_in_repo,
422
+ repo_id=repo_id,
423
+ repo_type="dataset",
424
+ token=self.token,
425
+ commit_message=f"Add report: {report_name}"
426
+ )
427
+
428
+ file_size = os.path.getsize(upload_path)
429
+
430
+ return {
431
+ "success": True,
432
+ "repo_id": repo_id,
433
+ "path": path_in_repo,
434
+ "url": f"https://huggingface.co/datasets/{repo_id}/blob/main/{path_in_repo}",
435
+ "download_url": f"https://huggingface.co/datasets/{repo_id}/resolve/main/{path_in_repo}",
436
+ "size_bytes": file_size,
437
+ "compressed": compress
438
+ }
439
+
440
+ except Exception as e:
441
+ logger.error(f"Failed to upload report: {e}")
442
+ return {
443
+ "success": False,
444
+ "error": str(e)
445
+ }
446
+ finally:
447
+ if compress and upload_path != report_path:
448
+ try:
449
+ os.unlink(upload_path)
450
+ except:
451
+ pass
452
+
453
+ def list_user_files(
454
+ self,
455
+ session_id: Optional[str] = None,
456
+ file_type: Optional[str] = None
457
+ ) -> Dict[str, List[Dict[str, Any]]]:
458
+ """
459
+ List all files for the user, optionally filtered by session or type.
460
+
461
+ Args:
462
+ session_id: Optional session ID to filter by
463
+ file_type: Optional type ("datasets", "models", "plots", "reports")
464
+
465
+ Returns:
466
+ Dict with lists of files by type
467
+ """
468
+ result = {
469
+ "datasets": [],
470
+ "models": [],
471
+ "plots": [],
472
+ "reports": []
473
+ }
474
+
475
+ try:
476
+ # List datasets
477
+ if file_type is None or file_type == "datasets":
478
+ repo_id = self._get_repo_id("data")
479
+ try:
480
+ files = self.api.list_repo_files(repo_id=repo_id, repo_type="dataset")
481
+ for f in files:
482
+ if f.startswith("datasets/") and not f.endswith(".meta.json"):
483
+ if session_id is None or f"/{session_id}/" in f:
484
+ result["datasets"].append({
485
+ "path": f,
486
+ "name": Path(f).name,
487
+ "session_id": f.split("/")[1] if len(f.split("/")) > 1 else None,
488
+ "download_url": f"https://huggingface.co/datasets/{repo_id}/resolve/main/{f}"
489
+ })
490
+ except:
491
+ pass
492
+
493
+ # List models
494
+ if file_type is None or file_type == "models":
495
+ repo_id = self._get_repo_id("models")
496
+ try:
497
+ files = self.api.list_repo_files(repo_id=repo_id, repo_type="model")
498
+ for f in files:
499
+ if f.startswith("models/") and f.endswith("config.json"):
500
+ if session_id is None or f"/{session_id}/" in f:
501
+ model_path = "/".join(f.split("/")[:-1])
502
+ result["models"].append({
503
+ "path": model_path,
504
+ "name": f.split("/")[-2] if len(f.split("/")) > 2 else None,
505
+ "session_id": f.split("/")[1] if len(f.split("/")) > 1 else None,
506
+ "url": f"https://huggingface.co/{repo_id}/tree/main/{model_path}"
507
+ })
508
+ except:
509
+ pass
510
+
511
+ # List plots and reports
512
+ if file_type is None or file_type in ["plots", "reports"]:
513
+ repo_id = self._get_repo_id("outputs")
514
+ try:
515
+ files = self.api.list_repo_files(repo_id=repo_id, repo_type="dataset")
516
+ for f in files:
517
+ if f.startswith("plots/"):
518
+ if session_id is None or f"/{session_id}/" in f:
519
+ result["plots"].append({
520
+ "path": f,
521
+ "name": Path(f).stem,
522
+ "session_id": f.split("/")[1] if len(f.split("/")) > 1 else None,
523
+ "download_url": f"https://huggingface.co/datasets/{repo_id}/resolve/main/{f}"
524
+ })
525
+ elif f.startswith("reports/"):
526
+ if session_id is None or f"/{session_id}/" in f:
527
+ result["reports"].append({
528
+ "path": f,
529
+ "name": Path(f).stem.replace(".html", ""),
530
+ "session_id": f.split("/")[1] if len(f.split("/")) > 1 else None,
531
+ "download_url": f"https://huggingface.co/datasets/{repo_id}/resolve/main/{f}"
532
+ })
533
+ except:
534
+ pass
535
+
536
+ except Exception as e:
537
+ logger.error(f"Failed to list files: {e}")
538
+
539
+ return result
540
+
541
+ def _generate_model_card(
542
+ self,
543
+ model_name: str,
544
+ model_type: str,
545
+ metrics: Optional[Dict[str, float]] = None,
546
+ feature_names: Optional[List[str]] = None,
547
+ target_column: Optional[str] = None
548
+ ) -> str:
549
+ """Generate a HuggingFace model card."""
550
+
551
+ metrics_str = ""
552
+ if metrics:
553
+ metrics_str = "\n".join([f"- **{k}**: {v:.4f}" for k, v in metrics.items()])
554
+
555
+ features_str = ""
556
+ if feature_names:
557
+ features_str = ", ".join(f"`{f}`" for f in feature_names[:20])
558
+ if len(feature_names) > 20:
559
+ features_str += f" ... and {len(feature_names) - 20} more"
560
+
561
+ return f"""---
562
+ license: apache-2.0
563
+ tags:
564
+ - tabular
565
+ - {model_type}
566
+ - ds-agent
567
+ ---
568
+
569
+ # {model_name}
570
+
571
+ This model was trained using [DS Agent](https://huggingface.co/spaces/Pulastya0/Data-Science-Agent),
572
+ an AI-powered data science assistant.
573
+
574
+ ## Model Details
575
+
576
+ - **Model Type**: {model_type}
577
+ - **Target Column**: {target_column or "Not specified"}
578
+ - **Created**: {datetime.now().strftime("%Y-%m-%d %H:%M")}
579
+
580
+ ## Performance Metrics
581
+
582
+ {metrics_str or "No metrics recorded"}
583
+
584
+ ## Features
585
+
586
+ {features_str or "Feature names not recorded"}
587
+
588
+ ## Usage
589
+
590
+ ```python
591
+ import joblib
592
+
593
+ # Load the model
594
+ model = joblib.load("model.pkl")
595
+
596
+ # Make predictions
597
+ predictions = model.predict(X_new)
598
+ ```
599
+
600
+ ## Training
601
+
602
+ This model was automatically trained using DS Agent's ML pipeline which includes:
603
+ - Automated data cleaning
604
+ - Feature engineering
605
+ - Hyperparameter optimization with Optuna
606
+ - Cross-validation
607
+
608
+ ---
609
+
610
+ *Generated by DS Agent*
611
+ """
612
+
613
+ def get_user_storage_stats(self) -> Dict[str, Any]:
614
+ """Get storage statistics for the user."""
615
+ stats = {
616
+ "datasets_count": 0,
617
+ "models_count": 0,
618
+ "plots_count": 0,
619
+ "reports_count": 0,
620
+ "total_files": 0
621
+ }
622
+
623
+ files = self.list_user_files()
624
+ stats["datasets_count"] = len(files["datasets"])
625
+ stats["models_count"] = len(files["models"])
626
+ stats["plots_count"] = len(files["plots"])
627
+ stats["reports_count"] = len(files["reports"])
628
+ stats["total_files"] = sum(stats.values()) - stats["total_files"]
629
+
630
+ return stats
631
+
632
+
633
+ # Convenience function for creating storage instance
634
+ def get_hf_storage(token: str) -> Optional[HuggingFaceStorage]:
635
+ """
636
+ Create a HuggingFace storage instance.
637
+
638
+ Args:
639
+ token: HuggingFace API token
640
+
641
+ Returns:
642
+ HuggingFaceStorage instance or None if not available
643
+ """
644
+ if not HF_AVAILABLE:
645
+ logger.error("huggingface_hub not installed")
646
+ return None
647
+
648
+ try:
649
+ return HuggingFaceStorage(hf_token=token)
650
+ except Exception as e:
651
+ logger.error(f"Failed to create HF storage: {e}")
652
+ return None
src/storage/r2_storage.py ADDED
File without changes
src/storage/user_files_service.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ User Files Service - Manages file metadata in Supabase
3
+
4
+ This service:
5
+ 1. Tracks all user files (plots, CSVs, reports, models) in Supabase
6
+ 2. Provides file listing for the Assets panel
7
+ 3. Handles file expiration and cleanup coordination
8
+ 4. Works with R2StorageService for actual file storage
9
+ """
10
+
11
+ import os
12
+ from datetime import datetime, timedelta
13
+ from typing import Optional, Dict, Any, List
14
+ from dataclasses import dataclass
15
+ from enum import Enum
16
+
17
+ # Supabase client import
18
+ try:
19
+ from supabase import create_client, Client
20
+ except ImportError:
21
+ print("Warning: supabase package not installed. Run: pip install supabase")
22
+ Client = None
23
+
24
+ SUPABASE_URL = os.getenv("SUPABASE_URL", "")
25
+ SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY", "") # Use service key for backend
26
+
27
+
28
+ class FileType(Enum):
29
+ PLOT = "plot"
30
+ CSV = "csv"
31
+ REPORT = "report"
32
+ MODEL = "model"
33
+
34
+
35
+ @dataclass
36
+ class UserFile:
37
+ """Represents a user file record."""
38
+ id: str
39
+ user_id: str
40
+ session_id: Optional[str]
41
+ file_type: FileType
42
+ file_name: str
43
+ r2_key: str
44
+ size_bytes: int
45
+ mime_type: str
46
+ metadata: Dict[str, Any]
47
+ created_at: datetime
48
+ expires_at: datetime
49
+ download_url: Optional[str] = None
50
+
51
+
52
+ class UserFilesService:
53
+ """Service for managing user file metadata in Supabase."""
54
+
55
+ def __init__(self):
56
+ """Initialize Supabase client."""
57
+ if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
58
+ raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_KEY must be set")
59
+
60
+ self.client: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)
61
+ self.table = "user_files"
62
+
63
+ # ==================== CREATE ====================
64
+
65
+ def create_file_record(
66
+ self,
67
+ user_id: str,
68
+ file_type: FileType,
69
+ file_name: str,
70
+ r2_key: str,
71
+ size_bytes: int,
72
+ session_id: Optional[str] = None,
73
+ mime_type: str = "application/octet-stream",
74
+ metadata: Optional[Dict[str, Any]] = None,
75
+ expires_in_days: int = 7
76
+ ) -> UserFile:
77
+ """
78
+ Create a file record in Supabase.
79
+
80
+ Args:
81
+ user_id: User ID
82
+ file_type: Type of file
83
+ file_name: Display name
84
+ r2_key: R2 storage key
85
+ size_bytes: File size
86
+ session_id: Optional chat session ID
87
+ mime_type: MIME type
88
+ metadata: Additional metadata (plot type, metrics, etc.)
89
+ expires_in_days: Days until file expires
90
+
91
+ Returns:
92
+ Created UserFile record
93
+ """
94
+ expires_at = datetime.utcnow() + timedelta(days=expires_in_days)
95
+
96
+ data = {
97
+ "user_id": user_id,
98
+ "session_id": session_id,
99
+ "file_type": file_type.value,
100
+ "file_name": file_name,
101
+ "r2_key": r2_key,
102
+ "size_bytes": size_bytes,
103
+ "mime_type": mime_type,
104
+ "metadata": metadata or {},
105
+ "expires_at": expires_at.isoformat()
106
+ }
107
+
108
+ result = self.client.table(self.table).insert(data).execute()
109
+
110
+ if result.data:
111
+ return self._to_user_file(result.data[0])
112
+ raise Exception("Failed to create file record")
113
+
114
+ # ==================== READ ====================
115
+
116
+ def get_user_files(
117
+ self,
118
+ user_id: str,
119
+ file_type: Optional[FileType] = None,
120
+ session_id: Optional[str] = None,
121
+ include_expired: bool = False
122
+ ) -> List[UserFile]:
123
+ """
124
+ Get all files for a user.
125
+
126
+ Args:
127
+ user_id: User ID
128
+ file_type: Optional filter by type
129
+ session_id: Optional filter by session
130
+ include_expired: Include expired files
131
+
132
+ Returns:
133
+ List of UserFile records
134
+ """
135
+ query = self.client.table(self.table)\
136
+ .select("*")\
137
+ .eq("user_id", user_id)\
138
+ .eq("is_deleted", False)
139
+
140
+ if file_type:
141
+ query = query.eq("file_type", file_type.value)
142
+
143
+ if session_id:
144
+ query = query.eq("session_id", session_id)
145
+
146
+ if not include_expired:
147
+ query = query.gt("expires_at", datetime.utcnow().isoformat())
148
+
149
+ query = query.order("created_at", desc=True)
150
+
151
+ result = query.execute()
152
+
153
+ return [self._to_user_file(row) for row in (result.data or [])]
154
+
155
+ def get_file_by_id(self, file_id: str) -> Optional[UserFile]:
156
+ """Get a specific file by ID."""
157
+ result = self.client.table(self.table)\
158
+ .select("*")\
159
+ .eq("id", file_id)\
160
+ .single()\
161
+ .execute()
162
+
163
+ if result.data:
164
+ return self._to_user_file(result.data)
165
+ return None
166
+
167
+ def get_file_by_r2_key(self, r2_key: str) -> Optional[UserFile]:
168
+ """Get a file by R2 key."""
169
+ result = self.client.table(self.table)\
170
+ .select("*")\
171
+ .eq("r2_key", r2_key)\
172
+ .single()\
173
+ .execute()
174
+
175
+ if result.data:
176
+ return self._to_user_file(result.data)
177
+ return None
178
+
179
+ def get_session_files(self, session_id: str) -> List[UserFile]:
180
+ """Get all files for a chat session."""
181
+ result = self.client.table(self.table)\
182
+ .select("*")\
183
+ .eq("session_id", session_id)\
184
+ .eq("is_deleted", False)\
185
+ .order("created_at", desc=True)\
186
+ .execute()
187
+
188
+ return [self._to_user_file(row) for row in (result.data or [])]
189
+
190
+ # ==================== UPDATE ====================
191
+
192
+ def extend_expiration(self, file_id: str, additional_days: int = 7) -> bool:
193
+ """Extend file expiration date."""
194
+ file = self.get_file_by_id(file_id)
195
+ if not file:
196
+ return False
197
+
198
+ new_expires = datetime.utcnow() + timedelta(days=additional_days)
199
+
200
+ result = self.client.table(self.table)\
201
+ .update({"expires_at": new_expires.isoformat()})\
202
+ .eq("id", file_id)\
203
+ .execute()
204
+
205
+ return bool(result.data)
206
+
207
+ # ==================== DELETE ====================
208
+
209
+ def soft_delete_file(self, file_id: str) -> bool:
210
+ """Soft delete a file (mark as deleted)."""
211
+ result = self.client.table(self.table)\
212
+ .update({"is_deleted": True})\
213
+ .eq("id", file_id)\
214
+ .execute()
215
+
216
+ return bool(result.data)
217
+
218
+ def hard_delete_file(self, file_id: str) -> bool:
219
+ """Permanently delete a file record."""
220
+ result = self.client.table(self.table)\
221
+ .delete()\
222
+ .eq("id", file_id)\
223
+ .execute()
224
+
225
+ return bool(result.data)
226
+
227
+ def get_expired_files(self) -> List[UserFile]:
228
+ """Get all expired files for cleanup."""
229
+ result = self.client.table(self.table)\
230
+ .select("*")\
231
+ .lt("expires_at", datetime.utcnow().isoformat())\
232
+ .eq("is_deleted", False)\
233
+ .execute()
234
+
235
+ return [self._to_user_file(row) for row in (result.data or [])]
236
+
237
+ # ==================== STATS ====================
238
+
239
+ def get_user_storage_stats(self, user_id: str) -> Dict[str, Any]:
240
+ """Get storage statistics for a user."""
241
+ files = self.get_user_files(user_id, include_expired=False)
242
+
243
+ stats = {
244
+ "total_files": len(files),
245
+ "total_size_bytes": sum(f.size_bytes for f in files),
246
+ "by_type": {}
247
+ }
248
+
249
+ for file_type in FileType:
250
+ type_files = [f for f in files if f.file_type == file_type]
251
+ stats["by_type"][file_type.value] = {
252
+ "count": len(type_files),
253
+ "size_bytes": sum(f.size_bytes for f in type_files)
254
+ }
255
+
256
+ stats["total_size_mb"] = round(stats["total_size_bytes"] / (1024 * 1024), 2)
257
+
258
+ return stats
259
+
260
+ # ==================== HELPERS ====================
261
+
262
+ def _to_user_file(self, row: Dict[str, Any]) -> UserFile:
263
+ """Convert database row to UserFile object."""
264
+ return UserFile(
265
+ id=row["id"],
266
+ user_id=row["user_id"],
267
+ session_id=row.get("session_id"),
268
+ file_type=FileType(row["file_type"]),
269
+ file_name=row["file_name"],
270
+ r2_key=row["r2_key"],
271
+ size_bytes=row.get("size_bytes", 0),
272
+ mime_type=row.get("mime_type", "application/octet-stream"),
273
+ metadata=row.get("metadata", {}),
274
+ created_at=datetime.fromisoformat(row["created_at"].replace("Z", "+00:00")),
275
+ expires_at=datetime.fromisoformat(row["expires_at"].replace("Z", "+00:00"))
276
+ )
277
+
278
+
279
+ # ==================== SINGLETON ====================
280
+
281
+ _files_service: Optional[UserFilesService] = None
282
+
283
+ def get_files_service() -> UserFilesService:
284
+ """Get or create UserFilesService singleton."""
285
+ global _files_service
286
+ if _files_service is None:
287
+ _files_service = UserFilesService()
288
+ return _files_service