vectorplasticity committed on
Commit
b36be34
·
verified ·
1 Parent(s): c5ca484

Fix GPU detection and OUTPUT_DIR attribute errors

Browse files
Files changed (1) hide show
  1. app/routers/system.py +64 -29
app/routers/system.py CHANGED
@@ -33,19 +33,24 @@ class SystemInfo(BaseModel):
33
  total_memory_gb: float
34
 
35
 
 
 
 
 
 
 
 
 
 
 
36
  class ResourceUsage(BaseModel):
37
  """Current resource usage."""
38
- cpu_percent: float
39
- memory_percent: float
40
- memory_used_gb: float
41
- memory_total_gb: float
42
- gpu_memory_used: Optional[float]
43
- gpu_memory_total: Optional[float]
44
- gpu_utilization: Optional[float]
45
- disk_used_gb: float
46
- disk_total_gb: float
47
- disk_percent: float
48
-
49
 
50
  class StorageInfo(BaseModel):
51
  """Storage information."""
@@ -103,29 +108,59 @@ async def get_resource_usage():
103
  # Disk
104
  disk = shutil.disk_usage('/')
105
 
106
- # GPU memory
107
- gpu_memory_used = None
108
- gpu_memory_total = None
109
- gpu_utilization = None
110
 
111
  if torch.cuda.is_available():
112
  try:
 
 
 
 
113
  gpu_memory_used = round(torch.cuda.memory_allocated() / (1024**3), 2)
114
  gpu_memory_total = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
115
- except:
116
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  return ResourceUsage(
119
- cpu_percent=round(cpu_percent, 1),
120
- memory_percent=round(memory.percent, 1),
121
- memory_used_gb=round(memory.used / (1024**3), 2),
122
- memory_total_gb=round(memory.total / (1024**3), 2),
123
- gpu_memory_used=gpu_memory_used,
124
- gpu_memory_total=gpu_memory_total,
125
- gpu_utilization=gpu_utilization,
126
- disk_used_gb=round(disk.used / (1024**3), 2),
127
- disk_total_gb=round(disk.total / (1024**3), 2),
128
- disk_percent=round((disk.used / disk.total) * 100, 1)
 
 
 
 
 
 
 
129
  )
130
 
131
 
@@ -136,8 +171,8 @@ async def get_storage_info():
136
  ("Models", settings.MODELS_DIR),
137
  ("Cache", settings.CACHE_DIR),
138
  ("Logs", settings.LOGS_DIR),
139
- ("Uploads", settings.UPLOADS_DIR),
140
- ("Outputs", settings.OUTPUTS_DIR)
141
  ]
142
 
143
  result = []
 
33
  total_memory_gb: float
34
 
35
 
36
+ class GPUInfo(BaseModel):
37
+ """GPU information."""
38
+ available: bool
39
+ count: int = 0
40
+ names: List[str] = []
41
+ memory_used_gb: Optional[float] = None
42
+ memory_total_gb: Optional[float] = None
43
+ utilization: Optional[float] = None
44
+
45
+
46
  class ResourceUsage(BaseModel):
47
  """Current resource usage."""
48
+ cpu: Dict[str, float]
49
+ memory: Dict[str, float]
50
+ disk: Dict[str, float]
51
+ gpu: GPUInfo
52
+ cache: Dict[str, Any]
53
+
 
 
 
 
 
54
 
55
  class StorageInfo(BaseModel):
56
  """Storage information."""
 
108
  # Disk
109
  disk = shutil.disk_usage('/')
110
 
111
+ # GPU info
112
+ gpu_info = GPUInfo(available=False, count=0, names=[])
 
 
113
 
114
  if torch.cuda.is_available():
115
  try:
116
+ gpu_names = []
117
+ for i in range(torch.cuda.device_count()):
118
+ gpu_names.append(torch.cuda.get_device_name(i))
119
+
120
  gpu_memory_used = round(torch.cuda.memory_allocated() / (1024**3), 2)
121
  gpu_memory_total = round(torch.cuda.get_device_properties(0).total_memory / (1024**3), 2)
122
+
123
+ gpu_info = GPUInfo(
124
+ available=True,
125
+ count=torch.cuda.device_count(),
126
+ names=gpu_names,
127
+ memory_used_gb=gpu_memory_used,
128
+ memory_total_gb=gpu_memory_total,
129
+ utilization=None
130
+ )
131
+ except Exception as e:
132
+ logger.error(f"Error getting GPU info: {e}")
133
+
134
+ # Cache info
135
+ cache_total_bytes = 0
136
+ cache_dirs = [settings.CACHE_DIR, settings.HF_CACHE_DIR]
137
+ for cache_path in cache_dirs:
138
+ if os.path.exists(cache_path):
139
+ for root, dirs, files in os.walk(cache_path):
140
+ for f in files:
141
+ try:
142
+ cache_total_bytes += os.path.getsize(os.path.join(root, f))
143
+ except:
144
+ pass
145
 
146
  return ResourceUsage(
147
+ cpu={
148
+ "percent": round(cpu_percent, 1)
149
+ },
150
+ memory={
151
+ "percent": round(memory.percent, 1),
152
+ "used_gb": round(memory.used / (1024**3), 2),
153
+ "total_gb": round(memory.total / (1024**3), 2)
154
+ },
155
+ disk={
156
+ "percent": round((disk.used / disk.total) * 100, 1),
157
+ "used_gb": round(disk.used / (1024**3), 2),
158
+ "total_gb": round(disk.total / (1024**3), 2)
159
+ },
160
+ gpu=gpu_info,
161
+ cache={
162
+ "total_bytes": cache_total_bytes
163
+ }
164
  )
165
 
166
 
 
171
  ("Models", settings.MODELS_DIR),
172
  ("Cache", settings.CACHE_DIR),
173
  ("Logs", settings.LOGS_DIR),
174
+ ("Uploads", settings.UPLOAD_DIR),
175
+ ("Outputs", settings.OUTPUT_DIR)
176
  ]
177
 
178
  result = []