File size: 3,976 Bytes
31b0744
 
f081192
 
31b0744
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f081192
 
 
31b0744
 
 
 
 
 
 
 
 
 
 
 
 
f081192
 
 
 
31b0744
f081192
31b0744
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
from typing import Optional

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Print a GPU-availability banner at import time so deployment logs show
# exactly what hardware the service landed on.
print("=" * 50)
print("🚀 OpenManus FastAPI - GPU Verification")
print("=" * 50)
print(f"Is CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    print("⚠️  WARNING: CUDA not available - running on CPU")
else:
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"PyTorch version: {torch.__version__}")
print("=" * 50)

# FastAPI application instance; title/description/version feed the /docs UI.
app = FastAPI(
    title="OpenManus FastAPI",
    description="High-performance FastAPI service with NVIDIA A10G GPU support",
    version="1.0.0",
)

# CORS middleware
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers per the CORS spec — confirm credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files
# Serves the frontend assets from ./static (index.html is returned by "/").
app.mount("/static", StaticFiles(directory="static"), name="static")


# Request models
class TextRequest(BaseModel):
    """Request body for POST /process."""

    text: str  # input text to transform
    max_length: int = 100  # echoed back in the response; not enforced as a cap


class HealthResponse(BaseModel):
    """Response body for GET /health."""

    status: str  # "healthy" whenever the endpoint responds
    gpu_available: bool  # torch.cuda.is_available() at request time
    cuda_devices: int  # 0 when no GPU is available
    # Fix: `device_name: str = None` is an implicit Optional — pydantic v2
    # raises a validation/definition error for it. Declare it explicitly.
    device_name: Optional[str] = None

@app.get("/")
async def serve_frontend():
    """Return the bundled single-page frontend for the site root."""
    index_path = "static/index.html"
    return FileResponse(index_path)

@app.get("/api", response_model=dict)
async def root():
    """Describe the service and point at its main endpoints."""
    endpoint_map = {"health": "/health", "gpu_info": "/gpu-info", "docs": "/docs"}
    return {
        "message": "OpenManus FastAPI Service",
        "version": "1.0.0",
        "endpoints": endpoint_map,
    }


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report liveness plus a summary of CUDA availability."""
    has_gpu = torch.cuda.is_available()
    if has_gpu:
        num_devices = torch.cuda.device_count()
        name = torch.cuda.get_device_name(0) if num_devices > 0 else None
    else:
        num_devices = 0
        name = None

    return HealthResponse(
        status="healthy",
        gpu_available=has_gpu,
        cuda_devices=num_devices,
        device_name=name,
    )


@app.get("/gpu-info")
async def gpu_info():
    """Return per-device CUDA details (name, memory, compute capability)."""
    if not torch.cuda.is_available():
        return {"error": "CUDA not available"}

    def _describe(idx):
        # One JSON-serializable record per CUDA device.
        props = torch.cuda.get_device_properties(idx)
        return {
            "id": idx,
            "name": torch.cuda.get_device_name(idx),
            "total_memory_gb": round(props.total_memory / 1024**3, 2),
            "major": props.major,
            "minor": props.minor,
            "multi_processor_count": props.multi_processor_count,
        }

    return {
        "cuda_available": True,
        "device_count": torch.cuda.device_count(),
        "devices": [_describe(i) for i in range(torch.cuda.device_count())],
    }


@app.post("/process")
async def process_text(request: TextRequest):
    """Example endpoint for text processing."""
    try:
        text = request.text
        return {
            "input": text,
            "length": len(text),
            "max_length": request.max_length,
            "processed": text.upper(),  # Simple transformation
            "gpu_used": torch.cuda.is_available(),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    # Default to 7860 (Hugging Face Spaces convention) unless PORT overrides it.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run("main:app", host="0.0.0.0", port=listen_port, reload=False, workers=1)