// apply fix to the labels and error management
// (upstream commit 4ff2db9, author: multimodalart)
import { NextRequest, NextResponse } from 'next/server';
import { whoAmI, createRepo, uploadFiles, datasetInfo } from '@huggingface/hub';
import { readdir, stat, readFile } from 'fs/promises';
import path from 'path';
/**
 * Extracts a human-readable message from an unknown thrown value.
 * Keeps `catch` clauses type-safe (`unknown`) instead of assuming `Error`.
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Validates the token by asking the Hub which user it belongs to. */
async function handleWhoami(token: string): Promise<NextResponse> {
  try {
    const user = await whoAmI({ accessToken: token });
    return NextResponse.json({ user });
  } catch {
    return NextResponse.json({ error: 'Invalid token or network error' }, { status: 401 });
  }
}

/**
 * Creates a public dataset repo `datasets/<namespace>/<datasetName>`.
 * Treats an "already exists" failure as success (idempotent create).
 */
async function handleCreateDataset(
  token: string,
  namespace: string | undefined,
  datasetName: string | undefined,
): Promise<NextResponse> {
  if (!namespace || !datasetName) {
    return NextResponse.json({ error: 'Namespace and dataset name required' }, { status: 400 });
  }
  const repoId = `datasets/${namespace}/${datasetName}`;
  try {
    await createRepo({
      repo: repoId,
      accessToken: token,
      private: false,
    });
    return NextResponse.json({ success: true, repoId });
  } catch (error: unknown) {
    const message = errorMessage(error);
    if (message.includes('already exists')) {
      // FIX: previously this path returned the unprefixed `${namespace}/${datasetName}`
      // while the success path returned the `datasets/`-prefixed id; return the same
      // id in both cases so callers see one consistent repoId.
      return NextResponse.json({ success: true, repoId, exists: true });
    }
    return NextResponse.json({ error: message || 'Failed to create dataset' }, { status: 500 });
  }
}

/**
 * Collects local files (either structured `artifacts` entries or a whole
 * `datasetPath` directory), plus an optional JSON `manifest`, and uploads
 * them to `datasets/<namespace>/<datasetName>` in a single commit.
 */
async function handleUploadDataset(
  token: string,
  namespace: string | undefined,
  datasetName: string | undefined,
  datasetPath: string | undefined,
  artifacts: unknown,
  manifest: unknown,
): Promise<NextResponse> {
  try {
    if (!namespace || !datasetName) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 });
    }
    const repoId = `datasets/${namespace}/${datasetName}`;
    const structuredArtifacts = Array.isArray(artifacts) ? artifacts : [];
    const hasStructuredArtifacts = structuredArtifacts.length > 0;
    if (!hasStructuredArtifacts && !datasetPath) {
      return NextResponse.json({ error: 'Dataset path could not be resolved' }, { status: 400 });
    }

    // Everything queued here is a Blob — no `any` needed.
    const filesToUpload: { path: string; content: Blob }[] = [];
    const uploadedPaths = new Set<string>();

    // Repo paths always use forward slashes and never start with '/'.
    const normalizeRepoPath = (value: string) => value.replace(/\\/g, '/').replace(/^\/+/, '');

    // Queue a blob for upload, skipping empty paths and duplicates.
    const addUploadContent = (repoFilePath: string, content: Blob) => {
      const normalizedRepoPath = normalizeRepoPath(repoFilePath);
      if (!normalizedRepoPath || uploadedPaths.has(normalizedRepoPath)) {
        return;
      }
      uploadedPaths.add(normalizedRepoPath);
      filesToUpload.push({ path: normalizedRepoPath, content });
    };

    // Read a file from disk and queue it under the given repo path.
    const addUploadFile = async (absolutePath: string, repoFilePath: string) => {
      const buffer = await readFile(absolutePath);
      addUploadContent(repoFilePath, new Blob([buffer]));
    };

    // Recursively mirror a local directory into the repo under `repoPrefix`.
    const walkDirectory = async (basePath: string, repoPrefix: string): Promise<void> => {
      const entries = await readdir(basePath, { withFileTypes: true });
      for (const entry of entries) {
        const entryPath = path.join(basePath, entry.name);
        const repoEntryPath = repoPrefix ? `${repoPrefix}/${entry.name}` : entry.name;
        if (entry.isDirectory()) {
          await walkDirectory(entryPath, repoEntryPath);
        } else if (entry.isFile()) {
          await addUploadFile(entryPath, repoEntryPath);
        }
      }
    };

    // Upload one artifact: a directory is walked recursively; a single file is
    // placed at `repoPath`, appending its basename when `repoPath` looks like a
    // directory (empty, trailing '/', or no file extension).
    const processArtifact = async (localPath: string, repoPath: string) => {
      const resolvedPath = path.resolve(localPath);
      let stats;
      try {
        stats = await stat(resolvedPath);
      } catch {
        throw new Error(`Dataset path does not exist: ${localPath}`);
      }
      const normalizedRepoPrefix = repoPath ? normalizeRepoPath(repoPath) : '';
      if (stats.isDirectory()) {
        await walkDirectory(resolvedPath, normalizedRepoPrefix);
      } else if (stats.isFile()) {
        let destination = normalizedRepoPrefix;
        if (!destination || destination.endsWith('/')) {
          destination = `${destination}${path.basename(resolvedPath)}`;
        } else if (!path.posix.extname(destination)) {
          destination = `${destination}/${path.basename(resolvedPath)}`;
        }
        await addUploadFile(resolvedPath, destination);
      } else {
        throw new Error(`Unsupported artifact type for path: ${localPath}`);
      }
    };

    if (hasStructuredArtifacts) {
      for (const artifact of structuredArtifacts) {
        // Skip malformed entries rather than failing the whole upload.
        if (!artifact?.localPath || !artifact?.repoPath) {
          continue;
        }
        await processArtifact(artifact.localPath, artifact.repoPath);
      }
    } else {
      // `datasetPath` is guaranteed non-empty here by the guard above.
      const resolvedDatasetPath = path.resolve(datasetPath ?? '');
      let datasetStats;
      try {
        datasetStats = await stat(resolvedDatasetPath);
      } catch {
        return NextResponse.json({ error: 'Dataset path does not exist' }, { status: 400 });
      }
      if (!datasetStats.isDirectory()) {
        return NextResponse.json({ error: 'Dataset path must be a directory' }, { status: 400 });
      }
      await walkDirectory(resolvedDatasetPath, '');
    }

    if (manifest) {
      const manifestBlob = new Blob([
        JSON.stringify(manifest, null, 2)
      ], { type: 'application/json' });
      addUploadContent('manifest.json', manifestBlob);
    }

    if (filesToUpload.length === 0) {
      return NextResponse.json({ error: 'No files found to upload for dataset' }, { status: 400 });
    }

    await uploadFiles({
      repo: repoId,
      accessToken: token,
      files: filesToUpload,
    });
    return NextResponse.json({ success: true, repoId });
  } catch (error: unknown) {
    console.error('Upload error:', error);
    return NextResponse.json({ error: errorMessage(error) || 'Failed to upload dataset' }, { status: 500 });
  }
}

/**
 * Lists image files (by extension) and `.txt` caption files directly inside
 * `datasetPath` (non-recursive). `total` counts images only.
 */
async function handleListFiles(datasetPath: string | undefined): Promise<NextResponse> {
  try {
    if (!datasetPath) {
      return NextResponse.json({ error: 'Dataset path required' }, { status: 400 });
    }
    const entries = await readdir(datasetPath, { withFileTypes: true });
    const imageExtensions = ['.jpg', '.jpeg', '.png', '.webp', '.bmp'];
    const regularFiles = entries.filter((file) => file.isFile());
    const toEntry = (name: string) => ({ name, path: path.join(datasetPath, name) });
    const imageFiles = regularFiles
      .filter((file) => imageExtensions.some((ext) => file.name.toLowerCase().endsWith(ext)))
      .map((file) => toEntry(file.name));
    const captionFiles = regularFiles
      .filter((file) => file.name.endsWith('.txt'))
      .map((file) => toEntry(file.name));
    return NextResponse.json({
      images: imageFiles,
      captions: captionFiles,
      total: imageFiles.length
    });
  } catch (error: unknown) {
    return NextResponse.json({ error: errorMessage(error) || 'Failed to list files' }, { status: 500 });
  }
}

/**
 * Checks that `datasetId` exists on the Hub and is readable with the given
 * token, distinguishing "not found" (200 + exists:false) from "no access" (403).
 */
async function handleValidateDataset(
  token: string,
  datasetId: string | undefined,
): Promise<NextResponse> {
  if (!datasetId) {
    return NextResponse.json({ error: 'Dataset ID required' }, { status: 400 });
  }
  try {
    // Try to get dataset info to validate it exists and is accessible
    const dataset = await datasetInfo({
      name: datasetId,
      accessToken: token,
    });
    return NextResponse.json({
      exists: true,
      dataset: {
        id: dataset.id,
        author: dataset.author,
        downloads: dataset.downloads,
        likes: dataset.likes,
        private: dataset.private,
      }
    });
  } catch (error: unknown) {
    const message = errorMessage(error);
    if (message.includes('404') || message.includes('not found')) {
      return NextResponse.json({ exists: false }, { status: 200 });
    }
    if (message.includes('401') || message.includes('403')) {
      return NextResponse.json({ error: 'Dataset not accessible with current token' }, { status: 403 });
    }
    return NextResponse.json({ error: message || 'Failed to validate dataset' }, { status: 500 });
  }
}

/**
 * Hugging Face Hub proxy endpoint. The JSON body selects an `action`
 * ('whoami' | 'createDataset' | 'uploadDataset' | 'listFiles' | 'validateDataset')
 * and supplies its parameters; `token` is required for every action.
 */
export async function POST(request: NextRequest) {
  try {
    const body = await request.json();
    const { action, token, namespace, datasetName, datasetPath, datasetId, artifacts, manifest } = body;
    if (!token) {
      return NextResponse.json({ error: 'HF token is required' }, { status: 400 });
    }
    switch (action) {
      case 'whoami':
        return handleWhoami(token);
      case 'createDataset':
        return handleCreateDataset(token, namespace, datasetName);
      case 'uploadDataset':
        return handleUploadDataset(token, namespace, datasetName, datasetPath, artifacts, manifest);
      case 'listFiles':
        return handleListFiles(datasetPath);
      case 'validateDataset':
        return handleValidateDataset(token, datasetId);
      default:
        return NextResponse.json({ error: 'Invalid action' }, { status: 400 });
    }
  } catch (error: unknown) {
    console.error('HF Hub API error:', error);
    return NextResponse.json({ error: errorMessage(error) || 'Internal server error' }, { status: 500 });
  }
}