// /api/validate — dataset-annotation validation API routes (PUT, DELETE).
| import { NextResponse } from 'next/server'; | |
| import fs from 'fs'; | |
| import { commit } from '@huggingface/hub'; | |
| import { HF_DATASET_ID, HF_DATASET_BASE_URL, getCorpus, getDocRepoPath, getDocLocalPath } from '../../../utils/config.js'; | |
// True when running inside a Hugging Face Space deployment: an HF_TOKEN is
// configured and we are not in local development. Coerced with Boolean() so
// the predicate returns a real boolean instead of leaking the raw token
// string (or `undefined`) as its return value.
const isHFSpace = () => Boolean(process.env.HF_TOKEN) && process.env.NODE_ENV !== 'development';
| /** | |
| * PUT /api/validate | |
| * Body: { corpus, document_index, page_number, dataset_index, updates } | |
| */ | |
/**
 * PUT /api/validate
 * Body: { corpus, document_index, page_number, dataset_index, updates }
 *
 * Applies an update to one dataset entry on one page of a document. If
 * `updates` contains any validation field, it is recorded as a per-annotator
 * validation verdict (one entry per annotator in the `validations` array);
 * otherwise `updates` is merged field-by-field onto the dataset entry.
 *
 * Storage backend is chosen by isHFSpace(): the HF dataset repo in
 * production, a local JSON file in development.
 *
 * Responses: 200 { success, dataset }, 400 on bad parameters, 404 when the
 * document/page is not found, 500 on unexpected errors.
 */
export async function PUT(request) {
  try {
    const { corpus: corpusId, document_index, page_number, dataset_index, updates } = await request.json();

    // Validate the request shape before resolving the corpus, so a malformed
    // body always yields this 400 rather than a 500 from a downstream throw.
    if (document_index == null || page_number == null || dataset_index == null || !updates) {
      return NextResponse.json(
        { error: 'Missing document_index, page_number, dataset_index, or updates' },
        { status: 400 }
      );
    }

    const corpus = getCorpus(corpusId);
    if (!corpus) {
      // An unknown corpus previously fell through to a TypeError on
      // `corpus.id` (reported as a 500); surface it as a client error.
      return NextResponse.json({ error: `Unknown corpus: ${corpusId}` }, { status: 400 });
    }

    // --- Load the document's pages from the configured backend ---
    let pagesData;
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
      const res = await fetch(url, {
        headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
      });
      if (!res.ok) {
        return NextResponse.json({ error: `Document not found on HF (${corpus.id})` }, { status: 404 });
      }
      pagesData = await res.json();
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      if (!fs.existsSync(filePath)) {
        return NextResponse.json({ error: `Document not found locally (${corpus.id})` }, { status: 404 });
      }
      pagesData = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    }

    // Pages are located by the first entry of document.pages.
    const pageIdx = pagesData.findIndex(p => p.document?.pages?.[0] === page_number);
    if (pageIdx === -1) {
      return NextResponse.json({ error: `Page ${page_number} not found` }, { status: 404 });
    }

    const datasets = pagesData[pageIdx].datasets || [];
    if (dataset_index < 0 || dataset_index >= datasets.length) {
      return NextResponse.json({ error: `Dataset index ${dataset_index} out of range` }, { status: 400 });
    }

    // --- Apply the update ---
    // Presence of any validation field marks this request as a validation
    // verdict rather than a raw field edit.
    const currentEntry = pagesData[pageIdx].datasets[dataset_index];
    const annotator = updates.annotator || 'unknown';
    const validationFields = ['human_validated', 'human_verdict', 'human_notes', 'annotator', 'validated_at'];
    const isValidation = validationFields.some(f => f in updates);

    if (isValidation) {
      const validations = currentEntry.validations || [];
      const existingIdx = validations.findIndex(v => v.annotator === annotator);
      const validationEntry = {
        human_validated: updates.human_validated,
        human_verdict: updates.human_verdict,
        human_notes: updates.human_notes || null,
        annotator,
        validated_at: updates.validated_at || new Date().toISOString(),
      };
      if (existingIdx >= 0) {
        // Re-validation by the same annotator replaces their earlier verdict.
        validations[existingIdx] = validationEntry;
      } else {
        validations.push(validationEntry);
      }
      pagesData[pageIdx].datasets[dataset_index] = { ...currentEntry, validations };
    } else {
      pagesData[pageIdx].datasets[dataset_index] = { ...currentEntry, ...updates };
    }

    // --- Persist the whole document back ---
    // NOTE(review): this is a read-modify-write of the full file; concurrent
    // PUT/DELETE requests to the same document can lose updates.
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const content = JSON.stringify(pagesData, null, 2);
      await commit({
        repo: { type: 'dataset', name: HF_DATASET_ID },
        credentials: { accessToken: process.env.HF_TOKEN },
        title: `Validate ${corpus.id}/doc_${document_index} page ${page_number}`,
        operations: [{
          operation: 'addOrUpdate',
          path: repoPath,
          content: new Blob([content], { type: 'application/json' }),
        }],
      });
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      fs.writeFileSync(filePath, JSON.stringify(pagesData, null, 2));
    }

    return NextResponse.json({
      success: true,
      dataset: pagesData[pageIdx].datasets[dataset_index],
    });
  } catch (error) {
    console.error('Validate error:', error);
    return NextResponse.json({ error: 'Failed to validate: ' + error.message }, { status: 500 });
  }
}
| /** | |
| * DELETE /api/validate?corpus=X&doc=X&page=Y&idx=Z | |
| */ | |
/**
 * DELETE /api/validate?corpus=X&doc=X&page=Y&idx=Z
 *
 * Removes dataset entry `idx` from page `page` of document `doc` in corpus
 * `corpus`, then persists the whole document back (HF dataset repo in
 * production, local JSON file in development — chosen by isHFSpace()).
 *
 * Responses: 200 { success }, 400 on bad parameters, 404 when the
 * document/page is not found, 500 on unexpected errors.
 */
export async function DELETE(request) {
  try {
    const { searchParams } = new URL(request.url);
    const corpusId = searchParams.get('corpus');
    // searchParams.get() returns null for missing params; parseInt(null)
    // yields NaN, which the guard below rejects.
    const document_index = parseInt(searchParams.get('doc'), 10);
    const page_number = parseInt(searchParams.get('page'), 10);
    const dataset_index = parseInt(searchParams.get('idx'), 10);

    if (isNaN(document_index) || isNaN(page_number) || isNaN(dataset_index)) {
      return NextResponse.json(
        { error: 'Missing doc, page, or idx parameter' },
        { status: 400 }
      );
    }

    const corpus = getCorpus(corpusId);
    if (!corpus) {
      // An unknown corpus previously fell through to a TypeError on
      // `corpus.id` (reported as a 500); surface it as a client error.
      return NextResponse.json({ error: `Unknown corpus: ${corpusId}` }, { status: 400 });
    }

    // --- Load the document's pages from the configured backend ---
    let pagesData;
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
      const res = await fetch(url, {
        headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
      });
      if (!res.ok) {
        return NextResponse.json({ error: `Document not found on HF (${corpus.id})` }, { status: 404 });
      }
      pagesData = await res.json();
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      if (!fs.existsSync(filePath)) {
        return NextResponse.json({ error: `Document not found locally (${corpus.id})` }, { status: 404 });
      }
      pagesData = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    }

    // Pages are located by the first entry of document.pages.
    const pageIdx = pagesData.findIndex(p => p.document?.pages?.[0] === page_number);
    if (pageIdx === -1) {
      return NextResponse.json({ error: `Page ${page_number} not found` }, { status: 404 });
    }

    const datasets = pagesData[pageIdx].datasets || [];
    if (dataset_index < 0 || dataset_index >= datasets.length) {
      return NextResponse.json({ error: `Dataset index ${dataset_index} out of range` }, { status: 400 });
    }

    // Remove the entry in place; later indices shift down by one.
    pagesData[pageIdx].datasets.splice(dataset_index, 1);

    // --- Persist the whole document back ---
    // NOTE(review): read-modify-write of the full file; concurrent requests
    // to the same document can lose updates.
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const content = JSON.stringify(pagesData, null, 2);
      await commit({
        repo: { type: 'dataset', name: HF_DATASET_ID },
        credentials: { accessToken: process.env.HF_TOKEN },
        title: `Delete from ${corpus.id}/doc_${document_index} page ${page_number}`,
        operations: [{
          operation: 'addOrUpdate',
          path: repoPath,
          content: new Blob([content], { type: 'application/json' }),
        }],
      });
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      fs.writeFileSync(filePath, JSON.stringify(pagesData, null, 2));
    }

    return NextResponse.json({ success: true });
  } catch (error) {
    console.error('Delete error:', error);
    return NextResponse.json({ error: 'Failed to delete: ' + error.message }, { status: 500 });
  }
}