// Spaces: Build error — appears to be page-status text captured along with the file; it is not part of the module.
'use client' | |
import { useTranslation } from 'react-i18next' | |
import { formatFileSize, formatNumber, formatTime } from '@/utils/format' | |
import type { DocType } from '@/models/datasets' | |
import useTimestamp from '@/hooks/use-timestamp' | |
/** Form control kinds a metadata sub-field can declare via `inputType`. */
export type inputType = 'input' | 'select' | 'textarea'

/**
 * Keys of the metadata map: every document type plus the two extra groups
 * `originInfo` and `technicalParameters`.
 */
export type metadataType = DocType | 'originInfo' | 'technicalParameters'

/** Display configuration of a single sub-field inside a metadata group. */
type MetadataSubField = {
  /** Text shown next to the value. */
  label: string
  /** Control kind for the field; optional. */
  inputType?: inputType
  // NOTE(review): not set by any entry in this file — presumably an
  // alternative lookup key for the value; confirm against consumers.
  field?: string
  /**
   * Optional formatter for the raw value. `value` stays `any` because the
   * renderers receive numbers, timestamps and strings alike; `total` is only
   * consumed by renderers that report a ratio.
   */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Configuration of one metadata group (one key of `metadataType`). */
type MetadataGroup = {
  /** Title of the group. */
  text: string
  /** Optional flag; this file only ever sets it to `false`. */
  allowEdit?: boolean
  /** Optional ready-made icon node. */
  icon?: React.ReactNode
  /** Optional icon identifier. */
  iconName?: string
  /** Sub-fields of the group, keyed by the metadata property they describe. */
  subFieldsMap: Record<string, MetadataSubField>
}

/** Complete metadata configuration: one group per `metadataType` key. */
type MetadataMap = Record<metadataType, MetadataGroup>
/** Common i18n key prefix of every metadata field label. */
const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Builds the metadata configuration map for dataset documents.
 *
 * Every key of the returned object is a metadata group; each group carries a
 * translated title (`text`), optional `iconName` / `allowEdit` settings and a
 * `subFieldsMap` describing its fields (translated label, optional
 * `inputType`, optional `render` formatter).
 *
 * Labels are resolved through `useTranslation` each time the hook runs, and
 * the timestamp renderers use the formatter returned by `useTimestamp`.
 *
 * @returns The metadata map covering every document type plus the
 * `originInfo` and `technicalParameters` groups.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        // NOTE(review): the next two keys are camelCase while every other key
        // in this map is snake_case — kept verbatim; confirm against the data
        // these keys are looked up in.
        'topicsKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    originInfo: {
      // This group has no title of its own.
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          // The raw source type is itself part of the translation key.
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          // Any mode other than 'automatic' is displayed as custom.
          render: value => value === 'automatic'
            ? (t('datasetDocuments.embedding.automatic') as string)
            : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            // `||` (not `??`) so that NaN also falls back to 0.
            const hits = value || 0
            // A missing or zero total would divide by zero; report 0% instead.
            const percent = !total ? 0 : ((hits / total) * 100).toFixed(2)
            // Fall back to 0 so a missing total is never printed as "undefined".
            return `${percent}% (${hits}/${total ?? 0})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
/** i18n key prefix of the language display names (note the trailing dot). */
const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Returns the translated display name of every supported language code.
 *
 * Names are resolved through `useTranslation` each time the hook runs.
 *
 * @returns An object keyed by language code (`zh`, `en`, …) whose values are
 * the translated names.
 */
export const useLanguages = () => {
  const { t } = useTranslation()

  return {
    zh: t(`${langPrefix}zh`),
    en: t(`${langPrefix}en`),
    es: t(`${langPrefix}es`),
    fr: t(`${langPrefix}fr`),
    de: t(`${langPrefix}de`),
    ja: t(`${langPrefix}ja`),
    ko: t(`${langPrefix}ko`),
    ru: t(`${langPrefix}ru`),
    ar: t(`${langPrefix}ar`),
    pt: t(`${langPrefix}pt`),
    it: t(`${langPrefix}it`),
    nl: t(`${langPrefix}nl`),
    pl: t(`${langPrefix}pl`),
    sv: t(`${langPrefix}sv`),
    tr: t(`${langPrefix}tr`),
    he: t(`${langPrefix}he`),
    hi: t(`${langPrefix}hi`),
    da: t(`${langPrefix}da`),
    fi: t(`${langPrefix}fi`),
    no: t(`${langPrefix}no`),
    hu: t(`${langPrefix}hu`),
    el: t(`${langPrefix}el`),
    cs: t(`${langPrefix}cs`),
    th: t(`${langPrefix}th`),
    id: t(`${langPrefix}id`),
    ro: t(`${langPrefix}ro`),
  }
}
/** i18n key prefix of the book category names (note the trailing dot). */
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Returns the translated display name of every book category.
 *
 * Names are resolved through `useTranslation` each time the hook runs.
 *
 * @returns An object keyed by category identifier whose values are the
 * translated names.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()

  return {
    fiction: t(`${bookCategoryPrefix}fiction`),
    biography: t(`${bookCategoryPrefix}biography`),
    history: t(`${bookCategoryPrefix}history`),
    science: t(`${bookCategoryPrefix}science`),
    technology: t(`${bookCategoryPrefix}technology`),
    education: t(`${bookCategoryPrefix}education`),
    philosophy: t(`${bookCategoryPrefix}philosophy`),
    religion: t(`${bookCategoryPrefix}religion`),
    socialSciences: t(`${bookCategoryPrefix}socialSciences`),
    art: t(`${bookCategoryPrefix}art`),
    travel: t(`${bookCategoryPrefix}travel`),
    health: t(`${bookCategoryPrefix}health`),
    selfHelp: t(`${bookCategoryPrefix}selfHelp`),
    businessEconomics: t(`${bookCategoryPrefix}businessEconomics`),
    cooking: t(`${bookCategoryPrefix}cooking`),
    childrenYoungAdults: t(`${bookCategoryPrefix}childrenYoungAdults`),
    comicsGraphicNovels: t(`${bookCategoryPrefix}comicsGraphicNovels`),
    poetry: t(`${bookCategoryPrefix}poetry`),
    drama: t(`${bookCategoryPrefix}drama`),
    other: t(`${bookCategoryPrefix}other`),
  }
}
/** i18n key prefix of the personal-document category names (note the trailing dot). */
const personalDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Returns the translated display name of every personal-document category.
 *
 * Names are resolved through `useTranslation` each time the hook runs.
 *
 * @returns An object keyed by category identifier whose values are the
 * translated names.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()

  return {
    notes: t(`${personalDocCategoryPrefix}notes`),
    blogDraft: t(`${personalDocCategoryPrefix}blogDraft`),
    diary: t(`${personalDocCategoryPrefix}diary`),
    researchReport: t(`${personalDocCategoryPrefix}researchReport`),
    bookExcerpt: t(`${personalDocCategoryPrefix}bookExcerpt`),
    schedule: t(`${personalDocCategoryPrefix}schedule`),
    list: t(`${personalDocCategoryPrefix}list`),
    projectOverview: t(`${personalDocCategoryPrefix}projectOverview`),
    photoCollection: t(`${personalDocCategoryPrefix}photoCollection`),
    creativeWriting: t(`${personalDocCategoryPrefix}creativeWriting`),
    codeSnippet: t(`${personalDocCategoryPrefix}codeSnippet`),
    designDraft: t(`${personalDocCategoryPrefix}designDraft`),
    personalResume: t(`${personalDocCategoryPrefix}personalResume`),
    other: t(`${personalDocCategoryPrefix}other`),
  }
}
/** i18n key prefix of the business-document category names (note the trailing dot). */
const businessDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Returns the translated display name of every business-document category.
 *
 * Names are resolved through `useTranslation` each time the hook runs.
 *
 * @returns An object keyed by category identifier whose values are the
 * translated names.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()

  return {
    meetingMinutes: t(`${businessDocCategoryPrefix}meetingMinutes`),
    researchReport: t(`${businessDocCategoryPrefix}researchReport`),
    proposal: t(`${businessDocCategoryPrefix}proposal`),
    employeeHandbook: t(`${businessDocCategoryPrefix}employeeHandbook`),
    trainingMaterials: t(`${businessDocCategoryPrefix}trainingMaterials`),
    requirementsDocument: t(`${businessDocCategoryPrefix}requirementsDocument`),
    designDocument: t(`${businessDocCategoryPrefix}designDocument`),
    productSpecification: t(`${businessDocCategoryPrefix}productSpecification`),
    financialReport: t(`${businessDocCategoryPrefix}financialReport`),
    marketAnalysis: t(`${businessDocCategoryPrefix}marketAnalysis`),
    projectPlan: t(`${businessDocCategoryPrefix}projectPlan`),
    teamStructure: t(`${businessDocCategoryPrefix}teamStructure`),
    policiesProcedures: t(`${businessDocCategoryPrefix}policiesProcedures`),
    contractsAgreements: t(`${businessDocCategoryPrefix}contractsAgreements`),
    emailCorrespondence: t(`${businessDocCategoryPrefix}emailCorrespondence`),
    other: t(`${businessDocCategoryPrefix}other`),
  }
}