File size: 15,643 Bytes
a8b3f00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
'use client'
import { useTranslation } from 'react-i18next'
import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
import type { DocType } from '@/models/datasets'
import useTimestamp from '@/hooks/use-timestamp'

/** Kind of input control that a metadata sub-field may declare via its `inputType` property. */
export type inputType = 'input' | 'select' | 'textarea'
/** Key of a metadata group: any `DocType` plus the two extra groups `originInfo` and `technicalParameters`. */
export type metadataType = DocType | 'originInfo' | 'technicalParameters'

/** Description of a single sub-field inside a metadata group. */
type MetadataSubField = {
  /** Label shown for the sub-field. */
  label: string
  /** Input control kind declared for the sub-field, if any. */
  inputType?: inputType
  field?: string
  /** Optional formatter turning the raw value (and an optional total) into displayable content. */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Description of one metadata group: its title, optional icon and its sub-fields. */
type MetadataGroup = {
  text: string
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Sub-field descriptions keyed by sub-field name. */
  subFieldsMap: Record<string, MetadataSubField>
}

/** Complete lookup table: one `MetadataGroup` per `metadataType`. */
type MetadataMap = Record<metadataType, MetadataGroup>

// i18n key prefix shared by all sub-field labels built in `useMetadataMap` (no trailing dot).
const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Hook that builds the translated metadata description table.
 *
 * The result has one entry per `metadataType`. Each entry carries a translated
 * `text`, an optional `iconName`, an optional `allowEdit` flag (set to `false`
 * for the wikipedia, notion, github, `originInfo` and `technicalParameters`
 * groups) and a `subFieldsMap` whose values hold a translated `label` plus an
 * optional `inputType` and `render` formatter.
 *
 * A fresh object is built on every call; labels are resolved with the `t`
 * function returned by `useTranslation`.
 *
 * @returns The metadata map for the current translation function.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()
  // Resolved once so both timestamp renderers below share the same format string.
  const dateTimeFormat = t('datasetDocuments.metadata.dateTimeFormat') as string

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        // NOTE(review): the next two keys are camelCase while every other group
        // uses snake_case / slash-separated keys — confirm they match the backend field names.
        'topicsKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    // The group title is intentionally an empty string here.
    originInfo: {
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        // NOTE(review): dotted keys look like property paths into the document
        // object — presumably resolved by the consumer; confirm against the caller.
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: value => formatTimestamp(value, dateTimeFormat),
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: value => formatTimestamp(value, dateTimeFormat),
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          // The raw value is used as the last segment of the translation key.
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          // Any mode other than 'automatic' is displayed as "custom".
          render: value => value === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        // NOTE(review): the unit suffixes below (" characters", " paragraphs",
        // " tokens") are hard-coded English and bypass `t` — consider translating them.
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            const hits = value || 0
            // `total` is optional: show a missing total as 0 instead of
            // leaking the text "undefined" into the rendered string.
            const all = total || 0
            // Avoid dividing by zero; a zero/missing total is reported as 0%.
            const percent = all ? ((hits / all) * 100).toFixed(2) : 0
            return `${percent}% (${hits}/${all})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}

// i18n key prefix for language names; includes the trailing dot, so a language code is appended directly.
const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Hook returning the translated display name of each supported language,
 * keyed by its two-letter language code.
 *
 * @returns An object mapping language code to the translation found under `langPrefix`.
 */
export const useLanguages = () => {
  const { t } = useTranslation()
  // Looks up one language code under the shared language-map prefix.
  const label = (code: string) => t(`${langPrefix}${code}`)

  return {
    zh: label('zh'),
    en: label('en'),
    es: label('es'),
    fr: label('fr'),
    de: label('de'),
    ja: label('ja'),
    ko: label('ko'),
    ru: label('ru'),
    ar: label('ar'),
    pt: label('pt'),
    it: label('it'),
    nl: label('nl'),
    pl: label('pl'),
    sv: label('sv'),
    tr: label('tr'),
    he: label('he'),
    hi: label('hi'),
    da: label('da'),
    fi: label('fi'),
    no: label('no'),
    hu: label('hu'),
    el: label('el'),
    cs: label('cs'),
    th: label('th'),
    id: label('id'),
    ro: label('ro'),
  }
}

// i18n key prefix for book categories; includes the trailing dot, so a category key is appended directly.
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Hook returning the translated display name of each book category,
 * keyed by category identifier.
 *
 * @returns An object mapping category key to the translation found under `bookCategoryPrefix`.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()
  // Looks up one category key under the shared book-category prefix.
  const label = (category: string) => t(`${bookCategoryPrefix}${category}`)

  return {
    fiction: label('fiction'),
    biography: label('biography'),
    history: label('history'),
    science: label('science'),
    technology: label('technology'),
    education: label('education'),
    philosophy: label('philosophy'),
    religion: label('religion'),
    socialSciences: label('socialSciences'),
    art: label('art'),
    travel: label('travel'),
    health: label('health'),
    selfHelp: label('selfHelp'),
    businessEconomics: label('businessEconomics'),
    cooking: label('cooking'),
    childrenYoungAdults: label('childrenYoungAdults'),
    comicsGraphicNovels: label('comicsGraphicNovels'),
    poetry: label('poetry'),
    drama: label('drama'),
    other: label('other'),
  }
}

// i18n key prefix for personal-document categories; includes the trailing dot, so a category key is appended directly.
const personalDocCategoryPrefix
  = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Hook returning the translated display name of each personal-document
 * category, keyed by category identifier.
 *
 * @returns An object mapping category key to the translation found under `personalDocCategoryPrefix`.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()
  // Looks up one category key under the shared personal-document prefix.
  const label = (category: string) => t(`${personalDocCategoryPrefix}${category}`)

  return {
    notes: label('notes'),
    blogDraft: label('blogDraft'),
    diary: label('diary'),
    researchReport: label('researchReport'),
    bookExcerpt: label('bookExcerpt'),
    schedule: label('schedule'),
    list: label('list'),
    projectOverview: label('projectOverview'),
    photoCollection: label('photoCollection'),
    creativeWriting: label('creativeWriting'),
    codeSnippet: label('codeSnippet'),
    designDraft: label('designDraft'),
    personalResume: label('personalResume'),
    other: label('other'),
  }
}

// i18n key prefix for business-document categories; includes the trailing dot, so a category key is appended directly.
const businessDocCategoryPrefix
  = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Hook returning the translated display name of each business-document
 * category, keyed by category identifier.
 *
 * @returns An object mapping category key to the translation found under `businessDocCategoryPrefix`.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()
  // Looks up one category key under the shared business-document prefix.
  const label = (category: string) => t(`${businessDocCategoryPrefix}${category}`)

  return {
    meetingMinutes: label('meetingMinutes'),
    researchReport: label('researchReport'),
    proposal: label('proposal'),
    employeeHandbook: label('employeeHandbook'),
    trainingMaterials: label('trainingMaterials'),
    requirementsDocument: label('requirementsDocument'),
    designDocument: label('designDocument'),
    productSpecification: label('productSpecification'),
    financialReport: label('financialReport'),
    marketAnalysis: label('marketAnalysis'),
    projectPlan: label('projectPlan'),
    teamStructure: label('teamStructure'),
    policiesProcedures: label('policiesProcedures'),
    contractsAgreements: label('contractsAgreements'),
    emailCorrespondence: label('emailCorrespondence'),
    other: label('other'),
  }
}