fkzhao committed on
Commit
614a413
·
1 Parent(s): b43a465

Supports obtaining PDF documents from web pages (#1107)

Browse files

### What problem does this PR solve?

Knowledge base management supports crawling information from web pages
and generating PDF documents

### Type of change
- [x] New Feature (Support document from web pages)

api/apps/document_app.py CHANGED
@@ -39,6 +39,7 @@ from api.settings import RetCode
39
  from api.utils.api_utils import get_json_result
40
  from rag.utils.minio_conn import MINIO
41
  from api.utils.file_utils import filename_type, thumbnail
 
42
 
43
 
44
  @manager.route('/upload', methods=['POST'])
@@ -289,7 +290,7 @@ def run():
289
  return get_data_error_result(retmsg="Tenant not found!")
290
  ELASTICSEARCH.deleteByQuery(
291
  Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
292
-
293
  if str(req["run"]) == TaskStatus.RUNNING.value:
294
  TaskService.filter_delete([Task.doc_id == id])
295
  e, doc = DocumentService.get_by_id(id)
@@ -416,3 +417,69 @@ def get_image(image_id):
416
  return response
417
  except Exception as e:
418
  return server_error_response(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  from api.utils.api_utils import get_json_result
40
  from rag.utils.minio_conn import MINIO
41
  from api.utils.file_utils import filename_type, thumbnail
42
+ from api.utils.web_utils import html2pdf, is_valid_url
43
 
44
 
45
  @manager.route('/upload', methods=['POST'])
 
290
  return get_data_error_result(retmsg="Tenant not found!")
291
  ELASTICSEARCH.deleteByQuery(
292
  Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
293
+
294
  if str(req["run"]) == TaskStatus.RUNNING.value:
295
  TaskService.filter_delete([Task.doc_id == id])
296
  e, doc = DocumentService.get_by_id(id)
 
417
  return response
418
  except Exception as e:
419
  return server_error_response(e)
420
+
421
+
422
@manager.route('/web_crawl', methods=['POST'])
@login_required
def web_crawl():
    """Render a web page to PDF and register it as a knowledge-base document.

    Reads ``kb_id``, ``name`` and ``url`` from the submitted form. The page at
    ``url`` is converted with ``html2pdf``, the resulting bytes are stored via
    ``MINIO.put`` and a document row plus its file entry are created.

    Returns:
        ``get_json_result(data=True)`` on success; an ARGUMENT_ERROR result
        when a form field is missing or the URL is rejected by
        ``is_valid_url``; a SERVER_ERROR result carrying the error text when
        rendering or storing fails.

    Raises:
        LookupError: if the knowledge base cannot be found.
    """
    kb_id = request.form.get("kb_id")
    if not kb_id:
        return get_json_result(
            data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR)
    name = request.form.get("name")
    url = request.form.get("url")
    if not name:
        return get_json_result(
            data=False, retmsg='Lack of "name"', retcode=RetCode.ARGUMENT_ERROR)
    if not url:
        return get_json_result(
            data=False, retmsg='Lack of "url"', retcode=RetCode.ARGUMENT_ERROR)
    if not is_valid_url(url):
        return get_json_result(
            data=False, retmsg='The URL format is invalid', retcode=RetCode.ARGUMENT_ERROR)
    e, kb = KnowledgebaseService.get_by_id(kb_id)
    if not e:
        raise LookupError("Can't find this knowledgebase!")

    # Make sure the user's folder hierarchy exists before attaching the file.
    root_folder = FileService.get_root_folder(current_user.id)
    pf_id = root_folder["id"]
    FileService.init_knowledgebase_docs(pf_id, current_user.id)
    kb_root_folder = FileService.get_kb_folder(current_user.id)
    kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])

    try:
        filename = duplicate_name(
            DocumentService.query,
            name=name + ".pdf",
            kb_id=kb.id)
        filetype = filename_type(filename)
        if filetype == FileType.OTHER.value:
            raise RuntimeError("This type of file has not been supported yet!")

        # Pick an object name that is not already taken in this bucket.
        location = filename
        while MINIO.obj_exist(kb_id, location):
            location += "_"
        blob = html2pdf(url)
        MINIO.put(kb_id, location, blob)
        doc = {
            "id": get_uuid(),
            "kb_id": kb.id,
            "parser_id": kb.parser_id,
            "parser_config": kb.parser_config,
            "created_by": current_user.id,
            "type": filetype,
            "name": filename,
            "location": location,
            "size": len(blob),
            "thumbnail": thumbnail(filename, blob)
        }
        # Compare against ``.value`` so this check agrees with the
        # ``FileType.OTHER.value`` comparison above.
        if doc["type"] == FileType.VISUAL.value:
            doc["parser_id"] = ParserType.PICTURE.value
        if re.search(r"\.(ppt|pptx|pages)$", filename):
            doc["parser_id"] = ParserType.PRESENTATION.value
        DocumentService.insert(doc)
        FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
    except Exception as err:
        # Send the message text, not the exception object itself.
        return get_json_result(
            data=False, retmsg=str(err), retcode=RetCode.SERVER_ERROR)
    return get_json_result(data=True)
api/utils/web_utils.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import base64
4
+
5
+ from selenium import webdriver
6
+ from selenium.webdriver.chrome.options import Options
7
+ from selenium.webdriver.chrome.service import Service
8
+ from selenium.common.exceptions import TimeoutException
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support.expected_conditions import staleness_of
11
+ from webdriver_manager.chrome import ChromeDriverManager
12
+ from selenium.webdriver.common.by import By
13
+
14
+
15
def html2pdf(
    source: str,
    timeout: int = 2,
    install_driver: bool = True,
    print_options: "dict | None" = None,
):
    """Convert the page at ``source`` to PDF and return the PDF bytes.

    Args:
        source: URL handed to the browser.
        timeout: Seconds passed to the page wait before printing.
        install_driver: When true, the Chrome driver is obtained through
            ``ChromeDriverManager``; otherwise the default driver lookup of
            ``webdriver.Chrome`` is used.
        print_options: Extra options merged over the default print settings.
            ``None`` (the default) means no overrides.

    Returns:
        Whatever ``__get_pdf_from_html`` returns for these arguments.
    """
    # A fresh dict per call: a ``{}`` default would be shared between calls.
    options = {} if print_options is None else dict(print_options)
    return __get_pdf_from_html(source, timeout, install_driver, options)
23
+
24
+
25
def __send_devtools(driver, cmd, params=None):
    """Send a DevTools command through the driver and return its ``value``.

    Args:
        driver: Object exposing ``session_id`` and a ``command_executor``
            with ``_url`` and ``_request``.
        cmd: DevTools command name, e.g. ``"Page.printToPDF"``.
        params: Command parameters; ``None`` is sent as an empty mapping.

    Returns:
        The ``"value"`` entry of the response.

    Raises:
        RuntimeError: if the driver returns an empty response.
    """
    # Avoid a shared mutable default for the parameters.
    payload = {} if params is None else params
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({"cmd": cmd, "params": payload})
    response = driver.command_executor._request("POST", url, body)

    if not response:
        # The response may be None here, so it must not be dereferenced.
        raise RuntimeError("Empty response for DevTools command %r" % cmd)

    return response.get("value")
35
+
36
+
37
def __get_pdf_from_html(
    path: str,
    timeout: int,
    install_driver: bool,
    print_options: dict
):
    """Open ``path`` in headless Chrome and return the printed page as bytes.

    Args:
        path: URL passed to ``driver.get``.
        timeout: Seconds given to ``WebDriverWait`` before printing.
        install_driver: When true, build the driver service from
            ``ChromeDriverManager().install()``.
        print_options: Overrides merged over the default
            ``Page.printToPDF`` options; a falsy value means no overrides.

    Returns:
        The base64-decoded ``data`` field of the ``Page.printToPDF`` result.
    """
    webdriver_options = Options()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox",
                 "--disable-dev-shm-usage"):
        webdriver_options.add_argument(flag)
    # NOTE(review): value 2 presumably disables image loading -- confirm.
    webdriver_options.experimental_options["prefs"] = {
        "profile.default_content_settings": {"images": 2},
    }

    if install_driver:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=webdriver_options)
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    try:
        driver.get(path)

        try:
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
            )
        except TimeoutException:
            # Expected outcome: the wait only bounds how long we pause.
            pass

        # Print whether or not the wait timed out, so the function always
        # yields a PDF instead of leaving the result undefined.
        calculated_print_options = {
            "landscape": False,
            "displayHeaderFooter": False,
            "printBackground": True,
            "preferCSSPageSize": True,
        }
        calculated_print_options.update(print_options or {})
        result = __send_devtools(
            driver, "Page.printToPDF", calculated_print_options)
        return base64.b64decode(result["data"])
    finally:
        # Always shut the browser down, including on errors.
        driver.quit()
77
+
78
+
79
def is_valid_url(url: str) -> bool:
    """Return True when ``url`` looks like an http(s)/ftp/file URL.

    The value must start with a supported scheme followed by ``://`` and at
    least two permitted characters, and must not contain whitespace or
    control characters anywhere. Non-string input yields False.
    """
    # NOTE(review): ``file://`` is accepted here and the URL is later opened
    # by a server-side browser -- confirm that local files should be reachable.
    if not isinstance(url, str):
        return False
    # ``re.match`` only checks a prefix, so reject embedded whitespace or
    # control characters explicitly.
    if re.search(r"[\s\x00-\x1f\x7f]", url):
        return False
    return bool(re.match(r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url))
81
+
82
+
requirements.txt CHANGED
@@ -138,4 +138,6 @@ umap-learn
138
  fasttext==0.9.2
139
  volcengine==1.0.141
140
  readability-lxml==0.8.1
141
- html_text==0.6.2
 
 
 
138
  fasttext==0.9.2
139
  volcengine==1.0.141
140
  readability-lxml==0.8.1
141
+ html_text==0.6.2
142
+ selenium==4.21.0
143
+ webdriver-manager==4.0.1
web/src/hooks/documentHooks.ts CHANGED
@@ -1,13 +1,13 @@
1
- import { IChunk, IKnowledgeFile } from '@/interfaces/database/knowledge';
2
- import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
3
- import { api_host } from '@/utils/api';
4
- import { buildChunkHighlights } from '@/utils/documentUtils';
5
- import { UploadFile } from 'antd';
6
- import { useCallback, useMemo, useState } from 'react';
7
- import { IHighlight } from 'react-pdf-highlighter';
8
- import { useDispatch, useSelector } from 'umi';
9
- import { useGetKnowledgeSearchParams } from './routeHook';
10
- import { useOneNamespaceEffectsLoading } from './storeHooks';
11
 
12
  export const useGetDocumentUrl = (documentId?: string) => {
13
  const getDocumentUrl = useCallback(
@@ -207,6 +207,28 @@ export const useUploadDocument = () => {
207
  return uploadDocument;
208
  };
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  export const useRunDocument = () => {
211
  const dispatch = useDispatch();
212
 
 
1
+ import {IChunk, IKnowledgeFile} from '@/interfaces/database/knowledge';
2
+ import {IChangeParserConfigRequestBody} from '@/interfaces/request/document';
3
+ import {api_host} from '@/utils/api';
4
+ import {buildChunkHighlights} from '@/utils/documentUtils';
5
+ import {UploadFile} from 'antd';
6
+ import {useCallback, useMemo, useState} from 'react';
7
+ import {IHighlight} from 'react-pdf-highlighter';
8
+ import {useDispatch, useSelector} from 'umi';
9
+ import {useGetKnowledgeSearchParams} from './routeHook';
10
+ import {useOneNamespaceEffectsLoading} from './storeHooks';
11
 
12
  export const useGetDocumentUrl = (documentId?: string) => {
13
  const getDocumentUrl = useCallback(
 
207
  return uploadDocument;
208
  };
209
 
210
/**
 * Returns a callback that dispatches the `kFModel/web_crawl` effect for the
 * knowledge base taken from the current route search params.
 *
 * The callback receives the document name and the page URL and returns the
 * value of `dispatch`; if dispatching throws, the error is logged and
 * `undefined` is returned.
 */
export const useWebCrawl = () => {
  const dispatch = useDispatch();
  const { knowledgeId } = useGetKnowledgeSearchParams();

  return useCallback(
    (name: string, url: string) => {
      try {
        return dispatch<any>({
          type: 'kFModel/web_crawl',
          payload: { name, url, kb_id: knowledgeId },
        });
      } catch (errorInfo) {
        console.log('Failed:', errorInfo);
      }
    },
    // knowledgeId must be listed, otherwise the callback keeps sending the
    // id captured on the first render.
    [dispatch, knowledgeId],
  );
};
231
+
232
  export const useRunDocument = () => {
233
  const dispatch = useDispatch();
234
 
web/src/locales/en.ts CHANGED
@@ -81,6 +81,7 @@ export default {
81
  searchFiles: 'Search your files',
82
  localFiles: 'Local files',
83
  emptyFiles: 'Create empty file',
 
84
  chunkNumber: 'Chunk Number',
85
  uploadDate: 'Upload Date',
86
  chunkMethod: 'Chunk Method',
 
81
  searchFiles: 'Search your files',
82
  localFiles: 'Local files',
83
  emptyFiles: 'Create empty file',
84
+ webCrawl: 'Web Crawl',
85
  chunkNumber: 'Chunk Number',
86
  uploadDate: 'Upload Date',
87
  chunkMethod: 'Chunk Method',
web/src/locales/zh-traditional.ts CHANGED
@@ -80,6 +80,7 @@ export default {
80
  searchFiles: '搜索文件',
81
  localFiles: '本地文件',
82
  emptyFiles: '新建空文件',
 
83
  chunkNumber: '分塊數',
84
  uploadDate: '上傳日期',
85
  chunkMethod: '解析方法',
 
80
  searchFiles: '搜索文件',
81
  localFiles: '本地文件',
82
  emptyFiles: '新建空文件',
83
+ webCrawl: '網頁抓取',
84
  chunkNumber: '分塊數',
85
  uploadDate: '上傳日期',
86
  chunkMethod: '解析方法',
web/src/locales/zh.ts CHANGED
@@ -80,6 +80,7 @@ export default {
80
  searchFiles: '搜索文件',
81
  localFiles: '本地文件',
82
  emptyFiles: '新建空文件',
 
83
  chunkNumber: '分块数',
84
  uploadDate: '上传日期',
85
  chunkMethod: '解析方法',
 
80
  searchFiles: '搜索文件',
81
  localFiles: '本地文件',
82
  emptyFiles: '新建空文件',
83
+ webCrawl: '网页抓取',
84
  chunkNumber: '分块数',
85
  uploadDate: '上传日期',
86
  chunkMethod: '解析方法',
web/src/pages/add-knowledge/components/knowledge-file/document-toolbar.tsx CHANGED
@@ -29,13 +29,15 @@ import styles from './index.less';
29
  interface IProps {
30
  selectedRowKeys: string[];
31
  showCreateModal(): void;
 
32
  showDocumentUploadModal(): void;
33
  }
34
 
35
  const DocumentToolbar = ({
36
- selectedRowKeys,
37
- showCreateModal,
38
- showDocumentUploadModal,
 
39
  }: IProps) => {
40
  const { t } = useTranslate('knowledgeDetails');
41
  const { fetchDocumentList } = useFetchDocumentListOnMount();
@@ -66,6 +68,19 @@ const DocumentToolbar = ({
66
  { type: 'divider' },
67
  {
68
  key: '2',
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  onClick: showCreateModal,
70
  label: (
71
  <div>
@@ -77,7 +92,7 @@ const DocumentToolbar = ({
77
  ),
78
  },
79
  ];
80
- }, [showDocumentUploadModal, showCreateModal, t]);
81
 
82
  const handleDelete = useCallback(() => {
83
  showDeleteConfirm({
 
29
  interface IProps {
30
  selectedRowKeys: string[];
31
  showCreateModal(): void;
32
+ showWebCrawlModal(): void;
33
  showDocumentUploadModal(): void;
34
  }
35
 
36
  const DocumentToolbar = ({
37
+ selectedRowKeys,
38
+ showCreateModal,
39
+ showWebCrawlModal,
40
+ showDocumentUploadModal,
41
  }: IProps) => {
42
  const { t } = useTranslate('knowledgeDetails');
43
  const { fetchDocumentList } = useFetchDocumentListOnMount();
 
68
  { type: 'divider' },
69
  {
70
  key: '2',
71
+ onClick: showWebCrawlModal,
72
+ label: (
73
+ <div>
74
+ <Button type="link">
75
+ <FileTextOutlined />
76
+ {t('webCrawl')}
77
+ </Button>
78
+ </div>
79
+ ),
80
+ },
81
+ { type: 'divider' },
82
+ {
83
+ key: '3',
84
  onClick: showCreateModal,
85
  label: (
86
  <div>
 
92
  ),
93
  },
94
  ];
95
+ }, [showDocumentUploadModal, showWebCrawlModal, showCreateModal, t]);
96
 
97
  const handleDelete = useCallback(() => {
98
  showDeleteConfirm({
web/src/pages/add-knowledge/components/knowledge-file/hooks.ts CHANGED
@@ -7,6 +7,7 @@ import {
7
  useSelectRunDocumentLoading,
8
  useSetDocumentParser,
9
  useUploadDocument,
 
10
  } from '@/hooks/documentHooks';
11
  import { useGetKnowledgeSearchParams } from '@/hooks/routeHook';
12
  import { useOneNamespaceEffectsLoading } from '@/hooks/storeHooks';
@@ -286,6 +287,37 @@ export const useHandleUploadDocument = () => {
286
  };
287
  };
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  export const useHandleRunDocumentByIds = (id: string) => {
290
  const loading = useSelectRunDocumentLoading();
291
  const runDocumentByIds = useRunDocument();
 
7
  useSelectRunDocumentLoading,
8
  useSetDocumentParser,
9
  useUploadDocument,
10
+ useWebCrawl,
11
  } from '@/hooks/documentHooks';
12
  import { useGetKnowledgeSearchParams } from '@/hooks/routeHook';
13
  import { useOneNamespaceEffectsLoading } from '@/hooks/storeHooks';
 
287
  };
288
  };
289
 
290
/**
 * Bundles the modal visibility state, the submit handler and the loading
 * flag used by the web-crawl dialog.
 *
 * `onWebCrawlUploadOk` resolves to 0 and hides the modal when the crawl
 * effect returns 0; any other result resolves to -1 and leaves it open.
 */
export const useHandleWebCrawl = () => {
  const modalState = useSetModalState();
  const hideWebCrawlUploadModal = modalState.hideModal;
  const crawl = useWebCrawl();

  const onWebCrawlUploadOk = useCallback(
    async (name: string, url: string) => {
      const retcode = await crawl(name, url);
      if (retcode !== 0) {
        return -1;
      }
      hideWebCrawlUploadModal();
      return 0;
    },
    [crawl, hideWebCrawlUploadModal],
  );

  const webCrawlUploadLoading = useOneNamespaceEffectsLoading('kFModel', [
    'web_crawl',
  ]);

  return {
    webCrawlUploadLoading,
    onWebCrawlUploadOk,
    webCrawlUploadVisible: modalState.visible,
    hideWebCrawlUploadModal,
    showWebCrawlUploadModal: modalState.showModal,
  };
};
320
+
321
  export const useHandleRunDocumentByIds = (id: string) => {
322
  const loading = useSelectRunDocumentLoading();
323
  const runDocumentByIds = useRunDocument();
web/src/pages/add-knowledge/components/knowledge-file/index.tsx CHANGED
@@ -12,6 +12,7 @@ import { Divider, Flex, Switch, Table, Typography } from 'antd';
12
  import type { ColumnsType } from 'antd/es/table';
13
  import { useTranslation } from 'react-i18next';
14
  import CreateFileModal from './create-file-modal';
 
15
  import DocumentToolbar from './document-toolbar';
16
  import {
17
  useChangeDocumentParser,
@@ -19,7 +20,7 @@ import {
19
  useFetchDocumentListOnMount,
20
  useGetPagination,
21
  useGetRowSelection,
22
- useHandleUploadDocument,
23
  useNavigateToOtherPage,
24
  useRenameDocument,
25
  } from './hooks';
@@ -69,6 +70,13 @@ const KnowledgeFile = () => {
69
  onDocumentUploadOk,
70
  documentUploadLoading,
71
  } = useHandleUploadDocument();
 
 
 
 
 
 
 
72
  const { t } = useTranslation('translation', {
73
  keyPrefix: 'knowledgeDetails',
74
  });
@@ -170,6 +178,7 @@ const KnowledgeFile = () => {
170
  <DocumentToolbar
171
  selectedRowKeys={rowSelection.selectedRowKeys as string[]}
172
  showCreateModal={showCreateModal}
 
173
  showDocumentUploadModal={showDocumentUploadModal}
174
  ></DocumentToolbar>
175
  <Table
@@ -211,6 +220,12 @@ const KnowledgeFile = () => {
211
  loading={documentUploadLoading}
212
  onOk={onDocumentUploadOk}
213
  ></FileUploadModal>
 
 
 
 
 
 
214
  </div>
215
  );
216
  };
 
12
  import type { ColumnsType } from 'antd/es/table';
13
  import { useTranslation } from 'react-i18next';
14
  import CreateFileModal from './create-file-modal';
15
+ import WebCrawlModal from './web-crawl-modal';
16
  import DocumentToolbar from './document-toolbar';
17
  import {
18
  useChangeDocumentParser,
 
20
  useFetchDocumentListOnMount,
21
  useGetPagination,
22
  useGetRowSelection,
23
+ useHandleUploadDocument, useHandleWebCrawl,
24
  useNavigateToOtherPage,
25
  useRenameDocument,
26
  } from './hooks';
 
70
  onDocumentUploadOk,
71
  documentUploadLoading,
72
  } = useHandleUploadDocument();
73
+ const {
74
+ webCrawlUploadVisible,
75
+ hideWebCrawlUploadModal,
76
+ showWebCrawlUploadModal,
77
+ onWebCrawlUploadOk,
78
+ webCrawlUploadLoading,
79
+ } = useHandleWebCrawl();
80
  const { t } = useTranslation('translation', {
81
  keyPrefix: 'knowledgeDetails',
82
  });
 
178
  <DocumentToolbar
179
  selectedRowKeys={rowSelection.selectedRowKeys as string[]}
180
  showCreateModal={showCreateModal}
181
+ showWebCrawlModal={showWebCrawlUploadModal}
182
  showDocumentUploadModal={showDocumentUploadModal}
183
  ></DocumentToolbar>
184
  <Table
 
220
  loading={documentUploadLoading}
221
  onOk={onDocumentUploadOk}
222
  ></FileUploadModal>
223
+ <WebCrawlModal
224
+ visible={webCrawlUploadVisible}
225
+ hideModal={hideWebCrawlUploadModal}
226
+ loading={webCrawlUploadLoading}
227
+ onOk={onWebCrawlUploadOk}
228
+ ></WebCrawlModal>
229
  </div>
230
  );
231
  };
web/src/pages/add-knowledge/components/knowledge-file/model.ts CHANGED
@@ -232,6 +232,27 @@ const model: DvaModel<KFModelState> = {
232
  }
233
  return data;
234
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  },
236
  subscriptions: {
237
  setup({ dispatch, history }) {
 
232
  }
233
  return data;
234
  },
235
// Posts name/url/kb_id as form data to the web_crawl service, shows a success
// toast on retcode 0, reloads the document list on retcode 0 or 500, and
// returns the retcode to the dispatcher.
*web_crawl({ payload = {} }, { call, put }) {
  const body = new FormData();
  for (const field of ['name', 'url', 'kb_id']) {
    body.append(field, payload[field]);
  }

  const { data } = yield call(kbService.web_crawl, body);
  const { retcode } = data;

  if (retcode === 0) {
    message.success(i18n.t('message.uploaded'));
  }
  if (retcode === 0 || retcode === 500) {
    yield put({
      type: 'getKfList',
      payload: { kb_id: payload.kb_id },
    });
  }
  return data.retcode;
},
256
  },
257
  subscriptions: {
258
  setup({ dispatch, history }) {
web/src/pages/add-knowledge/components/knowledge-file/web-crawl-modal.tsx ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { IModalManagerChildrenProps } from '@/components/modal-manager';
2
+ import { Form, Input, Modal } from 'antd';
3
+ import React from 'react';
4
+ import {useTranslate} from "@/hooks/commonHooks";
5
+
6
+
7
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
  // True while the crawl request is in flight.
  loading: boolean;
  // Called with the validated form values when the user confirms.
  onOk: (name: string, url: string) => void;
  showModal?(): void;
}

/**
 * Dialog that asks for a document name and a page URL and forwards them to
 * `onOk` once the form validates.
 */
const WebCrawlModal: React.FC<IProps> = ({
  visible,
  hideModal,
  loading,
  onOk,
}) => {
  const [form] = Form.useForm();
  const { t } = useTranslate('knowledgeDetails');

  const handleOk = async () => {
    const values = await form.validateFields();
    onOk(values.name, values.url);
  };

  return (
    <Modal
      title={t('webCrawl')}
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      // Show progress on the OK button while the request is running.
      confirmLoading={loading}
    >
      <Form
        form={form}
        name="validateOnly"
        labelCol={{ span: 4 }}
        wrapperCol={{ span: 20 }}
        style={{ maxWidth: 600 }}
        autoComplete="off"
      >
        <Form.Item
          label="Name"
          name="name"
          rules={[
            { required: true, message: 'Please input name!' },
            // Limit matches the 128 characters stated in the message.
            {
              max: 128,
              message: 'The maximum length of name is 128 characters',
            },
          ]}
        >
          <Input placeholder="Document name" />
        </Form.Item>
        <Form.Item
          label="URL"
          name="url"
          rules={[
            { required: true, message: 'Please input url!' },
            {
              pattern: new RegExp(
                '(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]',
              ),
              message: 'Please enter a valid URL!',
            },
          ]}
        >
          <Input placeholder="https://www.baidu.com" />
        </Form.Item>
      </Form>
    </Modal>
  );
};
export default WebCrawlModal;
web/src/services/kbService.ts CHANGED
@@ -26,6 +26,7 @@ const {
26
  document_run,
27
  get_document_file,
28
  document_upload,
 
29
  } = api;
30
 
31
  const methods = {
@@ -87,6 +88,10 @@ const methods = {
87
  url: document_upload,
88
  method: 'post',
89
  },
 
 
 
 
90
  // chunk管理
91
  chunk_list: {
92
  url: chunk_list,
 
26
  document_run,
27
  get_document_file,
28
  document_upload,
29
+ web_crawl,
30
  } = api;
31
 
32
  const methods = {
 
88
  url: document_upload,
89
  method: 'post',
90
  },
91
+ web_crawl: {
92
+ url: web_crawl,
93
+ method: 'post',
94
+ },
95
  // chunk管理
96
  chunk_list: {
97
  url: chunk_list,
web/src/utils/api.ts CHANGED
@@ -48,6 +48,7 @@ export default {
48
  document_thumbnails: `${api_host}/document/thumbnails`,
49
  get_document_file: `${api_host}/document/get`,
50
  document_upload: `${api_host}/document/upload`,
 
51
 
52
  // chat
53
  setDialog: `${api_host}/dialog/set`,
 
48
  document_thumbnails: `${api_host}/document/thumbnails`,
49
  get_document_file: `${api_host}/document/get`,
50
  document_upload: `${api_host}/document/upload`,
51
+ web_crawl: `${api_host}/document/web_crawl`,
52
 
53
  // chat
54
  setDialog: `${api_host}/dialog/set`,