Kevin Hu committed on
Commit
3ec35b2
·
1 Parent(s): e195b4d

force eml file to be parsed by EMAIL (#2615)

Browse files

### What problem does this PR solve?
#2613
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

api/apps/dataset_api.py CHANGED
@@ -381,6 +381,8 @@ def upload_documents(dataset_id):
381
  doc["parser_id"] = ParserType.AUDIO.value
382
  if re.search(r"\.(ppt|pptx|pages)$", filename):
383
  doc["parser_id"] = ParserType.PRESENTATION.value
 
 
384
  DocumentService.insert(doc)
385
 
386
  FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
 
381
  doc["parser_id"] = ParserType.AUDIO.value
382
  if re.search(r"\.(ppt|pptx|pages)$", filename):
383
  doc["parser_id"] = ParserType.PRESENTATION.value
384
+ if re.search(r"\.(eml)$", filename):
385
+ doc["parser_id"] = ParserType.EMAIL.value
386
  DocumentService.insert(doc)
387
 
388
  FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
api/apps/document_app.py CHANGED
@@ -139,6 +139,8 @@ def web_crawl():
139
  doc["parser_id"] = ParserType.AUDIO.value
140
  if re.search(r"\.(ppt|pptx|pages)$", filename):
141
  doc["parser_id"] = ParserType.PRESENTATION.value
 
 
142
  DocumentService.insert(doc)
143
  FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
144
  except Exception as e:
 
139
  doc["parser_id"] = ParserType.AUDIO.value
140
  if re.search(r"\.(ppt|pptx|pages)$", filename):
141
  doc["parser_id"] = ParserType.PRESENTATION.value
142
+ if re.search(r"\.(eml)$", filename):
143
+ doc["parser_id"] = ParserType.EMAIL.value
144
  DocumentService.insert(doc)
145
  FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
146
  except Exception as e: