File size: 1,029 Bytes
f807e7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import json

from .pdf_processor import PDFProcessor
from .gpt_processor import Translator, EmbeddingGenerator, KeywordsGenerator, TopicsGenerator

# Registry mapping a file extension (without the leading dot) to the
# processor class that WorkFlowController uses for files of that format.
processors = dict(pdf=PDFProcessor)

class WorkFlowController:
    """Load one file through its format-specific processor and post-process it.

    On construction the file extension selects a processor class from the
    module-level ``processors`` registry; that class is instantiated with the
    file path and its ``file_info`` attribute is kept on the controller.
    ``process_file`` translates non-Chinese content in place and
    ``dump_to_json`` writes ``file_info`` to ``<file_name>.json``.
    """

    def __init__(self, file_path: str, file_name: str) -> None:
        """Read the raw content of *file_path* with the matching processor.

        Args:
            file_path: Path of the file to load; the text after its last
                ``.`` selects the processor.
            file_name: Base name (without ``.json``) used by ``dump_to_json``.

        Raises:
            KeyError: If no processor is registered for the file's extension.
        """
        self.file_name = file_name
        file_format = self._file_format(file_path)
        try:
            # Note: this stores the processor *class*, not an instance.
            self.file_processor = processors[file_format]
        except KeyError:
            # Same exception type as a plain failed lookup, but with enough
            # context to tell which file and which extension were rejected.
            raise KeyError(
                f"unsupported file format {file_format!r} for file {file_path!r}"
            ) from None
        self.file_info = self.file_processor(file_path).file_info

    @staticmethod
    def _file_format(file_path: str) -> str:
        """Return the lower-cased text after the last ``.`` in *file_path*.

        Lower-casing lets ``report.PDF`` resolve to the ``'pdf'`` registry
        entry. A path without any dot is returned whole (lower-cased), which
        then fails the registry lookup in ``__init__``.
        """
        return file_path.rpartition('.')[2].lower()

    def process_file(self) -> None:
        """Translate the file content to Chinese when it is not already Chinese.

        The work is done in place on ``self.file_info``; nothing is returned.
        When ``file_info['is_chinese']`` is truthy the method is a no-op.
        """
        if self.file_info['is_chinese']:
            return

        translator = Translator()
        # NOTE(review): only the entry stored under key ``1`` is translated.
        # Presumably this is the first page/section of the document -- confirm
        # against the structure PDFProcessor builds for ``file_info``.
        entry = self.file_info[1]
        entry['file_content'] = translator.translate_to_chinese(entry['file_content'])

    def dump_to_json(self) -> None:
        """Write ``self.file_info`` to ``<file_name>.json`` as UTF-8 JSON."""
        with open(f'{self.file_name}.json', 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the output
            # instead of emitting \uXXXX escapes.
            json.dump(self.file_info, f, indent=4, ensure_ascii=False)