zhichyu committed on
Commit
0404a52
·
1 Parent(s): fa89227

Fix errors detected by Ruff (#3918)

Browse files

### What problem does this PR solve?

Fix errors detected by Ruff

### Type of change

- [x] Refactoring

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. agent/canvas.py +23 -13
  2. agent/component/__init__.py +70 -0
  3. agent/component/base.py +10 -5
  4. agent/component/categorize.py +8 -5
  5. agent/component/deepl.py +0 -1
  6. agent/component/exesql.py +4 -2
  7. agent/component/generate.py +24 -12
  8. agent/component/rewrite.py +2 -1
  9. agent/component/switch.py +10 -7
  10. agent/component/template.py +2 -1
  11. agent/test/client.py +2 -1
  12. api/apps/api_app.py +8 -5
  13. api/apps/canvas_app.py +4 -4
  14. api/apps/chunk_app.py +2 -1
  15. api/apps/conversation_app.py +4 -2
  16. api/apps/dialog_app.py +4 -2
  17. api/apps/document_app.py +4 -3
  18. api/apps/llm_app.py +4 -2
  19. api/apps/sdk/agent.py +1 -1
  20. api/apps/sdk/doc.py +2 -3
  21. api/apps/sdk/session.py +8 -4
  22. api/apps/user_app.py +1 -1
  23. api/db/db_models.py +1 -1
  24. api/db/init_data.py +1 -1
  25. api/db/services/__init__.py +5 -4
  26. api/db/services/api_service.py +2 -1
  27. api/db/services/canvas_service.py +1 -3
  28. api/db/services/common_service.py +1 -1
  29. api/db/services/dialog_service.py +24 -14
  30. api/db/services/document_service.py +2 -1
  31. api/db/services/file2document_service.py +2 -2
  32. api/db/services/file_service.py +4 -2
  33. api/db/services/llm_service.py +2 -1
  34. api/db/services/task_service.py +66 -34
  35. api/db/services/user_service.py +1 -1
  36. api/ragflow_server.py +3 -3
  37. api/utils/api_utils.py +0 -1
  38. api/validation.py +1 -1
  39. deepdoc/parser/__init__.py +13 -1
  40. deepdoc/parser/excel_parser.py +12 -8
  41. deepdoc/parser/html_parser.py +1 -1
  42. deepdoc/parser/json_parser.py +1 -1
  43. deepdoc/parser/pdf_parser.py +9 -8
  44. deepdoc/parser/resume/__init__.py +60 -19
  45. deepdoc/parser/resume/entities/corporations.py +47 -21
  46. deepdoc/parser/resume/entities/degrees.py +20 -16
  47. deepdoc/parser/resume/entities/industries.py +684 -679
  48. deepdoc/parser/resume/entities/regions.py +758 -748
  49. deepdoc/parser/resume/entities/schools.py +28 -17
  50. deepdoc/parser/resume/step_two.py +202 -106
agent/canvas.py CHANGED
@@ -133,7 +133,8 @@ class Canvas(ABC):
133
  "components": {}
134
  }
135
  for k in self.dsl.keys():
136
- if k in ["components"]:continue
 
137
  dsl[k] = deepcopy(self.dsl[k])
138
 
139
  for k, cpn in self.components.items():
@@ -158,7 +159,8 @@ class Canvas(ABC):
158
 
159
  def get_compnent_name(self, cid):
160
  for n in self.dsl["graph"]["nodes"]:
161
- if cid == n["id"]: return n["data"]["name"]
 
162
  return ""
163
 
164
  def run(self, **kwargs):
@@ -173,7 +175,8 @@ class Canvas(ABC):
173
  if kwargs.get("stream"):
174
  for an in ans():
175
  yield an
176
- else: yield ans
 
177
  return
178
 
179
  if not self.path:
@@ -188,7 +191,8 @@ class Canvas(ABC):
188
  def prepare2run(cpns):
189
  nonlocal ran, ans
190
  for c in cpns:
191
- if self.path[-1] and c == self.path[-1][-1]: continue
 
192
  cpn = self.components[c]["obj"]
193
  if cpn.component_name == "Answer":
194
  self.answer.append(c)
@@ -197,7 +201,8 @@ class Canvas(ABC):
197
  if c not in without_dependent_checking:
198
  cpids = cpn.get_dependent_components()
199
  if any([cc not in self.path[-1] for cc in cpids]):
200
- if c not in waiting: waiting.append(c)
 
201
  continue
202
  yield "*'{}'* is running...🕞".format(self.get_compnent_name(c))
203
  ans = cpn.run(self.history, **kwargs)
@@ -211,10 +216,12 @@ class Canvas(ABC):
211
  logging.debug(f"Canvas.run: {ran} {self.path}")
212
  cpn_id = self.path[-1][ran]
213
  cpn = self.get_component(cpn_id)
214
- if not cpn["downstream"]: break
 
215
 
216
  loop = self._find_loop()
217
- if loop: raise OverflowError(f"Too much loops: {loop}")
 
218
 
219
  if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]:
220
  switch_out = cpn["obj"].output()[1].iloc[0, 0]
@@ -283,19 +290,22 @@ class Canvas(ABC):
283
 
284
  def _find_loop(self, max_loops=6):
285
  path = self.path[-1][::-1]
286
- if len(path) < 2: return False
 
287
 
288
  for i in range(len(path)):
289
  if path[i].lower().find("answer") >= 0:
290
  path = path[:i]
291
  break
292
 
293
- if len(path) < 2: return False
 
294
 
295
- for l in range(2, len(path) // 2):
296
- pat = ",".join(path[0:l])
297
  path_str = ",".join(path)
298
- if len(pat) >= len(path_str): return False
 
299
  loop = max_loops
300
  while path_str.find(pat) == 0 and loop >= 0:
301
  loop -= 1
@@ -303,7 +313,7 @@ class Canvas(ABC):
303
  return False
304
  path_str = path_str[len(pat)+1:]
305
  if loop < 0:
306
- pat = " => ".join([p.split(":")[0] for p in path[0:l]])
307
  return pat + " => " + pat
308
 
309
  return False
 
133
  "components": {}
134
  }
135
  for k in self.dsl.keys():
136
+ if k in ["components"]:
137
+ continue
138
  dsl[k] = deepcopy(self.dsl[k])
139
 
140
  for k, cpn in self.components.items():
 
159
 
160
  def get_compnent_name(self, cid):
161
  for n in self.dsl["graph"]["nodes"]:
162
+ if cid == n["id"]:
163
+ return n["data"]["name"]
164
  return ""
165
 
166
  def run(self, **kwargs):
 
175
  if kwargs.get("stream"):
176
  for an in ans():
177
  yield an
178
+ else:
179
+ yield ans
180
  return
181
 
182
  if not self.path:
 
191
  def prepare2run(cpns):
192
  nonlocal ran, ans
193
  for c in cpns:
194
+ if self.path[-1] and c == self.path[-1][-1]:
195
+ continue
196
  cpn = self.components[c]["obj"]
197
  if cpn.component_name == "Answer":
198
  self.answer.append(c)
 
201
  if c not in without_dependent_checking:
202
  cpids = cpn.get_dependent_components()
203
  if any([cc not in self.path[-1] for cc in cpids]):
204
+ if c not in waiting:
205
+ waiting.append(c)
206
  continue
207
  yield "*'{}'* is running...🕞".format(self.get_compnent_name(c))
208
  ans = cpn.run(self.history, **kwargs)
 
216
  logging.debug(f"Canvas.run: {ran} {self.path}")
217
  cpn_id = self.path[-1][ran]
218
  cpn = self.get_component(cpn_id)
219
+ if not cpn["downstream"]:
220
+ break
221
 
222
  loop = self._find_loop()
223
+ if loop:
224
+ raise OverflowError(f"Too much loops: {loop}")
225
 
226
  if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]:
227
  switch_out = cpn["obj"].output()[1].iloc[0, 0]
 
290
 
291
  def _find_loop(self, max_loops=6):
292
  path = self.path[-1][::-1]
293
+ if len(path) < 2:
294
+ return False
295
 
296
  for i in range(len(path)):
297
  if path[i].lower().find("answer") >= 0:
298
  path = path[:i]
299
  break
300
 
301
+ if len(path) < 2:
302
+ return False
303
 
304
+ for loc in range(2, len(path) // 2):
305
+ pat = ",".join(path[0:loc])
306
  path_str = ",".join(path)
307
+ if len(pat) >= len(path_str):
308
+ return False
309
  loop = max_loops
310
  while path_str.find(pat) == 0 and loop >= 0:
311
  loop -= 1
 
313
  return False
314
  path_str = path_str[len(pat)+1:]
315
  if loop < 0:
316
+ pat = " => ".join([p.split(":")[0] for p in path[0:loc]])
317
  return pat + " => " + pat
318
 
319
  return False
agent/component/__init__.py CHANGED
@@ -39,3 +39,73 @@ def component_class(class_name):
39
  m = importlib.import_module("agent.component")
40
  c = getattr(m, class_name)
41
  return c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  m = importlib.import_module("agent.component")
40
  c = getattr(m, class_name)
41
  return c
42
+
43
+ __all__ = [
44
+ "Begin",
45
+ "BeginParam",
46
+ "Generate",
47
+ "GenerateParam",
48
+ "Retrieval",
49
+ "RetrievalParam",
50
+ "Answer",
51
+ "AnswerParam",
52
+ "Categorize",
53
+ "CategorizeParam",
54
+ "Switch",
55
+ "SwitchParam",
56
+ "Relevant",
57
+ "RelevantParam",
58
+ "Message",
59
+ "MessageParam",
60
+ "RewriteQuestion",
61
+ "RewriteQuestionParam",
62
+ "KeywordExtract",
63
+ "KeywordExtractParam",
64
+ "Concentrator",
65
+ "ConcentratorParam",
66
+ "Baidu",
67
+ "BaiduParam",
68
+ "DuckDuckGo",
69
+ "DuckDuckGoParam",
70
+ "Wikipedia",
71
+ "WikipediaParam",
72
+ "PubMed",
73
+ "PubMedParam",
74
+ "ArXiv",
75
+ "ArXivParam",
76
+ "Google",
77
+ "GoogleParam",
78
+ "Bing",
79
+ "BingParam",
80
+ "GoogleScholar",
81
+ "GoogleScholarParam",
82
+ "DeepL",
83
+ "DeepLParam",
84
+ "GitHub",
85
+ "GitHubParam",
86
+ "BaiduFanyi",
87
+ "BaiduFanyiParam",
88
+ "QWeather",
89
+ "QWeatherParam",
90
+ "ExeSQL",
91
+ "ExeSQLParam",
92
+ "YahooFinance",
93
+ "YahooFinanceParam",
94
+ "WenCai",
95
+ "WenCaiParam",
96
+ "Jin10",
97
+ "Jin10Param",
98
+ "TuShare",
99
+ "TuShareParam",
100
+ "AkShare",
101
+ "AkShareParam",
102
+ "Crawler",
103
+ "CrawlerParam",
104
+ "Invoke",
105
+ "InvokeParam",
106
+ "Template",
107
+ "TemplateParam",
108
+ "Email",
109
+ "EmailParam",
110
+ "component_class"
111
+ ]
agent/component/base.py CHANGED
@@ -428,7 +428,8 @@ class ComponentBase(ABC):
428
  def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
429
  o = getattr(self._param, self._param.output_var_name)
430
  if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
431
- if not isinstance(o, list): o = [o]
 
432
  o = pd.DataFrame(o)
433
 
434
  if allow_partial or not isinstance(o, partial):
@@ -440,7 +441,8 @@ class ComponentBase(ABC):
440
  for oo in o():
441
  if not isinstance(oo, pd.DataFrame):
442
  outs = pd.DataFrame(oo if isinstance(oo, list) else [oo])
443
- else: outs = oo
 
444
  return self._param.output_var_name, outs
445
 
446
  def reset(self):
@@ -482,13 +484,15 @@ class ComponentBase(ABC):
482
  outs.append(pd.DataFrame([{"content": q["value"]}]))
483
  if outs:
484
  df = pd.concat(outs, ignore_index=True)
485
- if "content" in df: df = df.drop_duplicates(subset=['content']).reset_index(drop=True)
 
486
  return df
487
 
488
  upstream_outs = []
489
 
490
  for u in reversed_cpnts[::-1]:
491
- if self.get_component_name(u) in ["switch", "concentrator"]: continue
 
492
  if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
493
  o = self._canvas.get_component(u)["obj"].output(allow_partial=False)[1]
494
  if o is not None:
@@ -532,7 +536,8 @@ class ComponentBase(ABC):
532
  reversed_cpnts.extend(self._canvas.path[-1])
533
 
534
  for u in reversed_cpnts[::-1]:
535
- if self.get_component_name(u) in ["switch", "answer"]: continue
 
536
  return self._canvas.get_component(u)["obj"].output()[1]
537
 
538
  @staticmethod
 
428
  def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
429
  o = getattr(self._param, self._param.output_var_name)
430
  if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
431
+ if not isinstance(o, list):
432
+ o = [o]
433
  o = pd.DataFrame(o)
434
 
435
  if allow_partial or not isinstance(o, partial):
 
441
  for oo in o():
442
  if not isinstance(oo, pd.DataFrame):
443
  outs = pd.DataFrame(oo if isinstance(oo, list) else [oo])
444
+ else:
445
+ outs = oo
446
  return self._param.output_var_name, outs
447
 
448
  def reset(self):
 
484
  outs.append(pd.DataFrame([{"content": q["value"]}]))
485
  if outs:
486
  df = pd.concat(outs, ignore_index=True)
487
+ if "content" in df:
488
+ df = df.drop_duplicates(subset=['content']).reset_index(drop=True)
489
  return df
490
 
491
  upstream_outs = []
492
 
493
  for u in reversed_cpnts[::-1]:
494
+ if self.get_component_name(u) in ["switch", "concentrator"]:
495
+ continue
496
  if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
497
  o = self._canvas.get_component(u)["obj"].output(allow_partial=False)[1]
498
  if o is not None:
 
536
  reversed_cpnts.extend(self._canvas.path[-1])
537
 
538
  for u in reversed_cpnts[::-1]:
539
+ if self.get_component_name(u) in ["switch", "answer"]:
540
+ continue
541
  return self._canvas.get_component(u)["obj"].output()[1]
542
 
543
  @staticmethod
agent/component/categorize.py CHANGED
@@ -34,15 +34,18 @@ class CategorizeParam(GenerateParam):
34
  super().check()
35
  self.check_empty(self.category_description, "[Categorize] Category examples")
36
  for k, v in self.category_description.items():
37
- if not k: raise ValueError("[Categorize] Category name can not be empty!")
38
- if not v.get("to"): raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
 
 
39
 
40
  def get_prompt(self):
41
  cate_lines = []
42
  for c, desc in self.category_description.items():
43
- for l in desc.get("examples", "").split("\n"):
44
- if not l: continue
45
- cate_lines.append("Question: {}\tCategory: {}".format(l, c))
 
46
  descriptions = []
47
  for c, desc in self.category_description.items():
48
  if desc.get("description"):
 
34
  super().check()
35
  self.check_empty(self.category_description, "[Categorize] Category examples")
36
  for k, v in self.category_description.items():
37
+ if not k:
38
+ raise ValueError("[Categorize] Category name can not be empty!")
39
+ if not v.get("to"):
40
+ raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
41
 
42
  def get_prompt(self):
43
  cate_lines = []
44
  for c, desc in self.category_description.items():
45
+ for line in desc.get("examples", "").split("\n"):
46
+ if not line:
47
+ continue
48
+ cate_lines.append("Question: {}\tCategory: {}".format(line, c))
49
  descriptions = []
50
  for c, desc in self.category_description.items():
51
  if desc.get("description"):
agent/component/deepl.py CHANGED
@@ -14,7 +14,6 @@
14
  # limitations under the License.
15
  #
16
  from abc import ABC
17
- import re
18
  from agent.component.base import ComponentBase, ComponentParamBase
19
  import deepl
20
 
 
14
  # limitations under the License.
15
  #
16
  from abc import ABC
 
17
  from agent.component.base import ComponentBase, ComponentParamBase
18
  import deepl
19
 
agent/component/exesql.py CHANGED
@@ -46,8 +46,10 @@ class ExeSQLParam(ComponentParamBase):
46
  self.check_empty(self.password, "Database password")
47
  self.check_positive_integer(self.top_n, "Number of records")
48
  if self.database == "rag_flow":
49
- if self.host == "ragflow-mysql": raise ValueError("The host is not accessible.")
50
- if self.password == "infini_rag_flow": raise ValueError("The host is not accessible.")
 
 
51
 
52
 
53
  class ExeSQL(ComponentBase, ABC):
 
46
  self.check_empty(self.password, "Database password")
47
  self.check_positive_integer(self.top_n, "Number of records")
48
  if self.database == "rag_flow":
49
+ if self.host == "ragflow-mysql":
50
+ raise ValueError("The host is not accessible.")
51
+ if self.password == "infini_rag_flow":
52
+ raise ValueError("The host is not accessible.")
53
 
54
 
55
  class ExeSQL(ComponentBase, ABC):
agent/component/generate.py CHANGED
@@ -51,11 +51,16 @@ class GenerateParam(ComponentParamBase):
51
 
52
  def gen_conf(self):
53
  conf = {}
54
- if self.max_tokens > 0: conf["max_tokens"] = self.max_tokens
55
- if self.temperature > 0: conf["temperature"] = self.temperature
56
- if self.top_p > 0: conf["top_p"] = self.top_p
57
- if self.presence_penalty > 0: conf["presence_penalty"] = self.presence_penalty
58
- if self.frequency_penalty > 0: conf["frequency_penalty"] = self.frequency_penalty
 
 
 
 
 
59
  return conf
60
 
61
 
@@ -83,7 +88,8 @@ class Generate(ComponentBase):
83
  recall_docs = []
84
  for i in idx:
85
  did = retrieval_res.loc[int(i), "doc_id"]
86
- if did in doc_ids: continue
 
87
  doc_ids.add(did)
88
  recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]})
89
 
@@ -108,7 +114,8 @@ class Generate(ComponentBase):
108
  retrieval_res = []
109
  self._param.inputs = []
110
  for para in self._param.parameters:
111
- if not para.get("component_id"): continue
 
112
  component_id = para["component_id"].split("@")[0]
113
  if para["component_id"].lower().find("@") >= 0:
114
  cpn_id, key = para["component_id"].split("@")
@@ -142,7 +149,8 @@ class Generate(ComponentBase):
142
 
143
  if retrieval_res:
144
  retrieval_res = pd.concat(retrieval_res, ignore_index=True)
145
- else: retrieval_res = pd.DataFrame([])
 
146
 
147
  for n, v in kwargs.items():
148
  prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
@@ -164,9 +172,11 @@ class Generate(ComponentBase):
164
  return pd.DataFrame([res])
165
 
166
  msg = self._canvas.get_history(self._param.message_history_window_size)
167
- if len(msg) < 1: msg.append({"role": "user", "content": ""})
 
168
  _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
169
- if len(msg) < 2: msg.append({"role": "user", "content": ""})
 
170
  ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf())
171
 
172
  if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
@@ -185,9 +195,11 @@ class Generate(ComponentBase):
185
  return
186
 
187
  msg = self._canvas.get_history(self._param.message_history_window_size)
188
- if len(msg) < 1: msg.append({"role": "user", "content": ""})
 
189
  _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
190
- if len(msg) < 2: msg.append({"role": "user", "content": ""})
 
191
  answer = ""
192
  for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()):
193
  res = {"content": ans, "reference": []}
 
51
 
52
  def gen_conf(self):
53
  conf = {}
54
+ if self.max_tokens > 0:
55
+ conf["max_tokens"] = self.max_tokens
56
+ if self.temperature > 0:
57
+ conf["temperature"] = self.temperature
58
+ if self.top_p > 0:
59
+ conf["top_p"] = self.top_p
60
+ if self.presence_penalty > 0:
61
+ conf["presence_penalty"] = self.presence_penalty
62
+ if self.frequency_penalty > 0:
63
+ conf["frequency_penalty"] = self.frequency_penalty
64
  return conf
65
 
66
 
 
88
  recall_docs = []
89
  for i in idx:
90
  did = retrieval_res.loc[int(i), "doc_id"]
91
+ if did in doc_ids:
92
+ continue
93
  doc_ids.add(did)
94
  recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]})
95
 
 
114
  retrieval_res = []
115
  self._param.inputs = []
116
  for para in self._param.parameters:
117
+ if not para.get("component_id"):
118
+ continue
119
  component_id = para["component_id"].split("@")[0]
120
  if para["component_id"].lower().find("@") >= 0:
121
  cpn_id, key = para["component_id"].split("@")
 
149
 
150
  if retrieval_res:
151
  retrieval_res = pd.concat(retrieval_res, ignore_index=True)
152
+ else:
153
+ retrieval_res = pd.DataFrame([])
154
 
155
  for n, v in kwargs.items():
156
  prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
 
172
  return pd.DataFrame([res])
173
 
174
  msg = self._canvas.get_history(self._param.message_history_window_size)
175
+ if len(msg) < 1:
176
+ msg.append({"role": "user", "content": ""})
177
  _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
178
+ if len(msg) < 2:
179
+ msg.append({"role": "user", "content": ""})
180
  ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf())
181
 
182
  if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
 
195
  return
196
 
197
  msg = self._canvas.get_history(self._param.message_history_window_size)
198
+ if len(msg) < 1:
199
+ msg.append({"role": "user", "content": ""})
200
  _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
201
+ if len(msg) < 2:
202
+ msg.append({"role": "user", "content": ""})
203
  answer = ""
204
  for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()):
205
  res = {"content": ans, "reference": []}
agent/component/rewrite.py CHANGED
@@ -95,7 +95,8 @@ class RewriteQuestion(Generate, ABC):
95
  hist = self._canvas.get_history(4)
96
  conv = []
97
  for m in hist:
98
- if m["role"] not in ["user", "assistant"]: continue
 
99
  conv.append("{}: {}".format(m["role"].upper(), m["content"]))
100
  conv = "\n".join(conv)
101
 
 
95
  hist = self._canvas.get_history(4)
96
  conv = []
97
  for m in hist:
98
+ if m["role"] not in ["user", "assistant"]:
99
+ continue
100
  conv.append("{}: {}".format(m["role"].upper(), m["content"]))
101
  conv = "\n".join(conv)
102
 
agent/component/switch.py CHANGED
@@ -41,7 +41,8 @@ class SwitchParam(ComponentParamBase):
41
  def check(self):
42
  self.check_empty(self.conditions, "[Switch] conditions")
43
  for cond in self.conditions:
44
- if not cond["to"]: raise ValueError(f"[Switch] 'To' can not be empty!")
 
45
 
46
 
47
  class Switch(ComponentBase, ABC):
@@ -51,7 +52,8 @@ class Switch(ComponentBase, ABC):
51
  res = []
52
  for cond in self._param.conditions:
53
  for item in cond["items"]:
54
- if not item["cpn_id"]: continue
 
55
  if item["cpn_id"].find("begin") >= 0:
56
  continue
57
  cid = item["cpn_id"].split("@")[0]
@@ -63,7 +65,8 @@ class Switch(ComponentBase, ABC):
63
  for cond in self._param.conditions:
64
  res = []
65
  for item in cond["items"]:
66
- if not item["cpn_id"]:continue
 
67
  cid = item["cpn_id"].split("@")[0]
68
  if item["cpn_id"].find("@") > 0:
69
  cpn_id, key = item["cpn_id"].split("@")
@@ -107,22 +110,22 @@ class Switch(ComponentBase, ABC):
107
  elif operator == ">":
108
  try:
109
  return True if float(input) > float(value) else False
110
- except Exception as e:
111
  return True if input > value else False
112
  elif operator == "<":
113
  try:
114
  return True if float(input) < float(value) else False
115
- except Exception as e:
116
  return True if input < value else False
117
  elif operator == "≥":
118
  try:
119
  return True if float(input) >= float(value) else False
120
- except Exception as e:
121
  return True if input >= value else False
122
  elif operator == "≤":
123
  try:
124
  return True if float(input) <= float(value) else False
125
- except Exception as e:
126
  return True if input <= value else False
127
 
128
  raise ValueError('Not supported operator' + operator)
 
41
  def check(self):
42
  self.check_empty(self.conditions, "[Switch] conditions")
43
  for cond in self.conditions:
44
+ if not cond["to"]:
45
+ raise ValueError("[Switch] 'To' can not be empty!")
46
 
47
 
48
  class Switch(ComponentBase, ABC):
 
52
  res = []
53
  for cond in self._param.conditions:
54
  for item in cond["items"]:
55
+ if not item["cpn_id"]:
56
+ continue
57
  if item["cpn_id"].find("begin") >= 0:
58
  continue
59
  cid = item["cpn_id"].split("@")[0]
 
65
  for cond in self._param.conditions:
66
  res = []
67
  for item in cond["items"]:
68
+ if not item["cpn_id"]:
69
+ continue
70
  cid = item["cpn_id"].split("@")[0]
71
  if item["cpn_id"].find("@") > 0:
72
  cpn_id, key = item["cpn_id"].split("@")
 
110
  elif operator == ">":
111
  try:
112
  return True if float(input) > float(value) else False
113
+ except Exception:
114
  return True if input > value else False
115
  elif operator == "<":
116
  try:
117
  return True if float(input) < float(value) else False
118
+ except Exception:
119
  return True if input < value else False
120
  elif operator == "≥":
121
  try:
122
  return True if float(input) >= float(value) else False
123
+ except Exception:
124
  return True if input >= value else False
125
  elif operator == "≤":
126
  try:
127
  return True if float(input) <= float(value) else False
128
+ except Exception:
129
  return True if input <= value else False
130
 
131
  raise ValueError('Not supported operator' + operator)
agent/component/template.py CHANGED
@@ -47,7 +47,8 @@ class Template(ComponentBase):
47
 
48
  self._param.inputs = []
49
  for para in self._param.parameters:
50
- if not para.get("component_id"): continue
 
51
  component_id = para["component_id"].split("@")[0]
52
  if para["component_id"].lower().find("@") >= 0:
53
  cpn_id, key = para["component_id"].split("@")
 
47
 
48
  self._param.inputs = []
49
  for para in self._param.parameters:
50
+ if not para.get("component_id"):
51
+ continue
52
  component_id = para["component_id"].split("@")[0]
53
  if para["component_id"].lower().find("@") >= 0:
54
  cpn_id, key = para["component_id"].split("@")
agent/test/client.py CHANGED
@@ -43,6 +43,7 @@ if __name__ == '__main__':
43
  else:
44
  print(ans["content"])
45
 
46
- if DEBUG: print(canvas.path)
 
47
  question = input("\n==================== User =====================\n> ")
48
  canvas.add_user_input(question)
 
43
  else:
44
  print(ans["content"])
45
 
46
+ if DEBUG:
47
+ print(canvas.path)
48
  question = input("\n==================== User =====================\n> ")
49
  canvas.add_user_input(question)
api/apps/api_app.py CHANGED
@@ -142,7 +142,6 @@ def set_conversation():
142
  if not objs:
143
  return get_json_result(
144
  data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
145
- req = request.json
146
  try:
147
  if objs[0].source == "agent":
148
  e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
@@ -188,7 +187,8 @@ def completion():
188
  e, conv = API4ConversationService.get_by_id(req["conversation_id"])
189
  if not e:
190
  return get_data_error_result(message="Conversation not found!")
191
- if "quote" not in req: req["quote"] = False
 
192
 
193
  msg = []
194
  for m in req["messages"]:
@@ -197,7 +197,8 @@ def completion():
197
  if m["role"] == "assistant" and not msg:
198
  continue
199
  msg.append(m)
200
- if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
 
201
  message_id = msg[-1]["id"]
202
 
203
  def fillin_conv(ans):
@@ -674,11 +675,13 @@ def completion_faq():
674
  e, conv = API4ConversationService.get_by_id(req["conversation_id"])
675
  if not e:
676
  return get_data_error_result(message="Conversation not found!")
677
- if "quote" not in req: req["quote"] = True
 
678
 
679
  msg = []
680
  msg.append({"role": "user", "content": req["word"]})
681
- if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
 
682
  message_id = msg[-1]["id"]
683
 
684
  def fillin_conv(ans):
 
142
  if not objs:
143
  return get_json_result(
144
  data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
 
145
  try:
146
  if objs[0].source == "agent":
147
  e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
 
187
  e, conv = API4ConversationService.get_by_id(req["conversation_id"])
188
  if not e:
189
  return get_data_error_result(message="Conversation not found!")
190
+ if "quote" not in req:
191
+ req["quote"] = False
192
 
193
  msg = []
194
  for m in req["messages"]:
 
197
  if m["role"] == "assistant" and not msg:
198
  continue
199
  msg.append(m)
200
+ if not msg[-1].get("id"):
201
+ msg[-1]["id"] = get_uuid()
202
  message_id = msg[-1]["id"]
203
 
204
  def fillin_conv(ans):
 
675
  e, conv = API4ConversationService.get_by_id(req["conversation_id"])
676
  if not e:
677
  return get_data_error_result(message="Conversation not found!")
678
+ if "quote" not in req:
679
+ req["quote"] = True
680
 
681
  msg = []
682
  msg.append({"role": "user", "content": req["word"]})
683
+ if not msg[-1].get("id"):
684
+ msg[-1]["id"] = get_uuid()
685
  message_id = msg[-1]["id"]
686
 
687
  def fillin_conv(ans):
api/apps/canvas_app.py CHANGED
@@ -13,10 +13,8 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import logging
17
  import json
18
  import traceback
19
- from functools import partial
20
  from flask import request, Response
21
  from flask_login import login_required, current_user
22
  from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
@@ -60,7 +58,8 @@ def rm():
60
  def save():
61
  req = request.json
62
  req["user_id"] = current_user.id
63
- if not isinstance(req["dsl"], str): req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
 
64
 
65
  req["dsl"] = json.loads(req["dsl"])
66
  if "id" not in req:
@@ -153,7 +152,8 @@ def run():
153
  return resp
154
 
155
  for answer in canvas.run(stream=False):
156
- if answer.get("running_status"): continue
 
157
  final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
158
  canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
159
  if final_ans.get("reference"):
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import json
17
  import traceback
 
18
  from flask import request, Response
19
  from flask_login import login_required, current_user
20
  from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
 
58
  def save():
59
  req = request.json
60
  req["user_id"] = current_user.id
61
+ if not isinstance(req["dsl"], str):
62
+ req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
63
 
64
  req["dsl"] = json.loads(req["dsl"])
65
  if "id" not in req:
 
152
  return resp
153
 
154
  for answer in canvas.run(stream=False):
155
+ if answer.get("running_status"):
156
+ continue
157
  final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
158
  canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
159
  if final_ans.get("reference"):
api/apps/chunk_app.py CHANGED
@@ -237,7 +237,8 @@ def create():
237
  e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
238
  if not e:
239
  return get_data_error_result(message="Knowledgebase not found!")
240
- if kb.pagerank: d["pagerank_fea"] = kb.pagerank
 
241
 
242
  embd_id = DocumentService.get_embd_id(req["doc_id"])
243
  embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
 
237
  e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
238
  if not e:
239
  return get_data_error_result(message="Knowledgebase not found!")
240
+ if kb.pagerank:
241
+ d["pagerank_fea"] = kb.pagerank
242
 
243
  embd_id = DocumentService.get_embd_id(req["doc_id"])
244
  embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
api/apps/conversation_app.py CHANGED
@@ -281,10 +281,12 @@ def thumbup():
281
  if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant":
282
  if up_down:
283
  msg["thumbup"] = True
284
- if "feedback" in msg: del msg["feedback"]
 
285
  else:
286
  msg["thumbup"] = False
287
- if feedback: msg["feedback"] = feedback
 
288
  break
289
 
290
  ConversationService.update_by_id(conv["id"], conv)
 
281
  if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant":
282
  if up_down:
283
  msg["thumbup"] = True
284
+ if "feedback" in msg:
285
+ del msg["feedback"]
286
  else:
287
  msg["thumbup"] = False
288
+ if feedback:
289
+ msg["feedback"] = feedback
290
  break
291
 
292
  ConversationService.update_by_id(conv["id"], conv)
api/apps/dialog_app.py CHANGED
@@ -37,10 +37,12 @@ def set_dialog():
37
  top_n = req.get("top_n", 6)
38
  top_k = req.get("top_k", 1024)
39
  rerank_id = req.get("rerank_id", "")
40
- if not rerank_id: req["rerank_id"] = ""
 
41
  similarity_threshold = req.get("similarity_threshold", 0.1)
42
  vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
43
- if vector_similarity_weight is None: vector_similarity_weight = 0.3
 
44
  llm_setting = req.get("llm_setting", {})
45
  default_prompt = {
46
  "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
 
37
  top_n = req.get("top_n", 6)
38
  top_k = req.get("top_k", 1024)
39
  rerank_id = req.get("rerank_id", "")
40
+ if not rerank_id:
41
+ req["rerank_id"] = ""
42
  similarity_threshold = req.get("similarity_threshold", 0.1)
43
  vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
44
+ if vector_similarity_weight is None:
45
+ vector_similarity_weight = 0.3
46
  llm_setting = req.get("llm_setting", {})
47
  default_prompt = {
48
  "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
api/apps/document_app.py CHANGED
@@ -13,7 +13,6 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License
15
  #
16
- import json
17
  import os.path
18
  import pathlib
19
  import re
@@ -90,7 +89,8 @@ def web_crawl():
90
  raise LookupError("Can't find this knowledgebase!")
91
 
92
  blob = html2pdf(url)
93
- if not blob: return server_error_response(ValueError("Download failure."))
 
94
 
95
  root_folder = FileService.get_root_folder(current_user.id)
96
  pf_id = root_folder["id"]
@@ -290,7 +290,8 @@ def change_status():
290
  def rm():
291
  req = request.json
292
  doc_ids = req["doc_id"]
293
- if isinstance(doc_ids, str): doc_ids = [doc_ids]
 
294
 
295
  for doc_id in doc_ids:
296
  if not DocumentService.accessible4deletion(doc_id, current_user.id):
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License
15
  #
 
16
  import os.path
17
  import pathlib
18
  import re
 
89
  raise LookupError("Can't find this knowledgebase!")
90
 
91
  blob = html2pdf(url)
92
+ if not blob:
93
+ return server_error_response(ValueError("Download failure."))
94
 
95
  root_folder = FileService.get_root_folder(current_user.id)
96
  pf_id = root_folder["id"]
 
290
  def rm():
291
  req = request.json
292
  doc_ids = req["doc_id"]
293
+ if isinstance(doc_ids, str):
294
+ doc_ids = [doc_ids]
295
 
296
  for doc_id in doc_ids:
297
  if not DocumentService.accessible4deletion(doc_id, current_user.id):
api/apps/llm_app.py CHANGED
@@ -351,8 +351,10 @@ def list_app():
351
 
352
  llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
353
  for o in objs:
354
- if not o.api_key: continue
355
- if o.llm_name + "@" + o.llm_factory in llm_set: continue
 
 
356
  llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
357
 
358
  res = {}
 
351
 
352
  llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
353
  for o in objs:
354
+ if not o.api_key:
355
+ continue
356
+ if o.llm_name + "@" + o.llm_factory in llm_set:
357
+ continue
358
  llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
359
 
360
  res = {}
api/apps/sdk/agent.py CHANGED
@@ -14,7 +14,7 @@
14
  # limitations under the License.
15
  #
16
 
17
- from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
18
  from api.utils.api_utils import get_error_data_result, token_required
19
  from api.utils.api_utils import get_result
20
  from flask import request
 
14
  # limitations under the License.
15
  #
16
 
17
+ from api.db.services.canvas_service import UserCanvasService
18
  from api.utils.api_utils import get_error_data_result, token_required
19
  from api.utils.api_utils import get_result
20
  from flask import request
api/apps/sdk/doc.py CHANGED
@@ -41,7 +41,6 @@ from api.utils.api_utils import construct_json_result, get_parser_config
41
  from rag.nlp import search
42
  from rag.utils import rmSpace
43
  from rag.utils.storage_factory import STORAGE_IMPL
44
- import os
45
 
46
  MAXIMUM_OF_UPLOADING_FILES = 256
47
 
@@ -976,12 +975,12 @@ def add_chunk(tenant_id, dataset_id, document_id):
976
  if not req.get("content"):
977
  return get_error_data_result(message="`content` is required")
978
  if "important_keywords" in req:
979
- if type(req["important_keywords"]) != list:
980
  return get_error_data_result(
981
  "`important_keywords` is required to be a list"
982
  )
983
  if "questions" in req:
984
- if type(req["questions"]) != list:
985
  return get_error_data_result(
986
  "`questions` is required to be a list"
987
  )
 
41
  from rag.nlp import search
42
  from rag.utils import rmSpace
43
  from rag.utils.storage_factory import STORAGE_IMPL
 
44
 
45
  MAXIMUM_OF_UPLOADING_FILES = 256
46
 
 
975
  if not req.get("content"):
976
  return get_error_data_result(message="`content` is required")
977
  if "important_keywords" in req:
978
+ if not isinstance(req["important_keywords"], list):
979
  return get_error_data_result(
980
  "`important_keywords` is required to be a list"
981
  )
982
  if "questions" in req:
983
+ if not isinstance(req["questions"], list):
984
  return get_error_data_result(
985
  "`questions` is required to be a list"
986
  )
api/apps/sdk/session.py CHANGED
@@ -143,8 +143,10 @@ def completion(tenant_id, chat_id):
143
  }
144
  conv.message.append(question)
145
  for m in conv.message:
146
- if m["role"] == "system": continue
147
- if m["role"] == "assistant" and not msg: continue
 
 
148
  msg.append(m)
149
  message_id = msg[-1].get("id")
150
  e, dia = DialogService.get_by_id(conv.dialog_id)
@@ -267,7 +269,8 @@ def agent_completion(tenant_id, agent_id):
267
  if m["role"] == "assistant" and not msg:
268
  continue
269
  msg.append(m)
270
- if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
 
271
  message_id = msg[-1]["id"]
272
 
273
  stream = req.get("stream", True)
@@ -361,7 +364,8 @@ def agent_completion(tenant_id, agent_id):
361
  return resp
362
 
363
  for answer in canvas.run(stream=False):
364
- if answer.get("running_status"): continue
 
365
  final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
366
  canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
367
  if final_ans.get("reference"):
 
143
  }
144
  conv.message.append(question)
145
  for m in conv.message:
146
+ if m["role"] == "system":
147
+ continue
148
+ if m["role"] == "assistant" and not msg:
149
+ continue
150
  msg.append(m)
151
  message_id = msg[-1].get("id")
152
  e, dia = DialogService.get_by_id(conv.dialog_id)
 
269
  if m["role"] == "assistant" and not msg:
270
  continue
271
  msg.append(m)
272
+ if not msg[-1].get("id"):
273
+ msg[-1]["id"] = get_uuid()
274
  message_id = msg[-1]["id"]
275
 
276
  stream = req.get("stream", True)
 
364
  return resp
365
 
366
  for answer in canvas.run(stream=False):
367
+ if answer.get("running_status"):
368
+ continue
369
  final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
370
  canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
371
  if final_ans.get("reference"):
api/apps/user_app.py CHANGED
@@ -330,7 +330,7 @@ def user_info_from_github(access_token):
330
  headers=headers,
331
  ).json()
332
  user_info["email"] = next(
333
- (email for email in email_info if email["primary"] == True), None
334
  )["email"]
335
  return user_info
336
 
 
330
  headers=headers,
331
  ).json()
332
  user_info["email"] = next(
333
+ (email for email in email_info if email["primary"]), None
334
  )["email"]
335
  return user_info
336
 
api/db/db_models.py CHANGED
@@ -130,7 +130,7 @@ def is_continuous_field(cls: typing.Type) -> bool:
130
  for p in cls.__bases__:
131
  if p in CONTINUOUS_FIELD_TYPE:
132
  return True
133
- elif p != Field and p != object:
134
  if is_continuous_field(p):
135
  return True
136
  else:
 
130
  for p in cls.__bases__:
131
  if p in CONTINUOUS_FIELD_TYPE:
132
  return True
133
+ elif p is not Field and p is not object:
134
  if is_continuous_field(p):
135
  return True
136
  else:
api/db/init_data.py CHANGED
@@ -170,7 +170,7 @@ def add_graph_templates():
170
  cnvs = json.load(open(os.path.join(dir, fnm), "r"))
171
  try:
172
  CanvasTemplateService.save(**cnvs)
173
- except:
174
  CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
175
  except Exception:
176
  logging.exception("Add graph templates error: ")
 
170
  cnvs = json.load(open(os.path.join(dir, fnm), "r"))
171
  try:
172
  CanvasTemplateService.save(**cnvs)
173
+ except Exception:
174
  CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
175
  except Exception:
176
  logging.exception("Add graph templates error: ")
api/db/services/__init__.py CHANGED
@@ -15,13 +15,14 @@
15
  #
16
  import pathlib
17
  import re
18
- from .user_service import UserService
19
 
20
 
21
  def duplicate_name(query_func, **kwargs):
22
  fnm = kwargs["name"]
23
  objs = query_func(**kwargs)
24
- if not objs: return fnm
 
25
  ext = pathlib.Path(fnm).suffix #.jpg
26
  nm = re.sub(r"%s$"%ext, "", fnm)
27
  r = re.search(r"\(([0-9]+)\)$", nm)
@@ -31,8 +32,8 @@ def duplicate_name(query_func, **kwargs):
31
  nm = re.sub(r"\([0-9]+\)$", "", nm)
32
  c += 1
33
  nm = f"{nm}({c})"
34
- if ext: nm += f"{ext}"
 
35
 
36
  kwargs["name"] = nm
37
  return duplicate_name(query_func, **kwargs)
38
-
 
15
  #
16
  import pathlib
17
  import re
18
+ from .user_service import UserService as UserService
19
 
20
 
21
  def duplicate_name(query_func, **kwargs):
22
  fnm = kwargs["name"]
23
  objs = query_func(**kwargs)
24
+ if not objs:
25
+ return fnm
26
  ext = pathlib.Path(fnm).suffix #.jpg
27
  nm = re.sub(r"%s$"%ext, "", fnm)
28
  r = re.search(r"\(([0-9]+)\)$", nm)
 
32
  nm = re.sub(r"\([0-9]+\)$", "", nm)
33
  c += 1
34
  nm = f"{nm}({c})"
35
+ if ext:
36
+ nm += f"{ext}"
37
 
38
  kwargs["name"] = nm
39
  return duplicate_name(query_func, **kwargs)
 
api/db/services/api_service.py CHANGED
@@ -64,7 +64,8 @@ class API4ConversationService(CommonService):
64
  @classmethod
65
  @DB.connection_context()
66
  def stats(cls, tenant_id, from_date, to_date, source=None):
67
- if len(to_date) == 10: to_date += " 23:59:59"
 
68
  return cls.model.select(
69
  cls.model.create_date.truncate("day").alias("dt"),
70
  peewee.fn.COUNT(
 
64
  @classmethod
65
  @DB.connection_context()
66
  def stats(cls, tenant_id, from_date, to_date, source=None):
67
+ if len(to_date) == 10:
68
+ to_date += " 23:59:59"
69
  return cls.model.select(
70
  cls.model.create_date.truncate("day").alias("dt"),
71
  peewee.fn.COUNT(
api/db/services/canvas_service.py CHANGED
@@ -13,9 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- from datetime import datetime
17
- import peewee
18
- from api.db.db_models import DB, API4Conversation, APIToken, Dialog, CanvasTemplate, UserCanvas
19
  from api.db.services.common_service import CommonService
20
 
21
 
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ from api.db.db_models import DB, CanvasTemplate, UserCanvas
 
 
17
  from api.db.services.common_service import CommonService
18
 
19
 
api/db/services/common_service.py CHANGED
@@ -115,7 +115,7 @@ class CommonService:
115
  try:
116
  obj = cls.model.query(id=pid)[0]
117
  return True, obj
118
- except Exception as e:
119
  return False, None
120
 
121
  @classmethod
 
115
  try:
116
  obj = cls.model.query(id=pid)[0]
117
  return True, obj
118
+ except Exception:
119
  return False, None
120
 
121
  @classmethod
api/db/services/dialog_service.py CHANGED
@@ -106,15 +106,15 @@ def message_fit_in(msg, max_length=4000):
106
  return c, msg
107
 
108
  ll = num_tokens_from_string(msg_[0]["content"])
109
- l = num_tokens_from_string(msg_[-1]["content"])
110
- if ll / (ll + l) > 0.8:
111
  m = msg_[0]["content"]
112
- m = encoder.decode(encoder.encode(m)[:max_length - l])
113
  msg[0]["content"] = m
114
  return max_length, msg
115
 
116
  m = msg_[1]["content"]
117
- m = encoder.decode(encoder.encode(m)[:max_length - l])
118
  msg[1]["content"] = m
119
  return max_length, msg
120
 
@@ -257,7 +257,8 @@ def chat(dialog, messages, stream=True, **kwargs):
257
  idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
258
  recall_docs = [
259
  d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
260
- if not recall_docs: recall_docs = kbinfos["doc_aggs"]
 
261
  kbinfos["doc_aggs"] = recall_docs
262
 
263
  refs = deepcopy(kbinfos)
@@ -433,13 +434,15 @@ def relevant(tenant_id, llm_id, question, contents: list):
433
  Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
434
  No other words needed except 'yes' or 'no'.
435
  """
436
- if not contents:return False
 
437
  contents = "Documents: \n" + " - ".join(contents)
438
  contents = f"Question: {question}\n" + contents
439
  if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
440
  contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
441
  ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
442
- if ans.lower().find("yes") >= 0: return True
 
443
  return False
444
 
445
 
@@ -481,8 +484,10 @@ Requirements:
481
  ]
482
  _, msg = message_fit_in(msg, chat_mdl.max_length)
483
  kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
484
- if isinstance(kwd, tuple): kwd = kwd[0]
485
- if kwd.find("**ERROR**") >=0: return ""
 
 
486
  return kwd
487
 
488
 
@@ -508,8 +513,10 @@ Requirements:
508
  ]
509
  _, msg = message_fit_in(msg, chat_mdl.max_length)
510
  kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
511
- if isinstance(kwd, tuple): kwd = kwd[0]
512
- if kwd.find("**ERROR**") >= 0: return ""
 
 
513
  return kwd
514
 
515
 
@@ -520,7 +527,8 @@ def full_question(tenant_id, llm_id, messages):
520
  chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
521
  conv = []
522
  for m in messages:
523
- if m["role"] not in ["user", "assistant"]: continue
 
524
  conv.append("{}: {}".format(m["role"].upper(), m["content"]))
525
  conv = "\n".join(conv)
526
  today = datetime.date.today().isoformat()
@@ -581,7 +589,8 @@ Output: What's the weather in Rochester on {tomorrow}?
581
 
582
 
583
  def tts(tts_mdl, text):
584
- if not tts_mdl or not text: return
 
585
  bin = b""
586
  for chunk in tts_mdl.tts(text):
587
  bin += chunk
@@ -641,7 +650,8 @@ def ask(question, kb_ids, tenant_id):
641
  idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
642
  recall_docs = [
643
  d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
644
- if not recall_docs: recall_docs = kbinfos["doc_aggs"]
 
645
  kbinfos["doc_aggs"] = recall_docs
646
  refs = deepcopy(kbinfos)
647
  for c in refs["chunks"]:
 
106
  return c, msg
107
 
108
  ll = num_tokens_from_string(msg_[0]["content"])
109
+ ll2 = num_tokens_from_string(msg_[-1]["content"])
110
+ if ll / (ll + ll2) > 0.8:
111
  m = msg_[0]["content"]
112
+ m = encoder.decode(encoder.encode(m)[:max_length - ll2])
113
  msg[0]["content"] = m
114
  return max_length, msg
115
 
116
  m = msg_[1]["content"]
117
+ m = encoder.decode(encoder.encode(m)[:max_length - ll2])
118
  msg[1]["content"] = m
119
  return max_length, msg
120
 
 
257
  idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
258
  recall_docs = [
259
  d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
260
+ if not recall_docs:
261
+ recall_docs = kbinfos["doc_aggs"]
262
  kbinfos["doc_aggs"] = recall_docs
263
 
264
  refs = deepcopy(kbinfos)
 
434
  Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
435
  No other words needed except 'yes' or 'no'.
436
  """
437
+ if not contents:
438
+ return False
439
  contents = "Documents: \n" + " - ".join(contents)
440
  contents = f"Question: {question}\n" + contents
441
  if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
442
  contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
443
  ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
444
+ if ans.lower().find("yes") >= 0:
445
+ return True
446
  return False
447
 
448
 
 
484
  ]
485
  _, msg = message_fit_in(msg, chat_mdl.max_length)
486
  kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
487
+ if isinstance(kwd, tuple):
488
+ kwd = kwd[0]
489
+ if kwd.find("**ERROR**") >=0:
490
+ return ""
491
  return kwd
492
 
493
 
 
513
  ]
514
  _, msg = message_fit_in(msg, chat_mdl.max_length)
515
  kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
516
+ if isinstance(kwd, tuple):
517
+ kwd = kwd[0]
518
+ if kwd.find("**ERROR**") >= 0:
519
+ return ""
520
  return kwd
521
 
522
 
 
527
  chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
528
  conv = []
529
  for m in messages:
530
+ if m["role"] not in ["user", "assistant"]:
531
+ continue
532
  conv.append("{}: {}".format(m["role"].upper(), m["content"]))
533
  conv = "\n".join(conv)
534
  today = datetime.date.today().isoformat()
 
589
 
590
 
591
  def tts(tts_mdl, text):
592
+ if not tts_mdl or not text:
593
+ return
594
  bin = b""
595
  for chunk in tts_mdl.tts(text):
596
  bin += chunk
 
650
  idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
651
  recall_docs = [
652
  d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
653
+ if not recall_docs:
654
+ recall_docs = kbinfos["doc_aggs"]
655
  kbinfos["doc_aggs"] = recall_docs
656
  refs = deepcopy(kbinfos)
657
  for c in refs["chunks"]:
api/db/services/document_service.py CHANGED
@@ -532,7 +532,8 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
532
  try:
533
  mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output,
534
  ensure_ascii=False, indent=2)
535
- if len(mind_map) < 32: raise Exception("Few content: " + mind_map)
 
536
  cks.append({
537
  "id": get_uuid(),
538
  "doc_id": doc_id,
 
532
  try:
533
  mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output,
534
  ensure_ascii=False, indent=2)
535
+ if len(mind_map) < 32:
536
+ raise Exception("Few content: " + mind_map)
537
  cks.append({
538
  "id": get_uuid(),
539
  "doc_id": doc_id,
api/db/services/file2document_service.py CHANGED
@@ -20,7 +20,7 @@ from api.db.db_models import DB
20
  from api.db.db_models import File, File2Document
21
  from api.db.services.common_service import CommonService
22
  from api.db.services.document_service import DocumentService
23
- from api.utils import current_timestamp, datetime_format, get_uuid
24
 
25
 
26
  class File2DocumentService(CommonService):
@@ -63,7 +63,7 @@ class File2DocumentService(CommonService):
63
  def update_by_file_id(cls, file_id, obj):
64
  obj["update_time"] = current_timestamp()
65
  obj["update_date"] = datetime_format(datetime.now())
66
- num = cls.model.update(obj).where(cls.model.id == file_id).execute()
67
  e, obj = cls.get_by_id(cls.model.id)
68
  return obj
69
 
 
20
  from api.db.db_models import File, File2Document
21
  from api.db.services.common_service import CommonService
22
  from api.db.services.document_service import DocumentService
23
+ from api.utils import current_timestamp, datetime_format
24
 
25
 
26
  class File2DocumentService(CommonService):
 
63
  def update_by_file_id(cls, file_id, obj):
64
  obj["update_time"] = current_timestamp()
65
  obj["update_date"] = datetime_format(datetime.now())
66
+ # num = cls.model.update(obj).where(cls.model.id == file_id).execute()
67
  e, obj = cls.get_by_id(cls.model.id)
68
  return obj
69
 
api/db/services/file_service.py CHANGED
@@ -85,7 +85,8 @@ class FileService(CommonService):
85
  .join(Document, on=(File2Document.document_id == Document.id))
86
  .join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
87
  .where(cls.model.id == file_id))
88
- if not kbs: return []
 
89
  kbs_info_list = []
90
  for kb in list(kbs.dicts()):
91
  kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']})
@@ -304,7 +305,8 @@ class FileService(CommonService):
304
  @classmethod
305
  @DB.connection_context()
306
  def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
307
- for _ in File2DocumentService.get_by_document_id(doc["id"]): return
 
308
  file = {
309
  "id": get_uuid(),
310
  "parent_id": kb_folder_id,
 
85
  .join(Document, on=(File2Document.document_id == Document.id))
86
  .join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
87
  .where(cls.model.id == file_id))
88
+ if not kbs:
89
+ return []
90
  kbs_info_list = []
91
  for kb in list(kbs.dicts()):
92
  kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']})
 
305
  @classmethod
306
  @DB.connection_context()
307
  def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
308
+ for _ in File2DocumentService.get_by_document_id(doc["id"]):
309
+ return
310
  file = {
311
  "id": get_uuid(),
312
  "parent_id": kb_folder_id,
api/db/services/llm_service.py CHANGED
@@ -107,7 +107,8 @@ class TenantLLMService(CommonService):
107
 
108
  model_config = cls.get_api_key(tenant_id, mdlnm)
109
  mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
110
- if model_config: model_config = model_config.to_dict()
 
111
  if not model_config:
112
  if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
113
  llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
 
107
 
108
  model_config = cls.get_api_key(tenant_id, mdlnm)
109
  mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
110
+ if model_config:
111
+ model_config = model_config.to_dict()
112
  if not model_config:
113
  if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
114
  llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
api/db/services/task_service.py CHANGED
@@ -57,28 +57,33 @@ class TaskService(CommonService):
57
  Tenant.img2txt_id,
58
  Tenant.asr_id,
59
  Tenant.llm_id,
60
- cls.model.update_time]
61
- docs = cls.model.select(*fields) \
62
- .join(Document, on=(cls.model.doc_id == Document.id)) \
63
- .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
64
- .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
 
 
65
  .where(cls.model.id == task_id)
 
66
  docs = list(docs.dicts())
67
- if not docs: return None
 
68
 
69
  msg = "\nTask has been received."
70
- prog = random.random() / 10.
71
  if docs[0]["retry_count"] >= 3:
72
  msg = "\nERROR: Task is abandoned after 3 times attempts."
73
  prog = -1
74
 
75
- cls.model.update(progress_msg=cls.model.progress_msg + msg,
76
- progress=prog,
77
- retry_count=docs[0]["retry_count"]+1
78
- ).where(
79
- cls.model.id == docs[0]["id"]).execute()
80
 
81
- if docs[0]["retry_count"] >= 3: return None
 
82
 
83
  return docs[0]
84
 
@@ -86,21 +91,44 @@ class TaskService(CommonService):
86
  @DB.connection_context()
87
  def get_ongoing_doc_name(cls):
88
  with DB.lock("get_task", -1):
89
- docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \
90
- .join(Document, on=(cls.model.doc_id == Document.id)) \
91
- .join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \
92
- .join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \
 
 
 
 
 
 
 
 
 
 
 
93
  .where(
94
  Document.status == StatusEnum.VALID.value,
95
  Document.run == TaskStatus.RUNNING.value,
96
  ~(Document.type == FileType.VIRTUAL.value),
97
  cls.model.progress < 1,
98
- cls.model.create_time >= current_timestamp() - 1000 * 600
99
  )
 
100
  docs = list(docs.dicts())
101
- if not docs: return []
102
-
103
- return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs]))
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  @classmethod
106
  @DB.connection_context()
@@ -118,28 +146,30 @@ class TaskService(CommonService):
118
  def update_progress(cls, id, info):
119
  if os.environ.get("MACOS"):
120
  if info["progress_msg"]:
121
- cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
122
- cls.model.id == id).execute()
 
123
  if "progress" in info:
124
  cls.model.update(progress=info["progress"]).where(
125
- cls.model.id == id).execute()
 
126
  return
127
 
128
  with DB.lock("update_progress", -1):
129
  if info["progress_msg"]:
130
- cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
131
- cls.model.id == id).execute()
 
132
  if "progress" in info:
133
  cls.model.update(progress=info["progress"]).where(
134
- cls.model.id == id).execute()
 
135
 
136
 
137
  def queue_tasks(doc: dict, bucket: str, name: str):
138
  def new_task():
139
- return {
140
- "id": get_uuid(),
141
- "doc_id": doc["id"]
142
- }
143
  tsks = []
144
 
145
  if doc["type"] == FileType.PDF.value:
@@ -150,8 +180,8 @@ def queue_tasks(doc: dict, bucket: str, name: str):
150
  if doc["parser_id"] == "paper":
151
  page_size = doc["parser_config"].get("task_page_size", 22)
152
  if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
153
- page_size = 10 ** 9
154
- page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
155
  for s, e in page_ranges:
156
  s -= 1
157
  s = max(0, s)
@@ -177,4 +207,6 @@ def queue_tasks(doc: dict, bucket: str, name: str):
177
  DocumentService.begin2parse(doc["id"])
178
 
179
  for t in tsks:
180
- assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status."
 
 
 
57
  Tenant.img2txt_id,
58
  Tenant.asr_id,
59
  Tenant.llm_id,
60
+ cls.model.update_time,
61
+ ]
62
+ docs = (
63
+ cls.model.select(*fields)
64
+ .join(Document, on=(cls.model.doc_id == Document.id))
65
+ .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
66
+ .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
67
  .where(cls.model.id == task_id)
68
+ )
69
  docs = list(docs.dicts())
70
+ if not docs:
71
+ return None
72
 
73
  msg = "\nTask has been received."
74
+ prog = random.random() / 10.0
75
  if docs[0]["retry_count"] >= 3:
76
  msg = "\nERROR: Task is abandoned after 3 times attempts."
77
  prog = -1
78
 
79
+ cls.model.update(
80
+ progress_msg=cls.model.progress_msg + msg,
81
+ progress=prog,
82
+ retry_count=docs[0]["retry_count"] + 1,
83
+ ).where(cls.model.id == docs[0]["id"]).execute()
84
 
85
+ if docs[0]["retry_count"] >= 3:
86
+ return None
87
 
88
  return docs[0]
89
 
 
91
  @DB.connection_context()
92
  def get_ongoing_doc_name(cls):
93
  with DB.lock("get_task", -1):
94
+ docs = (
95
+ cls.model.select(
96
+ *[Document.id, Document.kb_id, Document.location, File.parent_id]
97
+ )
98
+ .join(Document, on=(cls.model.doc_id == Document.id))
99
+ .join(
100
+ File2Document,
101
+ on=(File2Document.document_id == Document.id),
102
+ join_type=JOIN.LEFT_OUTER,
103
+ )
104
+ .join(
105
+ File,
106
+ on=(File2Document.file_id == File.id),
107
+ join_type=JOIN.LEFT_OUTER,
108
+ )
109
  .where(
110
  Document.status == StatusEnum.VALID.value,
111
  Document.run == TaskStatus.RUNNING.value,
112
  ~(Document.type == FileType.VIRTUAL.value),
113
  cls.model.progress < 1,
114
+ cls.model.create_time >= current_timestamp() - 1000 * 600,
115
  )
116
+ )
117
  docs = list(docs.dicts())
118
+ if not docs:
119
+ return []
120
+
121
+ return list(
122
+ set(
123
+ [
124
+ (
125
+ d["parent_id"] if d["parent_id"] else d["kb_id"],
126
+ d["location"],
127
+ )
128
+ for d in docs
129
+ ]
130
+ )
131
+ )
132
 
133
  @classmethod
134
  @DB.connection_context()
 
146
  def update_progress(cls, id, info):
147
  if os.environ.get("MACOS"):
148
  if info["progress_msg"]:
149
+ cls.model.update(
150
+ progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
151
+ ).where(cls.model.id == id).execute()
152
  if "progress" in info:
153
  cls.model.update(progress=info["progress"]).where(
154
+ cls.model.id == id
155
+ ).execute()
156
  return
157
 
158
  with DB.lock("update_progress", -1):
159
  if info["progress_msg"]:
160
+ cls.model.update(
161
+ progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
162
+ ).where(cls.model.id == id).execute()
163
  if "progress" in info:
164
  cls.model.update(progress=info["progress"]).where(
165
+ cls.model.id == id
166
+ ).execute()
167
 
168
 
169
  def queue_tasks(doc: dict, bucket: str, name: str):
170
  def new_task():
171
+ return {"id": get_uuid(), "doc_id": doc["id"]}
172
+
 
 
173
  tsks = []
174
 
175
  if doc["type"] == FileType.PDF.value:
 
180
  if doc["parser_id"] == "paper":
181
  page_size = doc["parser_config"].get("task_page_size", 22)
182
  if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
183
+ page_size = 10**9
184
+ page_ranges = doc["parser_config"].get("pages") or [(1, 10**5)]
185
  for s, e in page_ranges:
186
  s -= 1
187
  s = max(0, s)
 
207
  DocumentService.begin2parse(doc["id"])
208
 
209
  for t in tsks:
210
+ assert REDIS_CONN.queue_product(
211
+ SVR_QUEUE_NAME, message=t
212
+ ), "Can't access Redis. Please check the Redis' status."
api/db/services/user_service.py CHANGED
@@ -22,7 +22,7 @@ from api.db import UserTenantRole
22
  from api.db.db_models import DB, UserTenant
23
  from api.db.db_models import User, Tenant
24
  from api.db.services.common_service import CommonService
25
- from api.utils import get_uuid, get_format_time, current_timestamp, datetime_format
26
  from api.db import StatusEnum
27
 
28
 
 
22
  from api.db.db_models import DB, UserTenant
23
  from api.db.db_models import User, Tenant
24
  from api.db.services.common_service import CommonService
25
+ from api.utils import get_uuid, current_timestamp, datetime_format
26
  from api.db import StatusEnum
27
 
28
 
api/ragflow_server.py CHANGED
@@ -21,10 +21,7 @@
21
  import logging
22
  import os
23
  from api.utils.log_utils import initRootLogger
24
- LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
25
- initRootLogger("ragflow_server", LOG_LEVELS)
26
 
27
- import os
28
  import signal
29
  import sys
30
  import time
@@ -44,6 +41,9 @@ from api.versions import get_ragflow_version
44
  from api.utils import show_configs
45
  from rag.settings import print_rag_settings
46
 
 
 
 
47
 
48
  def update_progress():
49
  while True:
 
21
  import logging
22
  import os
23
  from api.utils.log_utils import initRootLogger
 
 
24
 
 
25
  import signal
26
  import sys
27
  import time
 
41
  from api.utils import show_configs
42
  from rag.settings import print_rag_settings
43
 
44
+ LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
45
+ initRootLogger("ragflow_server", LOG_LEVELS)
46
+
47
 
48
  def update_progress():
49
  while True:
api/utils/api_utils.py CHANGED
@@ -36,7 +36,6 @@ from werkzeug.http import HTTP_STATUS_CODES
36
  from api.db.db_models import APIToken
37
  from api import settings
38
 
39
- from api import settings
40
  from api.utils import CustomJSONEncoder, get_uuid
41
  from api.utils import json_dumps
42
  from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC
 
36
  from api.db.db_models import APIToken
37
  from api import settings
38
 
 
39
  from api.utils import CustomJSONEncoder, get_uuid
40
  from api.utils import json_dumps
41
  from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC
api/validation.py CHANGED
@@ -45,5 +45,5 @@ try:
45
  pool = Pool(processes=1)
46
  thread = pool.apply_async(download_nltk_data)
47
  binary = thread.get(timeout=60)
48
- except Exception as e:
49
  print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True)
 
45
  pool = Pool(processes=1)
46
  thread = pool.apply_async(download_nltk_data)
47
  binary = thread.get(timeout=60)
48
+ except Exception:
49
  print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True)
deepdoc/parser/__init__.py CHANGED
@@ -18,4 +18,16 @@ from .ppt_parser import RAGFlowPptParser as PptParser
18
  from .html_parser import RAGFlowHtmlParser as HtmlParser
19
  from .json_parser import RAGFlowJsonParser as JsonParser
20
  from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
21
- from .txt_parser import RAGFlowTxtParser as TxtParser
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  from .html_parser import RAGFlowHtmlParser as HtmlParser
19
  from .json_parser import RAGFlowJsonParser as JsonParser
20
  from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
21
+ from .txt_parser import RAGFlowTxtParser as TxtParser
22
+
23
+ __all__ = [
24
+ "PdfParser",
25
+ "PlainParser",
26
+ "DocxParser",
27
+ "ExcelParser",
28
+ "PptParser",
29
+ "HtmlParser",
30
+ "JsonParser",
31
+ "MarkdownParser",
32
+ "TxtParser",
33
+ ]
deepdoc/parser/excel_parser.py CHANGED
@@ -29,7 +29,8 @@ class RAGFlowExcelParser:
29
  for sheetname in wb.sheetnames:
30
  ws = wb[sheetname]
31
  rows = list(ws.rows)
32
- if not rows: continue
 
33
 
34
  tb_rows_0 = "<tr>"
35
  for t in list(rows[0]):
@@ -40,7 +41,9 @@ class RAGFlowExcelParser:
40
  tb = ""
41
  tb += f"<table><caption>{sheetname}</caption>"
42
  tb += tb_rows_0
43
- for r in list(rows[1 + chunk_i * chunk_rows:1 + (chunk_i + 1) * chunk_rows]):
 
 
44
  tb += "<tr>"
45
  for i, c in enumerate(r):
46
  if c.value is None:
@@ -62,20 +65,21 @@ class RAGFlowExcelParser:
62
  for sheetname in wb.sheetnames:
63
  ws = wb[sheetname]
64
  rows = list(ws.rows)
65
- if not rows:continue
 
66
  ti = list(rows[0])
67
  for r in list(rows[1:]):
68
- l = []
69
  for i, c in enumerate(r):
70
  if not c.value:
71
  continue
72
  t = str(ti[i].value) if i < len(ti) else ""
73
  t += (":" if t else "") + str(c.value)
74
- l.append(t)
75
- l = "; ".join(l)
76
  if sheetname.lower().find("sheet") < 0:
77
- l += " ——" + sheetname
78
- res.append(l)
79
  return res
80
 
81
  @staticmethod
 
29
  for sheetname in wb.sheetnames:
30
  ws = wb[sheetname]
31
  rows = list(ws.rows)
32
+ if not rows:
33
+ continue
34
 
35
  tb_rows_0 = "<tr>"
36
  for t in list(rows[0]):
 
41
  tb = ""
42
  tb += f"<table><caption>{sheetname}</caption>"
43
  tb += tb_rows_0
44
+ for r in list(
45
+ rows[1 + chunk_i * chunk_rows : 1 + (chunk_i + 1) * chunk_rows]
46
+ ):
47
  tb += "<tr>"
48
  for i, c in enumerate(r):
49
  if c.value is None:
 
65
  for sheetname in wb.sheetnames:
66
  ws = wb[sheetname]
67
  rows = list(ws.rows)
68
+ if not rows:
69
+ continue
70
  ti = list(rows[0])
71
  for r in list(rows[1:]):
72
+ fields = []
73
  for i, c in enumerate(r):
74
  if not c.value:
75
  continue
76
  t = str(ti[i].value) if i < len(ti) else ""
77
  t += (":" if t else "") + str(c.value)
78
+ fields.append(t)
79
+ line = "; ".join(fields)
80
  if sheetname.lower().find("sheet") < 0:
81
+ line += " ——" + sheetname
82
+ res.append(line)
83
  return res
84
 
85
  @staticmethod
deepdoc/parser/html_parser.py CHANGED
@@ -36,7 +36,7 @@ class RAGFlowHtmlParser:
36
 
37
  @classmethod
38
  def parser_txt(cls, txt):
39
- if type(txt) != str:
40
  raise TypeError("txt type should be str!")
41
  html_doc = readability.Document(txt)
42
  title = html_doc.title()
 
36
 
37
  @classmethod
38
  def parser_txt(cls, txt):
39
+ if not isinstance(txt, str):
40
  raise TypeError("txt type should be str!")
41
  html_doc = readability.Document(txt)
42
  title = html_doc.title()
deepdoc/parser/json_parser.py CHANGED
@@ -22,7 +22,7 @@ class RAGFlowJsonParser:
22
  txt = binary.decode(encoding, errors="ignore")
23
  json_data = json.loads(txt)
24
  chunks = self.split_json(json_data, True)
25
- sections = [json.dumps(l, ensure_ascii=False) for l in chunks if l]
26
  return sections
27
 
28
  @staticmethod
 
22
  txt = binary.decode(encoding, errors="ignore")
23
  json_data = json.loads(txt)
24
  chunks = self.split_json(json_data, True)
25
+ sections = [json.dumps(line, ensure_ascii=False) for line in chunks if line]
26
  return sections
27
 
28
  @staticmethod
deepdoc/parser/pdf_parser.py CHANGED
@@ -752,7 +752,7 @@ class RAGFlowPdfParser:
752
  "x1": np.max([b["x1"] for b in bxs]),
753
  "bottom": np.max([b["bottom"] for b in bxs]) - ht
754
  }
755
- louts = [l for l in self.page_layout[pn] if l["type"] == ltype]
756
  ii = Recognizer.find_overlapped(b, louts, naive=True)
757
  if ii is not None:
758
  b = louts[ii]
@@ -763,7 +763,8 @@ class RAGFlowPdfParser:
763
  "layoutno", "")))
764
 
765
  left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"]
766
- if right < left: right = left + 1
 
767
  poss.append((pn + self.page_from, left, right, top, bott))
768
  return self.page_images[pn] \
769
  .crop((left * ZM, top * ZM,
@@ -845,7 +846,8 @@ class RAGFlowPdfParser:
845
  top = bx["top"] - self.page_cum_height[pn[0] - 1]
846
  bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
847
  page_images_cnt = len(self.page_images)
848
- if pn[-1] - 1 >= page_images_cnt: return ""
 
849
  while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
850
  bott -= self.page_images[pn[-1] - 1].size[1] / ZM
851
  pn.append(pn[-1] + 1)
@@ -889,7 +891,6 @@ class RAGFlowPdfParser:
889
  nonlocal mh, pw, lines, widths
890
  lines.append(line)
891
  widths.append(width(line))
892
- width_mean = np.mean(widths)
893
  mmj = self.proj_match(
894
  line["text"]) or line.get(
895
  "layout_type",
@@ -994,7 +995,7 @@ class RAGFlowPdfParser:
994
  else:
995
  self.is_english = False
996
 
997
- st = timer()
998
  for i, img in enumerate(self.page_images_x2):
999
  chars = self.page_chars[i] if not self.is_english else []
1000
  self.mean_height.append(
@@ -1028,8 +1029,8 @@ class RAGFlowPdfParser:
1028
 
1029
  self.page_cum_height = np.cumsum(self.page_cum_height)
1030
  assert len(self.page_cum_height) == len(self.page_images) + 1
1031
- if len(self.boxes) == 0 and zoomin < 9: self.__images__(fnm, zoomin * 3, page_from,
1032
- page_to, callback)
1033
 
1034
  def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
1035
  self.__images__(fnm, zoomin)
@@ -1168,7 +1169,7 @@ class PlainParser(object):
1168
  if not self.outlines:
1169
  logging.warning("Miss outlines")
1170
 
1171
- return [(l, "") for l in lines], []
1172
 
1173
  def crop(self, ck, need_position):
1174
  raise NotImplementedError
 
752
  "x1": np.max([b["x1"] for b in bxs]),
753
  "bottom": np.max([b["bottom"] for b in bxs]) - ht
754
  }
755
+ louts = [layout for layout in self.page_layout[pn] if layout["type"] == ltype]
756
  ii = Recognizer.find_overlapped(b, louts, naive=True)
757
  if ii is not None:
758
  b = louts[ii]
 
763
  "layoutno", "")))
764
 
765
  left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"]
766
+ if right < left:
767
+ right = left + 1
768
  poss.append((pn + self.page_from, left, right, top, bott))
769
  return self.page_images[pn] \
770
  .crop((left * ZM, top * ZM,
 
846
  top = bx["top"] - self.page_cum_height[pn[0] - 1]
847
  bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
848
  page_images_cnt = len(self.page_images)
849
+ if pn[-1] - 1 >= page_images_cnt:
850
+ return ""
851
  while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
852
  bott -= self.page_images[pn[-1] - 1].size[1] / ZM
853
  pn.append(pn[-1] + 1)
 
891
  nonlocal mh, pw, lines, widths
892
  lines.append(line)
893
  widths.append(width(line))
 
894
  mmj = self.proj_match(
895
  line["text"]) or line.get(
896
  "layout_type",
 
995
  else:
996
  self.is_english = False
997
 
998
+ # st = timer()
999
  for i, img in enumerate(self.page_images_x2):
1000
  chars = self.page_chars[i] if not self.is_english else []
1001
  self.mean_height.append(
 
1029
 
1030
  self.page_cum_height = np.cumsum(self.page_cum_height)
1031
  assert len(self.page_cum_height) == len(self.page_images) + 1
1032
+ if len(self.boxes) == 0 and zoomin < 9:
1033
+ self.__images__(fnm, zoomin * 3, page_from, page_to, callback)
1034
 
1035
  def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
1036
  self.__images__(fnm, zoomin)
 
1169
  if not self.outlines:
1170
  logging.warning("Miss outlines")
1171
 
1172
+ return [(line, "") for line in lines], []
1173
 
1174
  def crop(self, ck, need_position):
1175
  raise NotImplementedError
deepdoc/parser/resume/__init__.py CHANGED
@@ -15,21 +15,42 @@ import datetime
15
 
16
 
17
  def refactor(cv):
18
- for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]:
19
- if n in cv and cv[n] is not None: del cv[n]
 
 
 
 
 
 
 
 
20
  cv["is_deleted"] = 0
21
- if "basic" not in cv: cv["basic"] = {}
22
- if cv["basic"].get("photo2"): del cv["basic"]["photo2"]
 
 
23
 
24
- for n in ["education", "work", "certificate", "project", "language", "skill", "training"]:
25
- if n not in cv or cv[n] is None: continue
26
- if type(cv[n]) == type({}): cv[n] = [v for _, v in cv[n].items()]
27
- if type(cv[n]) != type([]):
 
 
 
 
 
 
 
 
 
 
28
  del cv[n]
29
  continue
30
  vv = []
31
  for v in cv[n]:
32
- if "external" in v and v["external"] is not None: del v["external"]
 
33
  vv.append(v)
34
  cv[n] = {str(i): vv[i] for i in range(len(vv))}
35
 
@@ -42,24 +63,44 @@ def refactor(cv):
42
  cv["basic"][t] = cv["basic"][n]
43
  del cv["basic"][n]
44
 
45
- work = sorted([v for _, v in cv.get("work", {}).items()], key=lambda x: x.get("start_time", ""))
46
- edu = sorted([v for _, v in cv.get("education", {}).items()], key=lambda x: x.get("start_time", ""))
 
 
 
 
 
 
47
 
48
  if work:
49
  cv["basic"]["work_start_time"] = work[0].get("start_time", "")
50
- cv["basic"]["management_experience"] = 'Y' if any(
51
- [w.get("management_experience", '') == 'Y' for w in work]) else 'N'
 
 
 
52
  cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")
53
 
54
- for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities",
55
- "corporation_type", "scale", "corporation_name"]:
 
 
 
 
 
 
 
 
56
  cv["basic"][n] = work[-1].get(n, "")
57
 
58
  if edu:
59
  for n in ["school_name", "discipline_name"]:
60
- if n in edu[-1]: cv["basic"][n] = edu[-1][n]
 
61
 
62
  cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
63
- if "contact" not in cv: cv["contact"] = {}
64
- if not cv["contact"].get("name"): cv["contact"]["name"] = cv["basic"].get("name", "")
65
- return cv
 
 
 
15
 
16
 
17
  def refactor(cv):
18
+ for n in [
19
+ "raw_txt",
20
+ "parser_name",
21
+ "inference",
22
+ "ori_text",
23
+ "use_time",
24
+ "time_stat",
25
+ ]:
26
+ if n in cv and cv[n] is not None:
27
+ del cv[n]
28
  cv["is_deleted"] = 0
29
+ if "basic" not in cv:
30
+ cv["basic"] = {}
31
+ if cv["basic"].get("photo2"):
32
+ del cv["basic"]["photo2"]
33
 
34
+ for n in [
35
+ "education",
36
+ "work",
37
+ "certificate",
38
+ "project",
39
+ "language",
40
+ "skill",
41
+ "training",
42
+ ]:
43
+ if n not in cv or cv[n] is None:
44
+ continue
45
+ if isinstance(cv[n], dict):
46
+ cv[n] = [v for _, v in cv[n].items()]
47
+ if not isinstance(cv[n], list):
48
  del cv[n]
49
  continue
50
  vv = []
51
  for v in cv[n]:
52
+ if "external" in v and v["external"] is not None:
53
+ del v["external"]
54
  vv.append(v)
55
  cv[n] = {str(i): vv[i] for i in range(len(vv))}
56
 
 
63
  cv["basic"][t] = cv["basic"][n]
64
  del cv["basic"][n]
65
 
66
+ work = sorted(
67
+ [v for _, v in cv.get("work", {}).items()],
68
+ key=lambda x: x.get("start_time", ""),
69
+ )
70
+ edu = sorted(
71
+ [v for _, v in cv.get("education", {}).items()],
72
+ key=lambda x: x.get("start_time", ""),
73
+ )
74
 
75
  if work:
76
  cv["basic"]["work_start_time"] = work[0].get("start_time", "")
77
+ cv["basic"]["management_experience"] = (
78
+ "Y"
79
+ if any([w.get("management_experience", "") == "Y" for w in work])
80
+ else "N"
81
+ )
82
  cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")
83
 
84
+ for n in [
85
+ "annual_salary_from",
86
+ "annual_salary_to",
87
+ "industry_name",
88
+ "position_name",
89
+ "responsibilities",
90
+ "corporation_type",
91
+ "scale",
92
+ "corporation_name",
93
+ ]:
94
  cv["basic"][n] = work[-1].get(n, "")
95
 
96
  if edu:
97
  for n in ["school_name", "discipline_name"]:
98
+ if n in edu[-1]:
99
+ cv["basic"][n] = edu[-1][n]
100
 
101
  cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
102
+ if "contact" not in cv:
103
+ cv["contact"] = {}
104
+ if not cv["contact"].get("name"):
105
+ cv["contact"]["name"] = cv["basic"].get("name", "")
106
+ return cv
deepdoc/parser/resume/entities/corporations.py CHANGED
@@ -21,13 +21,18 @@ from . import regions
21
 
22
 
23
  current_file_path = os.path.dirname(os.path.abspath(__file__))
24
- GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0)
 
 
25
  GOODS["cid"] = GOODS["cid"].astype(str)
26
  GOODS = GOODS.set_index(["cid"])
27
- CORP_TKS = json.load(open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r"))
 
 
28
  GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r"))
29
  CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r"))
30
 
 
31
  def baike(cid, default_v=0):
32
  global GOODS
33
  try:
@@ -39,27 +44,41 @@ def baike(cid, default_v=0):
39
 
40
  def corpNorm(nm, add_region=True):
41
  global CORP_TKS
42
- if not nm or type(nm)!=type(""):return ""
 
43
  nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower()
44
  nm = re.sub(r"&amp;", "&", nm)
45
  nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm)
46
- nm = re.sub(r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE)
47
- nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, 10000, re.IGNORECASE)
48
- if not nm or (len(nm)<5 and not regions.isName(nm[0:2])):return nm
 
 
 
 
 
 
 
 
 
49
 
50
  tks = rag_tokenizer.tokenize(nm).split()
51
- reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
52
  nm = ""
53
  for t in tks:
54
- if regions.isName(t) or t in CORP_TKS:continue
55
- if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):nm += " "
 
 
56
  nm += t
57
 
58
  r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
59
- if r:nm = r.group(1)
 
60
  r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
61
- if r:nm = r.group(1)
62
- return nm.strip() + (("" if not reg else "(%s)"%reg[0]) if add_region else "")
 
63
 
64
 
65
  def rmNoise(n):
@@ -67,33 +86,40 @@ def rmNoise(n):
67
  n = re.sub(r"[,. &()()]+", "", n)
68
  return n
69
 
 
70
  GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
71
- for c,v in CORP_TAG.items():
72
  cc = corpNorm(rmNoise(c), False)
73
  if not cc:
74
  logging.debug(c)
75
- CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
 
76
 
77
  def is_good(nm):
78
  global GOOD_CORP
79
- if nm.find("外派")>=0:return False
 
80
  nm = rmNoise(nm)
81
  nm = corpNorm(nm, False)
82
  for n in GOOD_CORP:
83
  if re.match(r"[0-9a-zA-Z]+$", n):
84
- if n == nm: return True
85
- elif nm.find(n)>=0:return True
 
 
86
  return False
87
 
 
88
  def corp_tag(nm):
89
  global CORP_TAG
90
  nm = rmNoise(nm)
91
  nm = corpNorm(nm, False)
92
  for n in CORP_TAG.keys():
93
  if re.match(r"[0-9a-zA-Z., ]+$", n):
94
- if n == nm: return CORP_TAG[n]
95
- elif nm.find(n)>=0:
96
- if len(n)<3 and len(nm)/len(n)>=2:continue
 
 
97
  return CORP_TAG[n]
98
  return []
99
-
 
21
 
22
 
23
  current_file_path = os.path.dirname(os.path.abspath(__file__))
24
+ GOODS = pd.read_csv(
25
+ os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0
26
+ ).fillna(0)
27
  GOODS["cid"] = GOODS["cid"].astype(str)
28
  GOODS = GOODS.set_index(["cid"])
29
+ CORP_TKS = json.load(
30
+ open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r")
31
+ )
32
  GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r"))
33
  CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r"))
34
 
35
+
36
  def baike(cid, default_v=0):
37
  global GOODS
38
  try:
 
44
 
45
  def corpNorm(nm, add_region=True):
46
  global CORP_TKS
47
+ if not nm or not isinstance(nm, str):
48
+ return ""
49
  nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower()
50
  nm = re.sub(r"&amp;", "&", nm)
51
  nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm)
52
+ nm = re.sub(
53
+ r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE
54
+ )
55
+ nm = re.sub(
56
+ r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$",
57
+ "",
58
+ nm,
59
+ 10000,
60
+ re.IGNORECASE,
61
+ )
62
+ if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])):
63
+ return nm
64
 
65
  tks = rag_tokenizer.tokenize(nm).split()
66
+ reg = [t for i, t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
67
  nm = ""
68
  for t in tks:
69
+ if regions.isName(t) or t in CORP_TKS:
70
+ continue
71
+ if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):
72
+ nm += " "
73
  nm += t
74
 
75
  r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
76
+ if r:
77
+ nm = r.group(1)
78
  r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
79
+ if r:
80
+ nm = r.group(1)
81
+ return nm.strip() + (("" if not reg else "(%s)" % reg[0]) if add_region else "")
82
 
83
 
84
  def rmNoise(n):
 
86
  n = re.sub(r"[,. &()()]+", "", n)
87
  return n
88
 
89
+
90
  GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
91
+ for c, v in CORP_TAG.items():
92
  cc = corpNorm(rmNoise(c), False)
93
  if not cc:
94
  logging.debug(c)
95
+ CORP_TAG = {corpNorm(rmNoise(c), False): v for c, v in CORP_TAG.items()}
96
+
97
 
98
  def is_good(nm):
99
  global GOOD_CORP
100
+ if nm.find("外派") >= 0:
101
+ return False
102
  nm = rmNoise(nm)
103
  nm = corpNorm(nm, False)
104
  for n in GOOD_CORP:
105
  if re.match(r"[0-9a-zA-Z]+$", n):
106
+ if n == nm:
107
+ return True
108
+ elif nm.find(n) >= 0:
109
+ return True
110
  return False
111
 
112
+
113
  def corp_tag(nm):
114
  global CORP_TAG
115
  nm = rmNoise(nm)
116
  nm = corpNorm(nm, False)
117
  for n in CORP_TAG.keys():
118
  if re.match(r"[0-9a-zA-Z., ]+$", n):
119
+ if n == nm:
120
+ return CORP_TAG[n]
121
+ elif nm.find(n) >= 0:
122
+ if len(n) < 3 and len(nm) / len(n) >= 2:
123
+ continue
124
  return CORP_TAG[n]
125
  return []
 
deepdoc/parser/resume/entities/degrees.py CHANGED
@@ -11,27 +11,31 @@
11
  # limitations under the License.
12
  #
13
 
14
- TBL = {"94":"EMBA",
15
- "6":"MBA",
16
- "95":"MPA",
17
- "92":"专升本",
18
- "4":"专科",
19
- "90":"中专",
20
- "91":"中技",
21
- "86":"初中",
22
- "3":"博士",
23
- "10":"博士后",
24
- "1":"本科",
25
- "2":"硕士",
26
- "87":"职高",
27
- "89":"高中"
 
28
  }
29
 
30
- TBL_ = {v:k for k,v in TBL.items()}
 
31
 
32
  def get_name(id):
33
  return TBL.get(str(id), "")
34
 
 
35
  def get_id(nm):
36
- if not nm:return ""
 
37
  return TBL_.get(nm.upper().strip(), "")
 
11
  # limitations under the License.
12
  #
13
 
14
+ TBL = {
15
+ "94": "EMBA",
16
+ "6": "MBA",
17
+ "95": "MPA",
18
+ "92": "专升本",
19
+ "4": "专科",
20
+ "90": "中专",
21
+ "91": "中技",
22
+ "86": "初中",
23
+ "3": "博士",
24
+ "10": "博士后",
25
+ "1": "本科",
26
+ "2": "硕士",
27
+ "87": "职高",
28
+ "89": "高中",
29
  }
30
 
31
+ TBL_ = {v: k for k, v in TBL.items()}
32
+
33
 
34
  def get_name(id):
35
  return TBL.get(str(id), "")
36
 
37
+
38
  def get_id(nm):
39
+ if not nm:
40
+ return ""
41
  return TBL_.get(nm.upper().strip(), "")
deepdoc/parser/resume/entities/industries.py CHANGED
@@ -11,694 +11,699 @@
11
  # limitations under the License.
12
  #
13
 
14
- TBL = {"1":{"name":"IT/通信/电子","parent":"0"},
15
- "2":{"name":"互联网","parent":"0"},
16
- "3":{"name":"电子商务","parent":"2"},
17
- "4":{"name":"互联网金融","parent":"2"},
18
- "5":{"name":"网络游戏","parent":"2"},
19
- "6":{"name":"社交网络平台","parent":"2"},
20
- "7":{"name":"视频音乐","parent":"2"},
21
- "9":{"name":"安全","parent":"2"},
22
- "10":{"name":"云计算","parent":"2"},
23
- "12":{"name":"工具类客户端应用","parent":"2"},
24
- "13":{"name":"互联网广告","parent":"2"},
25
- "14":{"name":"企业互联网服务","parent":"2"},
26
- "16":{"name":"在线教育","parent":"2"},
27
- "17":{"name":"在线医疗","parent":"2"},
28
- "19":{"name":"B2B","parent":"3"},
29
- "20":{"name":"B2C","parent":"3"},
30
- "21":{"name":"C2C","parent":"3"},
31
- "22":{"name":"生活信息本地化","parent":"3"},
32
- "23":{"name":"在线旅游","parent":"2"},
33
- "24":{"name":"第三方支付","parent":"4"},
34
- "26":{"name":"客户端游戏","parent":"5"},
35
- "27":{"name":"网页游戏","parent":"5"},
36
- "28":{"name":"手机游戏","parent":"5"},
37
- "29":{"name":"微博","parent":"6"},
38
- "30":{"name":"社交网站","parent":"6"},
39
- "31":{"name":"在线视频","parent":"7"},
40
- "32":{"name":"在线音乐","parent":"7"},
41
- "35":{"name":"企业安全","parent":"9"},
42
- "36":{"name":"个人安全","parent":"9"},
43
- "37":{"name":"企业级云服务","parent":"10"},
44
- "38":{"name":"个人级云服务","parent":"10"},
45
- "43":{"name":"输入法","parent":"12"},
46
- "44":{"name":"浏览器","parent":"12"},
47
- "45":{"name":"词典","parent":"12"},
48
- "46":{"name":"播放器","parent":"12"},
49
- "47":{"name":"下载器","parent":"12"},
50
- "48":{"name":"IM","parent":"12"},
51
- "49":{"name":"广告服务","parent":"13"},
52
- "50":{"name":"第三方广告网络平台","parent":"13"},
53
- "51":{"name":"媒体代理","parent":"13"},
54
- "52":{"name":"创意代理","parent":"13"},
55
- "53":{"name":"IT-综合","parent":"1"},
56
- "71":{"name":"团购","parent":"3"},
57
- "72":{"name":"地图","parent":"2"},
58
- "73":{"name":"数据存储","parent":"2"},
59
- "414":{"name":"计算机软件","parent":"1"},
60
- "415":{"name":"计算机硬件","parent":"1"},
61
- "416":{"name":"计算机服务(系统、数据服务、维修)","parent":"1"},
62
- "417":{"name":"通信/电信/网络设备","parent":"1"},
63
- "418":{"name":"通信/电信运营、增值服务","parent":"1"},
64
- "419":{"name":"电子技术/半导体/集成电路","parent":"1"},
65
- "472":{"name":"P2P网贷","parent":"4"},
66
- "473":{"name":"互联网理财","parent":"4"},
67
- "474":{"name":"婚恋","parent":"6"},
68
- "476":{"name":"虚拟化","parent":"10"},
69
- "477":{"name":"邮箱","parent":"12"},
70
- "478":{"name":"商业智能","parent":"14"},
71
- "479":{"name":"企业建站","parent":"14"},
72
- "480":{"name":"安防","parent":"14"},
73
- "481":{"name":"网络营销","parent":"2"},
74
- "487":{"name":"智能终端","parent":"2"},
75
- "488":{"name":"移动互联网","parent":"2"},
76
- "489":{"name":"数字城市","parent":"2"},
77
- "490":{"name":"大数据","parent":"2"},
78
- "491":{"name":"互联网人力资源","parent":"2"},
79
- "492":{"name":"舆情监控","parent":"2"},
80
- "493":{"name":"移动营销","parent":"481"},
81
- "494":{"name":"微博营销","parent":"481"},
82
- "495":{"name":"精准营销","parent":"481"},
83
- "496":{"name":"海外营销","parent":"481"},
84
- "497":{"name":"微信营销","parent":"481"},
85
- "498":{"name":"智能手机","parent":"487"},
86
- "499":{"name":"可穿戴设备","parent":"487"},
87
- "500":{"name":"智能电视","parent":"487"},
88
- "501":{"name":"WAP","parent":"488"},
89
- "502":{"name":"物联网","parent":"489"},
90
- "503":{"name":"O2O","parent":"489"},
91
- "504":{"name":"数字出版","parent":"489"},
92
- "505":{"name":"搜索","parent":"2"},
93
- "506":{"name":"垂直搜索","parent":"505"},
94
- "507":{"name":"无线搜索","parent":"505"},
95
- "508":{"name":"网页搜索","parent":"505"},
96
- "509":{"name":"网址导航","parent":"2"},
97
- "510":{"name":"门户","parent":"2"},
98
- "511":{"name":"网络文学","parent":"2"},
99
- "512":{"name":"自媒体","parent":"2"},
100
- "513":{"name":"金融","parent":"0"},
101
- "514":{"name":"建筑与房地产","parent":"0"},
102
- "515":{"name":"专业服务","parent":"0"},
103
- "516":{"name":"教育培训","parent":"0"},
104
- "517":{"name":"文化传媒","parent":"0"},
105
- "518":{"name":"消费品","parent":"0"},
106
- "519":{"name":"工业","parent":"0"},
107
- "520":{"name":"交通物流","parent":"0"},
108
- "521":{"name":"贸易","parent":"0"},
109
- "522":{"name":"医药","parent":"0"},
110
- "523":{"name":"医疗器械","parent":"522"},
111
- "524":{"name":"保健品","parent":"518"},
112
- "525":{"name":"服务业","parent":"0"},
113
- "526":{"name":"能源/矿产/环保","parent":"0"},
114
- "527":{"name":"化工","parent":"0"},
115
- "528":{"name":"政府","parent":"0"},
116
- "529":{"name":"公共事业","parent":"0"},
117
- "530":{"name":"非盈利机构","parent":"0"},
118
- "531":{"name":"农业","parent":"1131"},
119
- "532":{"name":"林业","parent":"1131"},
120
- "533":{"name":"畜牧业","parent":"1131"},
121
- "534":{"name":"渔业","parent":"1131"},
122
- "535":{"name":"学术科研","parent":"0"},
123
- "536":{"name":"零售","parent":"0"},
124
- "537":{"name":"银行","parent":"513"},
125
- "538":{"name":"保险","parent":"513"},
126
- "539":{"name":"证券","parent":"513"},
127
- "540":{"name":"基金","parent":"513"},
128
- "541":{"name":"信托","parent":"513"},
129
- "542":{"name":"担保","parent":"513"},
130
- "543":{"name":"典当","parent":"513"},
131
- "544":{"name":"拍卖","parent":"513"},
132
- "545":{"name":"投资/融资","parent":"513"},
133
- "546":{"name":"期货","parent":"513"},
134
- "547":{"name":"房地产开发","parent":"514"},
135
- "548":{"name":"工程施工","parent":"514"},
136
- "549":{"name":"建筑设计","parent":"514"},
137
- "550":{"name":"房地产代理","parent":"514"},
138
- "551":{"name":"物业管理","parent":"514"},
139
- "552":{"name":"室内设计","parent":"514"},
140
- "553":{"name":"装修装潢","parent":"514"},
141
- "554":{"name":"市政工程","parent":"514"},
142
- "555":{"name":"工程造价","parent":"514"},
143
- "556":{"name":"工程监理","parent":"514"},
144
- "557":{"name":"环境工程","parent":"514"},
145
- "558":{"name":"园林景观","parent":"514"},
146
- "559":{"name":"法律","parent":"515"},
147
- "560":{"name":"人力资源","parent":"515"},
148
- "561":{"name":"会计","parent":"1125"},
149
- "562":{"name":"审计","parent":"515"},
150
- "563":{"name":"检测认证","parent":"515"},
151
- "565":{"name":"翻译","parent":"515"},
152
- "566":{"name":"中介","parent":"515"},
153
- "567":{"name":"咨询","parent":"515"},
154
- "568":{"name":"外包服务","parent":"515"},
155
- "569":{"name":"家教","parent":"516"},
156
- "570":{"name":"早教","parent":"516"},
157
- "571":{"name":"职业技能培训","parent":"516"},
158
- "572":{"name":"外语培训","parent":"516"},
159
- "573":{"name":"设计培训","parent":"516"},
160
- "574":{"name":"IT培训","parent":"516"},
161
- "575":{"name":"文艺体育培训","parent":"516"},
162
- "576":{"name":"学历教育","parent":"516"},
163
- "577":{"name":"管理培训","parent":"516"},
164
- "578":{"name":"民办基础教育","parent":"516"},
165
- "579":{"name":"广告","parent":"517"},
166
- "580":{"name":"媒体","parent":"517"},
167
- "581":{"name":"会展","parent":"517"},
168
- "582":{"name":"公关","parent":"517"},
169
- "583":{"name":"影视","parent":"517"},
170
- "584":{"name":"艺术","parent":"517"},
171
- "585":{"name":"文化传播","parent":"517"},
172
- "586":{"name":"娱乐","parent":"517"},
173
- "587":{"name":"体育","parent":"517"},
174
- "588":{"name":"出版","parent":"517"},
175
- "589":{"name":"休闲","parent":"517"},
176
- "590":{"name":"动漫","parent":"517"},
177
- "591":{"name":"市场推广","parent":"517"},
178
- "592":{"name":"市场研究","parent":"517"},
179
- "593":{"name":"食品","parent":"1129"},
180
- "594":{"name":"饮料","parent":"1129"},
181
- "595":{"name":"烟草","parent":"1129"},
182
- "596":{"name":"酒品","parent":"518"},
183
- "597":{"name":"服饰","parent":"518"},
184
- "598":{"name":"纺织","parent":"518"},
185
- "599":{"name":"化妆品","parent":"1129"},
186
- "600":{"name":"日用品","parent":"1129"},
187
- "601":{"name":"家电","parent":"518"},
188
- "602":{"name":"家具","parent":"518"},
189
- "603":{"name":"办公用品","parent":"518"},
190
- "604":{"name":"奢侈品","parent":"518"},
191
- "605":{"name":"珠宝","parent":"518"},
192
- "606":{"name":"数码产品","parent":"518"},
193
- "607":{"name":"玩具","parent":"518"},
194
- "608":{"name":"图书","parent":"518"},
195
- "609":{"name":"音像","parent":"518"},
196
- "610":{"name":"钟表","parent":"518"},
197
- "611":{"name":"箱包","parent":"518"},
198
- "612":{"name":"母婴","parent":"518"},
199
- "613":{"name":"营养保健","parent":"518"},
200
- "614":{"name":"户外用品","parent":"518"},
201
- "615":{"name":"健身器材","parent":"518"},
202
- "616":{"name":"乐器","parent":"518"},
203
- "617":{"name":"汽车用品","parent":"518"},
204
- "619":{"name":"厨具","parent":"518"},
205
- "620":{"name":"机械制造","parent":"519"},
206
- "621":{"name":"流体控制","parent":"519"},
207
- "622":{"name":"自动化控制","parent":"519"},
208
- "623":{"name":"仪器仪表","parent":"519"},
209
- "624":{"name":"航空/航天","parent":"519"},
210
- "625":{"name":"交通设施","parent":"519"},
211
- "626":{"name":"工业电子","parent":"519"},
212
- "627":{"name":"建材","parent":"519"},
213
- "628":{"name":"五金材料","parent":"519"},
214
- "629":{"name":"汽车","parent":"519"},
215
- "630":{"name":"印刷","parent":"519"},
216
- "631":{"name":"造纸","parent":"519"},
217
- "632":{"name":"包装","parent":"519"},
218
- "633":{"name":"原材料及加工","parent":"519"},
219
- "634":{"name":"物流","parent":"520"},
220
- "635":{"name":"仓储","parent":"520"},
221
- "636":{"name":"客运","parent":"520"},
222
- "637":{"name":"快递","parent":"520"},
223
- "638":{"name":"化学药","parent":"522"},
224
- "639":{"name":"中药","parent":"522"},
225
- "640":{"name":"生物制药","parent":"522"},
226
- "641":{"name":"兽药","parent":"522"},
227
- "642":{"name":"农药","parent":"522"},
228
- "643":{"name":"CRO","parent":"522"},
229
- "644":{"name":"消毒","parent":"522"},
230
- "645":{"name":"医药商业","parent":"522"},
231
- "646":{"name":"医疗服务","parent":"522"},
232
- "647":{"name":"医疗器械","parent":"523"},
233
- "648":{"name":"制药设备","parent":"523"},
234
- "649":{"name":"医用耗材","parent":"523"},
235
- "650":{"name":"手术器械","parent":"523"},
236
- "651":{"name":"保健器材","parent":"524"},
237
- "652":{"name":"性保健品","parent":"524"},
238
- "653":{"name":"医药保养","parent":"524"},
239
- "654":{"name":"医用保健","parent":"524"},
240
- "655":{"name":"酒店","parent":"525"},
241
- "656":{"name":"餐饮","parent":"525"},
242
- "657":{"name":"旅游","parent":"525"},
243
- "658":{"name":"生活服务","parent":"525"},
244
- "659":{"name":"保健服务","parent":"525"},
245
- "660":{"name":"运动健身","parent":"525"},
246
- "661":{"name":"家政服务","parent":"525"},
247
- "662":{"name":"婚庆服务","parent":"525"},
248
- "663":{"name":"租赁服务","parent":"525"},
249
- "664":{"name":"维修服务","parent":"525"},
250
- "665":{"name":"石油天然气","parent":"526"},
251
- "666":{"name":"电力","parent":"526"},
252
- "667":{"name":"新能源","parent":"526"},
253
- "668":{"name":"水利","parent":"526"},
254
- "669":{"name":"矿产","parent":"526"},
255
- "670":{"name":"采掘业","parent":"526"},
256
- "671":{"name":"冶炼","parent":"526"},
257
- "672":{"name":"环保","parent":"526"},
258
- "673":{"name":"无机化工原料","parent":"527"},
259
- "674":{"name":"有机化工原料","parent":"527"},
260
- "675":{"name":"精细化学品","parent":"527"},
261
- "676":{"name":"化工设备","parent":"527"},
262
- "677":{"name":"化工工程","parent":"527"},
263
- "678":{"name":"资产管理","parent":"513"},
264
- "679":{"name":"金融租赁","parent":"513"},
265
- "680":{"name":"征信及信评机构","parent":"513"},
266
- "681":{"name":"资产评估机构","parent":"513"},
267
- "683":{"name":"金融监管机构","parent":"513"},
268
- "684":{"name":"国际贸易","parent":"521"},
269
- "685":{"name":"海关","parent":"521"},
270
- "686":{"name":"购物中心","parent":"536"},
271
- "687":{"name":"超市","parent":"536"},
272
- "688":{"name":"便利店","parent":"536"},
273
- "689":{"name":"专卖店","parent":"536"},
274
- "690":{"name":"专业店","parent":"536"},
275
- "691":{"name":"百货店","parent":"536"},
276
- "692":{"name":"杂货店","parent":"536"},
277
- "693":{"name":"个人银行","parent":"537"},
278
- "695":{"name":"私人银行","parent":"537"},
279
- "696":{"name":"公司银行","parent":"537"},
280
- "697":{"name":"投资银行","parent":"537"},
281
- "698":{"name":"政策性银行","parent":"537"},
282
- "699":{"name":"中央银行","parent":"537"},
283
- "700":{"name":"人寿险","parent":"538"},
284
- "701":{"name":"财产险","parent":"538"},
285
- "702":{"name":"再保险","parent":"538"},
286
- "703":{"name":"养老险","parent":"538"},
287
- "704":{"name":"保险代理公司","parent":"538"},
288
- "705":{"name":"公募基金","parent":"540"},
289
- "707":{"name":"私募基金","parent":"540"},
290
- "708":{"name":"第三方理财","parent":"679"},
291
- "709":{"name":"资产管理公司","parent":"679"},
292
- "711":{"name":"房产中介","parent":"566"},
293
- "712":{"name":"职业中介","parent":"566"},
294
- "713":{"name":"婚姻中介","parent":"566"},
295
- "714":{"name":"战略咨询","parent":"567"},
296
- "715":{"name":"投资咨询","parent":"567"},
297
- "716":{"name":"心理咨询","parent":"567"},
298
- "717":{"name":"留学移民咨询","parent":"567"},
299
- "718":{"name":"工商注册代理","parent":"568"},
300
- "719":{"name":"商标专利代理","parent":"568"},
301
- "720":{"name":"财务代理","parent":"568"},
302
- "721":{"name":"工程机械","parent":"620"},
303
- "722":{"name":"农业机械","parent":"620"},
304
- "723":{"name":"海工设备","parent":"620"},
305
- "724":{"name":"包装机械","parent":"620"},
306
- "725":{"name":"印刷机械","parent":"620"},
307
- "726":{"name":"数控机床","parent":"620"},
308
- "727":{"name":"矿山机械","parent":"620"},
309
- "728":{"name":"水泵","parent":"621"},
310
- "729":{"name":"管道","parent":"621"},
311
- "730":{"name":"阀门","parent":"621"},
312
- "732":{"name":"压缩机","parent":"621"},
313
- "733":{"name":"集散控制系统","parent":"622"},
314
- "734":{"name":"远程控制","parent":"622"},
315
- "735":{"name":"液压系统","parent":"622"},
316
- "736":{"name":"楼宇智能化","parent":"622"},
317
- "737":{"name":"飞机制造","parent":"624"},
318
- "738":{"name":"航空公司","parent":"624"},
319
- "739":{"name":"发动机","parent":"624"},
320
- "740":{"name":"复合材料","parent":"624"},
321
- "741":{"name":"高铁","parent":"625"},
322
- "742":{"name":"地铁","parent":"625"},
323
- "743":{"name":"信号传输","parent":"625"},
324
- "745":{"name":"结构材料","parent":"627"},
325
- "746":{"name":"装饰材料","parent":"627"},
326
- "747":{"name":"专用材料","parent":"627"},
327
- "749":{"name":"经销商集团","parent":"629"},
328
- "750":{"name":"整车制造","parent":"629"},
329
- "751":{"name":"汽车零配件","parent":"629"},
330
- "752":{"name":"外型设计","parent":"629"},
331
- "753":{"name":"平版印刷","parent":"630"},
332
- "754":{"name":"凸版印刷","parent":"630"},
333
- "755":{"name":"凹版印刷","parent":"630"},
334
- "756":{"name":"孔版印刷","parent":"630"},
335
- "757":{"name":"印刷用纸","parent":"631"},
336
- "758":{"name":"书写、制图及复制用纸","parent":"631"},
337
- "759":{"name":"包装用纸","parent":"631"},
338
- "760":{"name":"生活、卫生及装饰用纸","parent":"631"},
339
- "761":{"name":"技术用纸","parent":"631"},
340
- "762":{"name":"加工纸原纸","parent":"631"},
341
- "763":{"name":"食品包装","parent":"632"},
342
- "764":{"name":"医药包装","parent":"632"},
343
- "765":{"name":"日化包装","parent":"632"},
344
- "766":{"name":"物流包装","parent":"632"},
345
- "767":{"name":"礼品包装","parent":"632"},
346
- "768":{"name":"电子五金包装","parent":"632"},
347
- "769":{"name":"汽车服务","parent":"525"},
348
- "770":{"name":"汽车保养","parent":"769"},
349
- "771":{"name":"租车","parent":"769"},
350
- "773":{"name":"出租车","parent":"769"},
351
- "774":{"name":"代驾","parent":"769"},
352
- "775":{"name":"发电","parent":"666"},
353
- "777":{"name":"输配电","parent":"666"},
354
- "779":{"name":"风电","parent":"667"},
355
- "780":{"name":"光伏/太阳能","parent":"667"},
356
- "781":{"name":"生物质发电","parent":"667"},
357
- "782":{"name":"煤化工","parent":"667"},
358
- "783":{"name":"垃圾发电","parent":"667"},
359
- "784":{"name":"核电","parent":"667"},
360
- "785":{"name":"能源矿产","parent":"669"},
361
- "786":{"name":"金属矿产","parent":"669"},
362
- "787":{"name":"非金属矿产","parent":"669"},
363
- "788":{"name":"水气矿产","parent":"669"},
364
- "789":{"name":"锅炉","parent":"775"},
365
- "790":{"name":"发电机","parent":"775"},
366
- "791":{"name":"汽轮机","parent":"775"},
367
- "792":{"name":"燃机","parent":"775"},
368
- "793":{"name":"冷却","parent":"775"},
369
- "794":{"name":"电力设计院","parent":"775"},
370
- "795":{"name":"高压输配电","parent":"777"},
371
- "796":{"name":"中压输配电","parent":"777"},
372
- "797":{"name":"低压输配电","parent":"777"},
373
- "798":{"name":"继电保护","parent":"777"},
374
- "799":{"name":"智能电网","parent":"777"},
375
- "800":{"name":"小学","parent":"516"},
376
- "801":{"name":"电动车","parent":"519"},
377
- "802":{"name":"皮具箱包","parent":"518"},
378
- "803":{"name":"医药制造","parent":"522"},
379
- "804":{"name":"电器销售","parent":"536"},
380
- "805":{"name":"塑料制品","parent":"527"},
381
- "806":{"name":"公益基金会","parent":"530"},
382
- "807":{"name":"美发服务","parent":"525"},
383
- "808":{"name":"农业养殖","parent":"531"},
384
- "809":{"name":"金融服务","parent":"513"},
385
- "810":{"name":"商业地产综合体","parent":"514"},
386
- "811":{"name":"美容服务","parent":"525"},
387
- "812":{"name":"灯饰","parent":"518"},
388
- "813":{"name":"油墨颜料产品","parent":"527"},
389
- "814":{"name":"眼镜制造","parent":"518"},
390
- "815":{"name":"农业生物技术","parent":"531"},
391
- "816":{"name":"体育用品","parent":"518"},
392
- "817":{"name":"保健用品","parent":"524"},
393
- "818":{"name":"化学化工产品","parent":"527"},
394
- "819":{"name":"饲料","parent":"531"},
395
- "821":{"name":"保安服务","parent":"525"},
396
- "822":{"name":"干细胞技术","parent":"522"},
397
- "824":{"name":"农药化肥","parent":"527"},
398
- "825":{"name":"卫生洁具","parent":"518"},
399
- "826":{"name":"体育器材、场馆","parent":"518"},
400
- "827":{"name":"饲料加工","parent":"531"},
401
- "828":{"name":"测绘服务","parent":"529"},
402
- "830":{"name":"金属船舶制造","parent":"519"},
403
- "831":{"name":"基因工程","parent":"522"},
404
- "832":{"name":"花卉服务","parent":"536"},
405
- "833":{"name":"农业种植","parent":"531"},
406
- "834":{"name":"皮革制品","parent":"518"},
407
- "835":{"name":"地理信息加工服务","parent":"529"},
408
- "836":{"name":"机器人","parent":"519"},
409
- "837":{"name":"礼品","parent":"518"},
410
- "838":{"name":"理发及美容服务","parent":"525"},
411
- "839":{"name":"其他清洁服务","parent":"525"},
412
- "840":{"name":"硅胶材料","parent":"527"},
413
- "841":{"name":"茶叶销售","parent":"518"},
414
- "842":{"name":"彩票活动","parent":"529"},
415
- "843":{"name":"化妆培训","parent":"516"},
416
- "844":{"name":"鞋业","parent":"518"},
417
- "845":{"name":"酒店用品","parent":"518"},
418
- "846":{"name":"复合材料","parent":"527"},
419
- "847":{"name":"房地产工程建设","parent":"548"},
420
- "848":{"name":"知识产权服务","parent":"559"},
421
- "849":{"name":"新型建材","parent":"627"},
422
- "850":{"name":"企业投资咨询","parent":"567"},
423
- "851":{"name":"含乳饮料和植物蛋白饮料制造","parent":"594"},
424
- "852":{"name":"汽车检测设备","parent":"629"},
425
- "853":{"name":"手机通讯器材","parent":"417"},
426
- "854":{"name":"环保材料","parent":"672"},
427
- "855":{"name":"交通设施","parent":"554"},
428
- "856":{"name":"电子器件","parent":"419"},
429
- "857":{"name":"啤酒","parent":"594"},
430
- "858":{"name":"生态旅游","parent":"657"},
431
- "859":{"name":"自动化设备","parent":"626"},
432
- "860":{"name":"软件开发","parent":"414"},
433
- "861":{"name":"葡萄酒销售","parent":"594"},
434
- "862":{"name":"钢材","parent":"633"},
435
- "863":{"name":"餐饮培训","parent":"656"},
436
- "864":{"name":"速冻食品","parent":"593"},
437
- "865":{"name":"空气环保","parent":"672"},
438
- "866":{"name":"互联网房地产经纪服务","parent":"550"},
439
- "867":{"name":"食品添加剂","parent":"593"},
440
- "868":{"name":"演艺传播","parent":"585"},
441
- "869":{"name":"信用卡","parent":"537"},
442
- "870":{"name":"报纸期刊广告","parent":"579"},
443
- "871":{"name":"摄影","parent":"525"},
444
- "872":{"name":"手机软件","parent":"414"},
445
- "873":{"name":"地坪建材","parent":"627"},
446
- "874":{"name":"企业管理咨询","parent":"567"},
447
- "875":{"name":"幼儿教育","parent":"570"},
448
- "876":{"name":"系统集成","parent":"416"},
449
- "877":{"name":"皮革服饰","parent":"597"},
450
- "878":{"name":"保健食品","parent":"593"},
451
- "879":{"name":"叉车","parent":"620"},
452
- "880":{"name":"厨卫电器","parent":"601"},
453
- "882":{"name":"地暖设备","parent":"627"},
454
- "883":{"name":"钢结构制造","parent":"548"},
455
- "884":{"name":"投影机","parent":"606"},
456
- "885":{"name":"啤酒销售","parent":"594"},
457
- "886":{"name":"度假村旅游","parent":"657"},
458
- "887":{"name":"电力元件设备","parent":"626"},
459
- "888":{"name":"管理软件","parent":"414"},
460
- "889":{"name":"轴承","parent":"628"},
461
- "890":{"name":"餐饮设备","parent":"656"},
462
- "891":{"name":"肉制品及副产品加工","parent":"593"},
463
- "892":{"name":"艺术收藏品投资交易","parent":"584"},
464
- "893":{"name":"净水器","parent":"601"},
465
- "894":{"name":"进口食品","parent":"593"},
466
- "895":{"name":"娱乐文化传播","parent":"585"},
467
- "896":{"name":"文化传播","parent":"585"},
468
- "897":{"name":"商旅传媒","parent":"580"},
469
- "898":{"name":"广告设计制作","parent":"579"},
470
- "899":{"name":"金属丝绳及其制品制造","parent":"627"},
471
- "900":{"name":"建筑涂料","parent":"627"},
472
- "901":{"name":"抵押贷款","parent":"543"},
473
- "902":{"name":"早教","parent":"570"},
474
- "903":{"name":"电影放映","parent":"583"},
475
- "904":{"name":"内衣服饰","parent":"597"},
476
- "905":{"name":"无线网络通信","parent":"418"},
477
- "906":{"name":"记忆卡","parent":"415"},
478
- "907":{"name":"女装服饰","parent":"597"},
479
- "908":{"name":"建筑机械","parent":"620"},
480
- "909":{"name":"制冷电器","parent":"601"},
481
- "910":{"name":"通信设备","parent":"417"},
482
- "911":{"name":"空调设备","parent":"601"},
483
- "912":{"name":"建筑装饰","parent":"553"},
484
- "913":{"name":"办公设备","parent":"603"},
485
- "916":{"name":"数据处理软件","parent":"414"},
486
- "917":{"name":"葡萄酒贸易","parent":"594"},
487
- "918":{"name":"通讯器材","parent":"417"},
488
- "919":{"name":"铜业","parent":"633"},
489
- "920":{"name":"食堂","parent":"656"},
490
- "921":{"name":"糖果零食","parent":"593"},
491
- "922":{"name":"文化艺术传播","parent":"584"},
492
- "923":{"name":"太阳能电器","parent":"601"},
493
- "924":{"name":"药品零售","parent":"645"},
494
- "925":{"name":"果蔬食品","parent":"593"},
495
- "926":{"name":"文化活动策划","parent":"585"},
496
- "928":{"name":"汽车广告","parent":"657"},
497
- "929":{"name":"条码设备","parent":"630"},
498
- "930":{"name":"建筑石材","parent":"627"},
499
- "931":{"name":"贵金属","parent":"545"},
500
- "932":{"name":"体育","parent":"660"},
501
- "933":{"name":"金融信息服务","parent":"414"},
502
- "934":{"name":"玻璃建材","parent":"627"},
503
- "935":{"name":"家教","parent":"569"},
504
- "936":{"name":"歌舞厅娱乐活动","parent":"586"},
505
- "937":{"name":"计算机服务器","parent":"415"},
506
- "938":{"name":"管道","parent":"627"},
507
- "939":{"name":"婴幼儿服饰","parent":"597"},
508
- "940":{"name":"热水器","parent":"601"},
509
- "941":{"name":"计算机及零部件制造","parent":"415"},
510
- "942":{"name":"钢铁贸易","parent":"633"},
511
- "944":{"name":"包装材料","parent":"632"},
512
- "945":{"name":"计算机办公设备","parent":"603"},
513
- "946":{"name":"白酒","parent":"594"},
514
- "948":{"name":"发动机","parent":"620"},
515
- "949":{"name":"快餐服务","parent":"656"},
516
- "950":{"name":"酒类销售","parent":"594"},
517
- "951":{"name":"电子产品、机电设备","parent":"626"},
518
- "952":{"name":"激光设备","parent":"626"},
519
- "953":{"name":"餐饮策划","parent":"656"},
520
- "954":{"name":"饮料、食品","parent":"594"},
521
- "955":{"name":"文化娱乐经纪","parent":"585"},
522
- "956":{"name":"天然气","parent":"665"},
523
- "957":{"name":"农副食品","parent":"593"},
524
- "958":{"name":"艺术表演","parent":"585"},
525
- "959":{"name":"石膏、水泥制品及类似制品制造","parent":"627"},
526
- "960":{"name":"橱柜","parent":"602"},
527
- "961":{"name":"管理培训","parent":"577"},
528
- "962":{"name":"男装服饰","parent":"597"},
529
- "963":{"name":"化肥制造","parent":"675"},
530
- "964":{"name":"童装服饰","parent":"597"},
531
- "965":{"name":"电源电池","parent":"626"},
532
- "966":{"name":"家电维修","parent":"664"},
533
- "967":{"name":"光电子器件","parent":"419"},
534
- "968":{"name":"旅行社服务","parent":"657"},
535
- "969":{"name":"电线、电缆制造","parent":"626"},
536
- "970":{"name":"软件开发、信息系统集成","parent":"419"},
537
- "971":{"name":"白酒制造","parent":"594"},
538
- "973":{"name":"甜品服务","parent":"656"},
539
- "974":{"name":"糕点、面包制造","parent":"593"},
540
- "975":{"name":"木工机械","parent":"620"},
541
- "976":{"name":"酒吧服务","parent":"656"},
542
- "977":{"name":"火腿肠","parent":"593"},
543
- "978":{"name":"广告策划推广","parent":"579"},
544
- "979":{"name":"新能源产品和生产装备制造","parent":"667"},
545
- "980":{"name":"调味品","parent":"593"},
546
- "981":{"name":"礼仪表演","parent":"585"},
547
- "982":{"name":"劳务派遣","parent":"560"},
548
- "983":{"name":"建材零售","parent":"627"},
549
- "984":{"name":"商品交易中心","parent":"545"},
550
- "985":{"name":"体育推广","parent":"585"},
551
- "986":{"name":"茶饮料及其他饮料制造","parent":"594"},
552
- "987":{"name":"金属建材","parent":"627"},
553
- "988":{"name":"职业技能培训","parent":"571"},
554
- "989":{"name":"网吧活动","parent":"586"},
555
- "990":{"name":"洗衣服务","parent":"658"},
556
- "991":{"name":"管道工程","parent":"554"},
557
- "992":{"name":"通信工程","parent":"417"},
558
- "993":{"name":"电子元器件","parent":"626"},
559
- "994":{"name":"电子设备","parent":"419"},
560
- "995":{"name":"茶馆服务","parent":"656"},
561
- "996":{"name":"旅游开发","parent":"657"},
562
- "997":{"name":"视频通讯","parent":"417"},
563
- "998":{"name":"白酒销售","parent":"594"},
564
- "1000":{"name":"咖啡馆服务","parent":"656"},
565
- "1001":{"name":"食品零售","parent":"593"},
566
- "1002":{"name":"健康疗养旅游","parent":"655"},
567
- "1003":{"name":"粮油食品","parent":"593"},
568
- "1004":{"name":"儿童教育影视","parent":"583"},
569
- "1005":{"name":"新能源发电","parent":"667"},
570
- "1006":{"name":"旅游策划","parent":"657"},
571
- "1007":{"name":"绘画","parent":"575"},
572
- "1008":{"name":"方便面及其他方便食品","parent":"593"},
573
- "1009":{"name":"房地产经纪","parent":"550"},
574
- "1010":{"name":"母婴家政","parent":"661"},
575
- "1011":{"name":"居家养老健康服务","parent":"661"},
576
- "1012":{"name":"文化艺术投资","parent":"545"},
577
- "1013":{"name":"运动健身","parent":"660"},
578
- "1014":{"name":"瓶(罐)装饮用水制造","parent":"594"},
579
- "1015":{"name":"金属门窗","parent":"627"},
580
- "1016":{"name":"机动车检测","parent":"563"},
581
- "1017":{"name":"货物运输","parent":"634"},
582
- "1018":{"name":"服饰专卖","parent":"690"},
583
- "1019":{"name":"酒店服装","parent":"597"},
584
- "1020":{"name":"通讯软件","parent":"417"},
585
- "1021":{"name":"消防工程","parent":"554"},
586
- "1022":{"name":"嵌入式电子系统","parent":"419"},
587
- "1023":{"name":"航空票务","parent":"636"},
588
- "1024":{"name":"电气设备","parent":"626"},
589
- "1025":{"name":"酒业贸易","parent":"594"},
590
- "1027":{"name":"其他饮料及冷饮服务","parent":"656"},
591
- "1028":{"name":"乳制品","parent":"593"},
592
- "1029":{"name":"新闻期刊出版","parent":"588"},
593
- "1030":{"name":"水污染治理","parent":"672"},
594
- "1031":{"name":"谷物食品","parent":"593"},
595
- "1032":{"name":"数字动漫设计制造服务","parent":"590"},
596
- "1033":{"name":"医院","parent":"646"},
597
- "1034":{"name":"旅游广告","parent":"657"},
598
- "1035":{"name":"办公家具","parent":"602"},
599
- "1036":{"name":"房地产营销策划","parent":"550"},
600
- "1037":{"name":"保洁家政","parent":"661"},
601
- "1038":{"name":"水泥制造","parent":"627"},
602
- "1039":{"name":"市场研究咨询","parent":"567"},
603
- "1040":{"name":"驾校","parent":"571"},
604
- "1041":{"name":"正餐服务","parent":"656"},
605
- "1043":{"name":"机动车燃油","parent":"665"},
606
- "1044":{"name":"食品","parent":"593"},
607
- "1045":{"name":"新能源汽车","parent":"629"},
608
- "1046":{"name":"手机无线网络推广","parent":"417"},
609
- "1047":{"name":"环保设备","parent":"672"},
610
- "1048":{"name":"通讯工程","parent":"418"},
611
- "1049":{"name":"半导体集成电路","parent":"419"},
612
- "1050":{"name":"航空服务","parent":"636"},
613
- "1051":{"name":"电机设备","parent":"626"},
614
- "1052":{"name":"档案软件","parent":"414"},
615
- "1053":{"name":"冷链物流服务","parent":"634"},
616
- "1054":{"name":"小吃服务","parent":"656"},
617
- "1055":{"name":"水产品加工","parent":"593"},
618
- "1056":{"name":"图书出版","parent":"588"},
619
- "1057":{"name":"固体废物治理","parent":"672"},
620
- "1059":{"name":"坚果食品","parent":"593"},
621
- "1060":{"name":"广告传媒","parent":"579"},
622
- "1061":{"name":"电梯","parent":"622"},
623
- "1062":{"name":"社区医疗与卫生院","parent":"646"},
624
- "1063":{"name":"广告、印刷包装","parent":"630"},
625
- "1064":{"name":"婚纱礼服","parent":"662"},
626
- "1065":{"name":"地毯","parent":"602"},
627
- "1066":{"name":"互联网物业","parent":"551"},
628
- "1067":{"name":"跨境电商","parent":"3"},
629
- "1068":{"name":"信息安全、系统集成","parent":"9"},
630
- "1069":{"name":"专用汽车制造","parent":"750"},
631
- "1070":{"name":"商品贸易","parent":"3"},
632
- "1071":{"name":"墙壁装饰材料","parent":"746"},
633
- "1072":{"name":"窗帘装饰材料","parent":"746"},
634
- "1073":{"name":"电子商务、本地生活服务","parent":"3"},
635
- "1075":{"name":"白酒电子商务","parent":"3"},
636
- "1076":{"name":"商品贸易、电子商务","parent":"3"},
637
- "1077":{"name":"木质装饰材料","parent":"746"},
638
- "1078":{"name":"电子商务、汽车电商交易平台","parent":"3"},
639
- "1079":{"name":"汽车轮胎","parent":"751"},
640
- "1080":{"name":"气体压缩机械制造","parent":"732"},
641
- "1081":{"name":"家装家具电子商务","parent":"3"},
642
- "1082":{"name":"化妆品电子商务","parent":"3"},
643
- "1083":{"name":"汽车销售","parent":"749"},
644
- "1084":{"name":"新闻资讯网站","parent":"510"},
645
- "1085":{"name":"母婴电商","parent":"3"},
646
- "1086":{"name":"电商商务、收藏品交易","parent":"3"},
647
- "1088":{"name":"电子商务、数码产品","parent":"3"},
648
- "1089":{"name":"二手车交易","parent":"749"},
649
- "1090":{"name":"游戏制作服务","parent":"5"},
650
- "1091":{"name":"母婴服务","parent":"510"},
651
- "1092":{"name":"家具电子商务","parent":"3"},
652
- "1093":{"name":"汽车配件电子商务","parent":"3"},
653
- "1094":{"name":"输配电设备","parent":"777"},
654
- "1095":{"name":"矿山设备","parent":"727"},
655
- "1096":{"name":"机床机械","parent":"726"},
656
- "1097":{"name":"农产品电商","parent":"3"},
657
- "1098":{"name":"陶瓷装饰材料","parent":"746"},
658
- "1099":{"name":"车载联网设备","parent":"487"},
659
- "1100":{"name":"汽车销售电子商务","parent":"3"},
660
- "1101":{"name":"石油设备","parent":"730"},
661
- "1102":{"name":"智能家居","parent":"487"},
662
- "1103":{"name":"散热器","parent":"751"},
663
- "1104":{"name":"电力工程","parent":"775"},
664
- "1105":{"name":"生鲜电商","parent":"3"},
665
- "1106":{"name":"互联网数据服务","parent":"490"},
666
- "1107":{"name":"房车、商务车销售","parent":"749"},
667
- "1108":{"name":"茶叶电子商务","parent":"3"},
668
- "1109":{"name":"酒类电子商务","parent":"3"},
669
- "1110":{"name":"阀门","parent":"730"},
670
- "1111":{"name":"食品电商","parent":"3"},
671
- "1112":{"name":"儿童摄影","parent":"871"},
672
- "1113":{"name":"广告摄影","parent":"871"},
673
- "1114":{"name":"婚纱摄影","parent":"871"},
674
- "1115":{"name":"模具制造","parent":"620"},
675
- "1116":{"name":"汽车模具","parent":"629"},
676
- "1117":{"name":"认证咨询","parent":"567"},
677
- "1118":{"name":"数字视觉制作服务","parent":"590"},
678
- "1119":{"name":"牙科及医疗器械","parent":"646"},
679
- "1120":{"name":"猎头招聘","parent":"560"},
680
- "1121":{"name":"家居","parent":"518"},
681
- "1122":{"name":"收藏品","parent":"518"},
682
- "1123":{"name":"首饰","parent":"518"},
683
- "1124":{"name":"工艺品","parent":"518"},
684
- "1125":{"name":"财务","parent":"515"},
685
- "1126":{"name":"税务","parent":"515"},
686
- "1127":{"name":"分类信息","parent":"2"},
687
- "1128":{"name":"宠物","parent":"0"},
688
- "1129":{"name":"快消品","parent":"518"},
689
- "1130":{"name":"人工智能","parent":"2"},
690
- "1131":{"name":"农/林/牧/渔","parent":"0"}
 
691
  }
692
 
 
693
  def get_names(id):
694
  id = str(id)
695
  nms = []
696
  d = TBL.get(id)
697
- if not d:return []
 
698
  nms.append(d["name"])
699
  p = get_names(d["parent"])
700
- if p: nms.extend(p)
 
701
  return nms
702
 
 
703
  if __name__ == "__main__":
704
  print(get_names("1119"))
 
11
  # limitations under the License.
12
  #
13
 
14
+ TBL = {
15
+ "1": {"name": "IT/通信/电子", "parent": "0"},
16
+ "2": {"name": "互联网", "parent": "0"},
17
+ "3": {"name": "电子商务", "parent": "2"},
18
+ "4": {"name": "互联网金融", "parent": "2"},
19
+ "5": {"name": "网络游戏", "parent": "2"},
20
+ "6": {"name": "社交网络平台", "parent": "2"},
21
+ "7": {"name": "视频音乐", "parent": "2"},
22
+ "9": {"name": "安全", "parent": "2"},
23
+ "10": {"name": "云计算", "parent": "2"},
24
+ "12": {"name": "工具类客户端应用", "parent": "2"},
25
+ "13": {"name": "互联网广告", "parent": "2"},
26
+ "14": {"name": "企业互联网服务", "parent": "2"},
27
+ "16": {"name": "在线教育", "parent": "2"},
28
+ "17": {"name": "在线医疗", "parent": "2"},
29
+ "19": {"name": "B2B", "parent": "3"},
30
+ "20": {"name": "B2C", "parent": "3"},
31
+ "21": {"name": "C2C", "parent": "3"},
32
+ "22": {"name": "生活信息本地化", "parent": "3"},
33
+ "23": {"name": "在线旅游", "parent": "2"},
34
+ "24": {"name": "第三方支付", "parent": "4"},
35
+ "26": {"name": "客户端游戏", "parent": "5"},
36
+ "27": {"name": "网页游戏", "parent": "5"},
37
+ "28": {"name": "手机游戏", "parent": "5"},
38
+ "29": {"name": "微博", "parent": "6"},
39
+ "30": {"name": "社交网站", "parent": "6"},
40
+ "31": {"name": "在线视频", "parent": "7"},
41
+ "32": {"name": "在线音乐", "parent": "7"},
42
+ "35": {"name": "企业安全", "parent": "9"},
43
+ "36": {"name": "个人安全", "parent": "9"},
44
+ "37": {"name": "企业级云服务", "parent": "10"},
45
+ "38": {"name": "个人级云服务", "parent": "10"},
46
+ "43": {"name": "输入法", "parent": "12"},
47
+ "44": {"name": "浏览器", "parent": "12"},
48
+ "45": {"name": "词典", "parent": "12"},
49
+ "46": {"name": "播放器", "parent": "12"},
50
+ "47": {"name": "下载器", "parent": "12"},
51
+ "48": {"name": "IM", "parent": "12"},
52
+ "49": {"name": "广告服务", "parent": "13"},
53
+ "50": {"name": "第三方广告网络平台", "parent": "13"},
54
+ "51": {"name": "媒体代理", "parent": "13"},
55
+ "52": {"name": "创意代理", "parent": "13"},
56
+ "53": {"name": "IT-综合", "parent": "1"},
57
+ "71": {"name": "团购", "parent": "3"},
58
+ "72": {"name": "地图", "parent": "2"},
59
+ "73": {"name": "数据存储", "parent": "2"},
60
+ "414": {"name": "计算机软件", "parent": "1"},
61
+ "415": {"name": "计算机硬件", "parent": "1"},
62
+ "416": {"name": "计算机服务(系统、数据服务、维修)", "parent": "1"},
63
+ "417": {"name": "通信/电信/网络设备", "parent": "1"},
64
+ "418": {"name": "通信/电信运营、增值服务", "parent": "1"},
65
+ "419": {"name": "电子技术/半导体/集成电路", "parent": "1"},
66
+ "472": {"name": "P2P网贷", "parent": "4"},
67
+ "473": {"name": "互联网理财", "parent": "4"},
68
+ "474": {"name": "婚恋", "parent": "6"},
69
+ "476": {"name": "虚拟化", "parent": "10"},
70
+ "477": {"name": "邮箱", "parent": "12"},
71
+ "478": {"name": "商业智能", "parent": "14"},
72
+ "479": {"name": "企业建站", "parent": "14"},
73
+ "480": {"name": "安防", "parent": "14"},
74
+ "481": {"name": "网络营销", "parent": "2"},
75
+ "487": {"name": "智能终端", "parent": "2"},
76
+ "488": {"name": "移动互联网", "parent": "2"},
77
+ "489": {"name": "数字城市", "parent": "2"},
78
+ "490": {"name": "大数据", "parent": "2"},
79
+ "491": {"name": "互联网人力资源", "parent": "2"},
80
+ "492": {"name": "舆情监控", "parent": "2"},
81
+ "493": {"name": "移动营销", "parent": "481"},
82
+ "494": {"name": "微博营销", "parent": "481"},
83
+ "495": {"name": "精准营销", "parent": "481"},
84
+ "496": {"name": "海外营销", "parent": "481"},
85
+ "497": {"name": "微信营销", "parent": "481"},
86
+ "498": {"name": "智能手机", "parent": "487"},
87
+ "499": {"name": "可穿戴设备", "parent": "487"},
88
+ "500": {"name": "智能电视", "parent": "487"},
89
+ "501": {"name": "WAP", "parent": "488"},
90
+ "502": {"name": "物联网", "parent": "489"},
91
+ "503": {"name": "O2O", "parent": "489"},
92
+ "504": {"name": "数字出版", "parent": "489"},
93
+ "505": {"name": "搜索", "parent": "2"},
94
+ "506": {"name": "垂直搜索", "parent": "505"},
95
+ "507": {"name": "无线搜索", "parent": "505"},
96
+ "508": {"name": "网页搜索", "parent": "505"},
97
+ "509": {"name": "网址导航", "parent": "2"},
98
+ "510": {"name": "门户", "parent": "2"},
99
+ "511": {"name": "网络文学", "parent": "2"},
100
+ "512": {"name": "自媒体", "parent": "2"},
101
+ "513": {"name": "金融", "parent": "0"},
102
+ "514": {"name": "建筑与房地产", "parent": "0"},
103
+ "515": {"name": "专业服务", "parent": "0"},
104
+ "516": {"name": "教育培训", "parent": "0"},
105
+ "517": {"name": "文化传媒", "parent": "0"},
106
+ "518": {"name": "消费品", "parent": "0"},
107
+ "519": {"name": "工业", "parent": "0"},
108
+ "520": {"name": "交通物流", "parent": "0"},
109
+ "521": {"name": "贸易", "parent": "0"},
110
+ "522": {"name": "医药", "parent": "0"},
111
+ "523": {"name": "医疗器械", "parent": "522"},
112
+ "524": {"name": "保健品", "parent": "518"},
113
+ "525": {"name": "服务业", "parent": "0"},
114
+ "526": {"name": "能源/矿产/环保", "parent": "0"},
115
+ "527": {"name": "化工", "parent": "0"},
116
+ "528": {"name": "政府", "parent": "0"},
117
+ "529": {"name": "公共事业", "parent": "0"},
118
+ "530": {"name": "非盈利机构", "parent": "0"},
119
+ "531": {"name": "农业", "parent": "1131"},
120
+ "532": {"name": "林业", "parent": "1131"},
121
+ "533": {"name": "畜牧业", "parent": "1131"},
122
+ "534": {"name": "渔业", "parent": "1131"},
123
+ "535": {"name": "学术科研", "parent": "0"},
124
+ "536": {"name": "零售", "parent": "0"},
125
+ "537": {"name": "银行", "parent": "513"},
126
+ "538": {"name": "保险", "parent": "513"},
127
+ "539": {"name": "证券", "parent": "513"},
128
+ "540": {"name": "基金", "parent": "513"},
129
+ "541": {"name": "信托", "parent": "513"},
130
+ "542": {"name": "担保", "parent": "513"},
131
+ "543": {"name": "典当", "parent": "513"},
132
+ "544": {"name": "拍卖", "parent": "513"},
133
+ "545": {"name": "投资/融资", "parent": "513"},
134
+ "546": {"name": "期货", "parent": "513"},
135
+ "547": {"name": "房地产开发", "parent": "514"},
136
+ "548": {"name": "工程施工", "parent": "514"},
137
+ "549": {"name": "建筑设计", "parent": "514"},
138
+ "550": {"name": "房地产代理", "parent": "514"},
139
+ "551": {"name": "物业管理", "parent": "514"},
140
+ "552": {"name": "室内设计", "parent": "514"},
141
+ "553": {"name": "装修装潢", "parent": "514"},
142
+ "554": {"name": "市政工程", "parent": "514"},
143
+ "555": {"name": "工程造价", "parent": "514"},
144
+ "556": {"name": "工程监理", "parent": "514"},
145
+ "557": {"name": "环境工程", "parent": "514"},
146
+ "558": {"name": "园林景观", "parent": "514"},
147
+ "559": {"name": "法律", "parent": "515"},
148
+ "560": {"name": "人力资源", "parent": "515"},
149
+ "561": {"name": "会计", "parent": "1125"},
150
+ "562": {"name": "审计", "parent": "515"},
151
+ "563": {"name": "检测认证", "parent": "515"},
152
+ "565": {"name": "翻译", "parent": "515"},
153
+ "566": {"name": "中介", "parent": "515"},
154
+ "567": {"name": "咨询", "parent": "515"},
155
+ "568": {"name": "外包服务", "parent": "515"},
156
+ "569": {"name": "家教", "parent": "516"},
157
+ "570": {"name": "早教", "parent": "516"},
158
+ "571": {"name": "职业技能培训", "parent": "516"},
159
+ "572": {"name": "外语培训", "parent": "516"},
160
+ "573": {"name": "设计培训", "parent": "516"},
161
+ "574": {"name": "IT培训", "parent": "516"},
162
+ "575": {"name": "文艺体育培训", "parent": "516"},
163
+ "576": {"name": "学历教育", "parent": "516"},
164
+ "577": {"name": "管理培训", "parent": "516"},
165
+ "578": {"name": "民办基础教育", "parent": "516"},
166
+ "579": {"name": "广告", "parent": "517"},
167
+ "580": {"name": "媒体", "parent": "517"},
168
+ "581": {"name": "会展", "parent": "517"},
169
+ "582": {"name": "公关", "parent": "517"},
170
+ "583": {"name": "影视", "parent": "517"},
171
+ "584": {"name": "艺术", "parent": "517"},
172
+ "585": {"name": "文化传播", "parent": "517"},
173
+ "586": {"name": "娱乐", "parent": "517"},
174
+ "587": {"name": "体育", "parent": "517"},
175
+ "588": {"name": "出版", "parent": "517"},
176
+ "589": {"name": "休闲", "parent": "517"},
177
+ "590": {"name": "动漫", "parent": "517"},
178
+ "591": {"name": "市场推广", "parent": "517"},
179
+ "592": {"name": "市场研究", "parent": "517"},
180
+ "593": {"name": "食品", "parent": "1129"},
181
+ "594": {"name": "饮料", "parent": "1129"},
182
+ "595": {"name": "烟草", "parent": "1129"},
183
+ "596": {"name": "酒品", "parent": "518"},
184
+ "597": {"name": "服饰", "parent": "518"},
185
+ "598": {"name": "纺织", "parent": "518"},
186
+ "599": {"name": "化妆品", "parent": "1129"},
187
+ "600": {"name": "日用品", "parent": "1129"},
188
+ "601": {"name": "家电", "parent": "518"},
189
+ "602": {"name": "家具", "parent": "518"},
190
+ "603": {"name": "办公用品", "parent": "518"},
191
+ "604": {"name": "奢侈品", "parent": "518"},
192
+ "605": {"name": "珠宝", "parent": "518"},
193
+ "606": {"name": "数码产品", "parent": "518"},
194
+ "607": {"name": "玩具", "parent": "518"},
195
+ "608": {"name": "图书", "parent": "518"},
196
+ "609": {"name": "音像", "parent": "518"},
197
+ "610": {"name": "钟表", "parent": "518"},
198
+ "611": {"name": "箱包", "parent": "518"},
199
+ "612": {"name": "母婴", "parent": "518"},
200
+ "613": {"name": "营养保健", "parent": "518"},
201
+ "614": {"name": "户外用品", "parent": "518"},
202
+ "615": {"name": "健身器材", "parent": "518"},
203
+ "616": {"name": "乐器", "parent": "518"},
204
+ "617": {"name": "汽车用品", "parent": "518"},
205
+ "619": {"name": "厨具", "parent": "518"},
206
+ "620": {"name": "机械制造", "parent": "519"},
207
+ "621": {"name": "流体控制", "parent": "519"},
208
+ "622": {"name": "自动化控制", "parent": "519"},
209
+ "623": {"name": "仪器仪表", "parent": "519"},
210
+ "624": {"name": "航空/航天", "parent": "519"},
211
+ "625": {"name": "交通设施", "parent": "519"},
212
+ "626": {"name": "工业电子", "parent": "519"},
213
+ "627": {"name": "建材", "parent": "519"},
214
+ "628": {"name": "五金材料", "parent": "519"},
215
+ "629": {"name": "汽车", "parent": "519"},
216
+ "630": {"name": "印刷", "parent": "519"},
217
+ "631": {"name": "造纸", "parent": "519"},
218
+ "632": {"name": "包装", "parent": "519"},
219
+ "633": {"name": "原材料及加工", "parent": "519"},
220
+ "634": {"name": "物流", "parent": "520"},
221
+ "635": {"name": "仓储", "parent": "520"},
222
+ "636": {"name": "客运", "parent": "520"},
223
+ "637": {"name": "快递", "parent": "520"},
224
+ "638": {"name": "化学药", "parent": "522"},
225
+ "639": {"name": "中药", "parent": "522"},
226
+ "640": {"name": "生物制药", "parent": "522"},
227
+ "641": {"name": "兽药", "parent": "522"},
228
+ "642": {"name": "农药", "parent": "522"},
229
+ "643": {"name": "CRO", "parent": "522"},
230
+ "644": {"name": "消毒", "parent": "522"},
231
+ "645": {"name": "医药商业", "parent": "522"},
232
+ "646": {"name": "医疗服务", "parent": "522"},
233
+ "647": {"name": "医疗器械", "parent": "523"},
234
+ "648": {"name": "制药设备", "parent": "523"},
235
+ "649": {"name": "医用耗材", "parent": "523"},
236
+ "650": {"name": "手术器械", "parent": "523"},
237
+ "651": {"name": "保健器材", "parent": "524"},
238
+ "652": {"name": "性保健品", "parent": "524"},
239
+ "653": {"name": "医药保养", "parent": "524"},
240
+ "654": {"name": "医用保健", "parent": "524"},
241
+ "655": {"name": "酒店", "parent": "525"},
242
+ "656": {"name": "餐饮", "parent": "525"},
243
+ "657": {"name": "旅游", "parent": "525"},
244
+ "658": {"name": "生活服务", "parent": "525"},
245
+ "659": {"name": "保健服务", "parent": "525"},
246
+ "660": {"name": "运动健身", "parent": "525"},
247
+ "661": {"name": "家政服务", "parent": "525"},
248
+ "662": {"name": "婚庆服务", "parent": "525"},
249
+ "663": {"name": "租赁服务", "parent": "525"},
250
+ "664": {"name": "维修服务", "parent": "525"},
251
+ "665": {"name": "石油天然气", "parent": "526"},
252
+ "666": {"name": "电力", "parent": "526"},
253
+ "667": {"name": "新能源", "parent": "526"},
254
+ "668": {"name": "水利", "parent": "526"},
255
+ "669": {"name": "矿产", "parent": "526"},
256
+ "670": {"name": "采掘业", "parent": "526"},
257
+ "671": {"name": "冶炼", "parent": "526"},
258
+ "672": {"name": "环保", "parent": "526"},
259
+ "673": {"name": "无机化工原料", "parent": "527"},
260
+ "674": {"name": "有机化工原料", "parent": "527"},
261
+ "675": {"name": "精细化学品", "parent": "527"},
262
+ "676": {"name": "化工设备", "parent": "527"},
263
+ "677": {"name": "化工工程", "parent": "527"},
264
+ "678": {"name": "资产管理", "parent": "513"},
265
+ "679": {"name": "金融租赁", "parent": "513"},
266
+ "680": {"name": "征信及信评机构", "parent": "513"},
267
+ "681": {"name": "资产评估机构", "parent": "513"},
268
+ "683": {"name": "金融监管机构", "parent": "513"},
269
+ "684": {"name": "国际贸易", "parent": "521"},
270
+ "685": {"name": "海关", "parent": "521"},
271
+ "686": {"name": "购物中心", "parent": "536"},
272
+ "687": {"name": "超市", "parent": "536"},
273
+ "688": {"name": "便利店", "parent": "536"},
274
+ "689": {"name": "专卖店", "parent": "536"},
275
+ "690": {"name": "专业店", "parent": "536"},
276
+ "691": {"name": "百货店", "parent": "536"},
277
+ "692": {"name": "杂货店", "parent": "536"},
278
+ "693": {"name": "个人银行", "parent": "537"},
279
+ "695": {"name": "私人银行", "parent": "537"},
280
+ "696": {"name": "公司银行", "parent": "537"},
281
+ "697": {"name": "投资银行", "parent": "537"},
282
+ "698": {"name": "政策性银行", "parent": "537"},
283
+ "699": {"name": "中央银行", "parent": "537"},
284
+ "700": {"name": "人寿险", "parent": "538"},
285
+ "701": {"name": "财产险", "parent": "538"},
286
+ "702": {"name": "再保险", "parent": "538"},
287
+ "703": {"name": "养老险", "parent": "538"},
288
+ "704": {"name": "保险代理公司", "parent": "538"},
289
+ "705": {"name": "公募基金", "parent": "540"},
290
+ "707": {"name": "私募基金", "parent": "540"},
291
+ "708": {"name": "第三方理财", "parent": "679"},
292
+ "709": {"name": "资产管理公司", "parent": "679"},
293
+ "711": {"name": "房产中介", "parent": "566"},
294
+ "712": {"name": "职业中介", "parent": "566"},
295
+ "713": {"name": "婚姻中介", "parent": "566"},
296
+ "714": {"name": "战略咨询", "parent": "567"},
297
+ "715": {"name": "投资咨询", "parent": "567"},
298
+ "716": {"name": "心理咨询", "parent": "567"},
299
+ "717": {"name": "留学移民咨询", "parent": "567"},
300
+ "718": {"name": "工商注册代理", "parent": "568"},
301
+ "719": {"name": "商标专利代理", "parent": "568"},
302
+ "720": {"name": "财务代理", "parent": "568"},
303
+ "721": {"name": "工程机械", "parent": "620"},
304
+ "722": {"name": "农业机械", "parent": "620"},
305
+ "723": {"name": "海工设备", "parent": "620"},
306
+ "724": {"name": "包装机械", "parent": "620"},
307
+ "725": {"name": "印刷机械", "parent": "620"},
308
+ "726": {"name": "数控机床", "parent": "620"},
309
+ "727": {"name": "矿山机械", "parent": "620"},
310
+ "728": {"name": "水泵", "parent": "621"},
311
+ "729": {"name": "管道", "parent": "621"},
312
+ "730": {"name": "阀门", "parent": "621"},
313
+ "732": {"name": "压缩机", "parent": "621"},
314
+ "733": {"name": "集散控制系统", "parent": "622"},
315
+ "734": {"name": "远程控制", "parent": "622"},
316
+ "735": {"name": "液压系统", "parent": "622"},
317
+ "736": {"name": "楼宇智能化", "parent": "622"},
318
+ "737": {"name": "飞机制造", "parent": "624"},
319
+ "738": {"name": "航空公司", "parent": "624"},
320
+ "739": {"name": "发动机", "parent": "624"},
321
+ "740": {"name": "复合材料", "parent": "624"},
322
+ "741": {"name": "高铁", "parent": "625"},
323
+ "742": {"name": "地铁", "parent": "625"},
324
+ "743": {"name": "信号传输", "parent": "625"},
325
+ "745": {"name": "结构材料", "parent": "627"},
326
+ "746": {"name": "装饰材料", "parent": "627"},
327
+ "747": {"name": "专用材料", "parent": "627"},
328
+ "749": {"name": "经销商集团", "parent": "629"},
329
+ "750": {"name": "整车制造", "parent": "629"},
330
+ "751": {"name": "汽车零配件", "parent": "629"},
331
+ "752": {"name": "外型设计", "parent": "629"},
332
+ "753": {"name": "平版印刷", "parent": "630"},
333
+ "754": {"name": "凸版印刷", "parent": "630"},
334
+ "755": {"name": "凹版印刷", "parent": "630"},
335
+ "756": {"name": "孔版印刷", "parent": "630"},
336
+ "757": {"name": "印刷用纸", "parent": "631"},
337
+ "758": {"name": "书写、制图及复制用纸", "parent": "631"},
338
+ "759": {"name": "包装用纸", "parent": "631"},
339
+ "760": {"name": "生活、卫生及装饰用纸", "parent": "631"},
340
+ "761": {"name": "技术用纸", "parent": "631"},
341
+ "762": {"name": "加工纸原纸", "parent": "631"},
342
+ "763": {"name": "食品包装", "parent": "632"},
343
+ "764": {"name": "医药包装", "parent": "632"},
344
+ "765": {"name": "日化包装", "parent": "632"},
345
+ "766": {"name": "物流包装", "parent": "632"},
346
+ "767": {"name": "礼品包装", "parent": "632"},
347
+ "768": {"name": "电子五金包装", "parent": "632"},
348
+ "769": {"name": "汽车服务", "parent": "525"},
349
+ "770": {"name": "汽车保养", "parent": "769"},
350
+ "771": {"name": "租车", "parent": "769"},
351
+ "773": {"name": "出租车", "parent": "769"},
352
+ "774": {"name": "代驾", "parent": "769"},
353
+ "775": {"name": "发电", "parent": "666"},
354
+ "777": {"name": "输配电", "parent": "666"},
355
+ "779": {"name": "风电", "parent": "667"},
356
+ "780": {"name": "光伏/太阳能", "parent": "667"},
357
+ "781": {"name": "生物质发电", "parent": "667"},
358
+ "782": {"name": "煤化工", "parent": "667"},
359
+ "783": {"name": "垃圾发电", "parent": "667"},
360
+ "784": {"name": "核电", "parent": "667"},
361
+ "785": {"name": "能源矿产", "parent": "669"},
362
+ "786": {"name": "金属矿产", "parent": "669"},
363
+ "787": {"name": "非金属矿产", "parent": "669"},
364
+ "788": {"name": "水气矿产", "parent": "669"},
365
+ "789": {"name": "锅炉", "parent": "775"},
366
+ "790": {"name": "发电机", "parent": "775"},
367
+ "791": {"name": "汽轮机", "parent": "775"},
368
+ "792": {"name": "燃机", "parent": "775"},
369
+ "793": {"name": "冷却", "parent": "775"},
370
+ "794": {"name": "电力设计院", "parent": "775"},
371
+ "795": {"name": "高压输配电", "parent": "777"},
372
+ "796": {"name": "中压输配电", "parent": "777"},
373
+ "797": {"name": "低压输配电", "parent": "777"},
374
+ "798": {"name": "继电保护", "parent": "777"},
375
+ "799": {"name": "智能电网", "parent": "777"},
376
+ "800": {"name": "小学", "parent": "516"},
377
+ "801": {"name": "电动车", "parent": "519"},
378
+ "802": {"name": "皮具箱包", "parent": "518"},
379
+ "803": {"name": "医药制造", "parent": "522"},
380
+ "804": {"name": "电器销售", "parent": "536"},
381
+ "805": {"name": "塑料制品", "parent": "527"},
382
+ "806": {"name": "公益基金会", "parent": "530"},
383
+ "807": {"name": "美发服务", "parent": "525"},
384
+ "808": {"name": "农业养殖", "parent": "531"},
385
+ "809": {"name": "金融服务", "parent": "513"},
386
+ "810": {"name": "商业地产综合体", "parent": "514"},
387
+ "811": {"name": "美容服务", "parent": "525"},
388
+ "812": {"name": "灯饰", "parent": "518"},
389
+ "813": {"name": "油墨颜料产品", "parent": "527"},
390
+ "814": {"name": "眼镜制造", "parent": "518"},
391
+ "815": {"name": "农业生物技术", "parent": "531"},
392
+ "816": {"name": "体育用品", "parent": "518"},
393
+ "817": {"name": "保健用品", "parent": "524"},
394
+ "818": {"name": "化学化工产品", "parent": "527"},
395
+ "819": {"name": "饲料", "parent": "531"},
396
+ "821": {"name": "保安服务", "parent": "525"},
397
+ "822": {"name": "干细胞技术", "parent": "522"},
398
+ "824": {"name": "农药化肥", "parent": "527"},
399
+ "825": {"name": "卫生洁具", "parent": "518"},
400
+ "826": {"name": "体育器材、场馆", "parent": "518"},
401
+ "827": {"name": "饲料加工", "parent": "531"},
402
+ "828": {"name": "测绘服务", "parent": "529"},
403
+ "830": {"name": "金属船舶制造", "parent": "519"},
404
+ "831": {"name": "基因工程", "parent": "522"},
405
+ "832": {"name": "花卉服务", "parent": "536"},
406
+ "833": {"name": "农业种植", "parent": "531"},
407
+ "834": {"name": "皮革制品", "parent": "518"},
408
+ "835": {"name": "地理信息加工服务", "parent": "529"},
409
+ "836": {"name": "机器人", "parent": "519"},
410
+ "837": {"name": "礼品", "parent": "518"},
411
+ "838": {"name": "理发及美容服务", "parent": "525"},
412
+ "839": {"name": "其他清洁服务", "parent": "525"},
413
+ "840": {"name": "硅胶材料", "parent": "527"},
414
+ "841": {"name": "茶叶销售", "parent": "518"},
415
+ "842": {"name": "彩票活动", "parent": "529"},
416
+ "843": {"name": "化妆培训", "parent": "516"},
417
+ "844": {"name": "鞋业", "parent": "518"},
418
+ "845": {"name": "酒店用品", "parent": "518"},
419
+ "846": {"name": "复合材料", "parent": "527"},
420
+ "847": {"name": "房地产工程建设", "parent": "548"},
421
+ "848": {"name": "知识产权服务", "parent": "559"},
422
+ "849": {"name": "新型建材", "parent": "627"},
423
+ "850": {"name": "企业投资咨询", "parent": "567"},
424
+ "851": {"name": "含乳饮料和植物蛋白饮料制造", "parent": "594"},
425
+ "852": {"name": "汽车检测设备", "parent": "629"},
426
+ "853": {"name": "手机通讯器材", "parent": "417"},
427
+ "854": {"name": "环保材料", "parent": "672"},
428
+ "855": {"name": "交通设施", "parent": "554"},
429
+ "856": {"name": "电子器件", "parent": "419"},
430
+ "857": {"name": "啤酒", "parent": "594"},
431
+ "858": {"name": "生态旅游", "parent": "657"},
432
+ "859": {"name": "自动化设备", "parent": "626"},
433
+ "860": {"name": "软件开发", "parent": "414"},
434
+ "861": {"name": "葡萄酒销售", "parent": "594"},
435
+ "862": {"name": "钢材", "parent": "633"},
436
+ "863": {"name": "餐饮培训", "parent": "656"},
437
+ "864": {"name": "速冻食品", "parent": "593"},
438
+ "865": {"name": "空气环保", "parent": "672"},
439
+ "866": {"name": "互联网房地产经纪服务", "parent": "550"},
440
+ "867": {"name": "食品添加剂", "parent": "593"},
441
+ "868": {"name": "演艺传播", "parent": "585"},
442
+ "869": {"name": "信用卡", "parent": "537"},
443
+ "870": {"name": "报纸期刊广告", "parent": "579"},
444
+ "871": {"name": "摄影", "parent": "525"},
445
+ "872": {"name": "手机软件", "parent": "414"},
446
+ "873": {"name": "地坪建材", "parent": "627"},
447
+ "874": {"name": "企业管理咨询", "parent": "567"},
448
+ "875": {"name": "幼儿教育", "parent": "570"},
449
+ "876": {"name": "系统集成", "parent": "416"},
450
+ "877": {"name": "皮革服饰", "parent": "597"},
451
+ "878": {"name": "保健食品", "parent": "593"},
452
+ "879": {"name": "叉车", "parent": "620"},
453
+ "880": {"name": "厨卫电器", "parent": "601"},
454
+ "882": {"name": "地暖设备", "parent": "627"},
455
+ "883": {"name": "钢结构制造", "parent": "548"},
456
+ "884": {"name": "投影机", "parent": "606"},
457
+ "885": {"name": "啤酒销售", "parent": "594"},
458
+ "886": {"name": "度假村旅游", "parent": "657"},
459
+ "887": {"name": "电力元件设备", "parent": "626"},
460
+ "888": {"name": "管理软件", "parent": "414"},
461
+ "889": {"name": "轴承", "parent": "628"},
462
+ "890": {"name": "餐饮设备", "parent": "656"},
463
+ "891": {"name": "肉制品及副产品加工", "parent": "593"},
464
+ "892": {"name": "艺术收藏品投资交易", "parent": "584"},
465
+ "893": {"name": "净水器", "parent": "601"},
466
+ "894": {"name": "进口食品", "parent": "593"},
467
+ "895": {"name": "娱乐文化传播", "parent": "585"},
468
+ "896": {"name": "文化传播", "parent": "585"},
469
+ "897": {"name": "商旅传媒", "parent": "580"},
470
+ "898": {"name": "广告设计制作", "parent": "579"},
471
+ "899": {"name": "金属丝绳及其制品制造", "parent": "627"},
472
+ "900": {"name": "建筑涂料", "parent": "627"},
473
+ "901": {"name": "抵押贷款", "parent": "543"},
474
+ "902": {"name": "早教", "parent": "570"},
475
+ "903": {"name": "电影放映", "parent": "583"},
476
+ "904": {"name": "内衣服饰", "parent": "597"},
477
+ "905": {"name": "无线网络通信", "parent": "418"},
478
+ "906": {"name": "记忆卡", "parent": "415"},
479
+ "907": {"name": "女装服饰", "parent": "597"},
480
+ "908": {"name": "建筑机械", "parent": "620"},
481
+ "909": {"name": "制冷电器", "parent": "601"},
482
+ "910": {"name": "通信设备", "parent": "417"},
483
+ "911": {"name": "空调设备", "parent": "601"},
484
+ "912": {"name": "建筑装饰", "parent": "553"},
485
+ "913": {"name": "办公设备", "parent": "603"},
486
+ "916": {"name": "数据处理软件", "parent": "414"},
487
+ "917": {"name": "葡萄酒贸易", "parent": "594"},
488
+ "918": {"name": "通讯器材", "parent": "417"},
489
+ "919": {"name": "铜业", "parent": "633"},
490
+ "920": {"name": "食堂", "parent": "656"},
491
+ "921": {"name": "糖果零食", "parent": "593"},
492
+ "922": {"name": "文化艺术传播", "parent": "584"},
493
+ "923": {"name": "太阳能电器", "parent": "601"},
494
+ "924": {"name": "药品零售", "parent": "645"},
495
+ "925": {"name": "果蔬食品", "parent": "593"},
496
+ "926": {"name": "文化活动策划", "parent": "585"},
497
+ "928": {"name": "汽车广告", "parent": "657"},
498
+ "929": {"name": "条码设备", "parent": "630"},
499
+ "930": {"name": "建筑石材", "parent": "627"},
500
+ "931": {"name": "贵金属", "parent": "545"},
501
+ "932": {"name": "体育", "parent": "660"},
502
+ "933": {"name": "金融信息服务", "parent": "414"},
503
+ "934": {"name": "玻璃建材", "parent": "627"},
504
+ "935": {"name": "家教", "parent": "569"},
505
+ "936": {"name": "歌舞厅娱乐活动", "parent": "586"},
506
+ "937": {"name": "计算机服务器", "parent": "415"},
507
+ "938": {"name": "管道", "parent": "627"},
508
+ "939": {"name": "婴幼儿服饰", "parent": "597"},
509
+ "940": {"name": "热水器", "parent": "601"},
510
+ "941": {"name": "计算机及零部件制造", "parent": "415"},
511
+ "942": {"name": "钢铁贸易", "parent": "633"},
512
+ "944": {"name": "包装材料", "parent": "632"},
513
+ "945": {"name": "计算机办公设备", "parent": "603"},
514
+ "946": {"name": "白酒", "parent": "594"},
515
+ "948": {"name": "发动机", "parent": "620"},
516
+ "949": {"name": "快餐服务", "parent": "656"},
517
+ "950": {"name": "酒类销售", "parent": "594"},
518
+ "951": {"name": "电子产品、机电设备", "parent": "626"},
519
+ "952": {"name": "激光设备", "parent": "626"},
520
+ "953": {"name": "餐饮策划", "parent": "656"},
521
+ "954": {"name": "饮料、食品", "parent": "594"},
522
+ "955": {"name": "文化娱乐经纪", "parent": "585"},
523
+ "956": {"name": "天然气", "parent": "665"},
524
+ "957": {"name": "农副食品", "parent": "593"},
525
+ "958": {"name": "艺术表演", "parent": "585"},
526
+ "959": {"name": "石膏、水泥制品及类似制品制造", "parent": "627"},
527
+ "960": {"name": "橱柜", "parent": "602"},
528
+ "961": {"name": "管理培训", "parent": "577"},
529
+ "962": {"name": "男装服饰", "parent": "597"},
530
+ "963": {"name": "化肥制造", "parent": "675"},
531
+ "964": {"name": "童装服饰", "parent": "597"},
532
+ "965": {"name": "电源电池", "parent": "626"},
533
+ "966": {"name": "家电维修", "parent": "664"},
534
+ "967": {"name": "光电子器件", "parent": "419"},
535
+ "968": {"name": "旅行社服务", "parent": "657"},
536
+ "969": {"name": "电线、电缆制造", "parent": "626"},
537
+ "970": {"name": "软件开发、信息系统集成", "parent": "419"},
538
+ "971": {"name": "白酒制造", "parent": "594"},
539
+ "973": {"name": "甜品服务", "parent": "656"},
540
+ "974": {"name": "糕点、面包制造", "parent": "593"},
541
+ "975": {"name": "木工机械", "parent": "620"},
542
+ "976": {"name": "酒吧服务", "parent": "656"},
543
+ "977": {"name": "火腿肠", "parent": "593"},
544
+ "978": {"name": "广告策划推广", "parent": "579"},
545
+ "979": {"name": "新能源产品和生产装备制造", "parent": "667"},
546
+ "980": {"name": "调味品", "parent": "593"},
547
+ "981": {"name": "礼仪表演", "parent": "585"},
548
+ "982": {"name": "劳务派遣", "parent": "560"},
549
+ "983": {"name": "建材零售", "parent": "627"},
550
+ "984": {"name": "商品交易中心", "parent": "545"},
551
+ "985": {"name": "体育推广", "parent": "585"},
552
+ "986": {"name": "茶饮料及其他饮料制造", "parent": "594"},
553
+ "987": {"name": "金属建材", "parent": "627"},
554
+ "988": {"name": "职业技能培训", "parent": "571"},
555
+ "989": {"name": "网吧活动", "parent": "586"},
556
+ "990": {"name": "洗衣服务", "parent": "658"},
557
+ "991": {"name": "管道工程", "parent": "554"},
558
+ "992": {"name": "通信工程", "parent": "417"},
559
+ "993": {"name": "电子元器件", "parent": "626"},
560
+ "994": {"name": "电子设备", "parent": "419"},
561
+ "995": {"name": "茶馆服务", "parent": "656"},
562
+ "996": {"name": "旅游开发", "parent": "657"},
563
+ "997": {"name": "视频通讯", "parent": "417"},
564
+ "998": {"name": "白酒销售", "parent": "594"},
565
+ "1000": {"name": "咖啡馆服务", "parent": "656"},
566
+ "1001": {"name": "食品零售", "parent": "593"},
567
+ "1002": {"name": "健康疗养旅游", "parent": "655"},
568
+ "1003": {"name": "粮油食品", "parent": "593"},
569
+ "1004": {"name": "儿童教育影视", "parent": "583"},
570
+ "1005": {"name": "新能源发电", "parent": "667"},
571
+ "1006": {"name": "旅游策划", "parent": "657"},
572
+ "1007": {"name": "绘画", "parent": "575"},
573
+ "1008": {"name": "方便面及其他方便食品", "parent": "593"},
574
+ "1009": {"name": "房地产经纪", "parent": "550"},
575
+ "1010": {"name": "母婴家政", "parent": "661"},
576
+ "1011": {"name": "居家养老健康服务", "parent": "661"},
577
+ "1012": {"name": "文化艺术投资", "parent": "545"},
578
+ "1013": {"name": "运动健身", "parent": "660"},
579
+ "1014": {"name": "瓶(罐)装饮用水制造", "parent": "594"},
580
+ "1015": {"name": "金属门窗", "parent": "627"},
581
+ "1016": {"name": "机动车检测", "parent": "563"},
582
+ "1017": {"name": "货物运输", "parent": "634"},
583
+ "1018": {"name": "服饰专卖", "parent": "690"},
584
+ "1019": {"name": "酒店服装", "parent": "597"},
585
+ "1020": {"name": "通讯软件", "parent": "417"},
586
+ "1021": {"name": "消防工程", "parent": "554"},
587
+ "1022": {"name": "嵌入式电子系统", "parent": "419"},
588
+ "1023": {"name": "航空票务", "parent": "636"},
589
+ "1024": {"name": "电气设备", "parent": "626"},
590
+ "1025": {"name": "酒业贸易", "parent": "594"},
591
+ "1027": {"name": "其他饮料及冷饮服务", "parent": "656"},
592
+ "1028": {"name": "乳制品", "parent": "593"},
593
+ "1029": {"name": "新闻期刊出版", "parent": "588"},
594
+ "1030": {"name": "水污染治理", "parent": "672"},
595
+ "1031": {"name": "谷物食品", "parent": "593"},
596
+ "1032": {"name": "数字动漫设计制造服务", "parent": "590"},
597
+ "1033": {"name": "医院", "parent": "646"},
598
+ "1034": {"name": "旅游广告", "parent": "657"},
599
+ "1035": {"name": "办公家具", "parent": "602"},
600
+ "1036": {"name": "房地产营销策划", "parent": "550"},
601
+ "1037": {"name": "保洁家政", "parent": "661"},
602
+ "1038": {"name": "水泥制造", "parent": "627"},
603
+ "1039": {"name": "市场研究咨询", "parent": "567"},
604
+ "1040": {"name": "驾校", "parent": "571"},
605
+ "1041": {"name": "正餐服务", "parent": "656"},
606
+ "1043": {"name": "机动车燃油", "parent": "665"},
607
+ "1044": {"name": "食品", "parent": "593"},
608
+ "1045": {"name": "新能源汽车", "parent": "629"},
609
+ "1046": {"name": "手机无线网络推广", "parent": "417"},
610
+ "1047": {"name": "环保设备", "parent": "672"},
611
+ "1048": {"name": "通讯工程", "parent": "418"},
612
+ "1049": {"name": "半导体集成电路", "parent": "419"},
613
+ "1050": {"name": "航空服务", "parent": "636"},
614
+ "1051": {"name": "电机设备", "parent": "626"},
615
+ "1052": {"name": "档案软件", "parent": "414"},
616
+ "1053": {"name": "冷链物流服务", "parent": "634"},
617
+ "1054": {"name": "小吃服务", "parent": "656"},
618
+ "1055": {"name": "水产品加工", "parent": "593"},
619
+ "1056": {"name": "图书出版", "parent": "588"},
620
+ "1057": {"name": "固体废物治理", "parent": "672"},
621
+ "1059": {"name": "坚果食品", "parent": "593"},
622
+ "1060": {"name": "广告传媒", "parent": "579"},
623
+ "1061": {"name": "电梯", "parent": "622"},
624
+ "1062": {"name": "社区医疗与卫生院", "parent": "646"},
625
+ "1063": {"name": "广告、印刷包装", "parent": "630"},
626
+ "1064": {"name": "婚纱礼服", "parent": "662"},
627
+ "1065": {"name": "地毯", "parent": "602"},
628
+ "1066": {"name": "互联网物业", "parent": "551"},
629
+ "1067": {"name": "跨境电商", "parent": "3"},
630
+ "1068": {"name": "信息安全、系统集成", "parent": "9"},
631
+ "1069": {"name": "专用汽车制造", "parent": "750"},
632
+ "1070": {"name": "商品贸易", "parent": "3"},
633
+ "1071": {"name": "墙壁装饰材料", "parent": "746"},
634
+ "1072": {"name": "窗帘装饰材料", "parent": "746"},
635
+ "1073": {"name": "电子商务、本地生活服务", "parent": "3"},
636
+ "1075": {"name": "白酒电子商务", "parent": "3"},
637
+ "1076": {"name": "商品贸易、电子商务", "parent": "3"},
638
+ "1077": {"name": "木质装饰材料", "parent": "746"},
639
+ "1078": {"name": "电子商务、汽车电商交易平台", "parent": "3"},
640
+ "1079": {"name": "汽车轮胎", "parent": "751"},
641
+ "1080": {"name": "气体压缩机械制造", "parent": "732"},
642
+ "1081": {"name": "家装家具电子商务", "parent": "3"},
643
+ "1082": {"name": "化妆品电子商务", "parent": "3"},
644
+ "1083": {"name": "汽车销售", "parent": "749"},
645
+ "1084": {"name": "新闻资讯网站", "parent": "510"},
646
+ "1085": {"name": "母婴电商", "parent": "3"},
647
+ "1086": {"name": "电商商务、收藏品交易", "parent": "3"},
648
+ "1088": {"name": "电子商务、数码产品", "parent": "3"},
649
+ "1089": {"name": "二手车交易", "parent": "749"},
650
+ "1090": {"name": "游戏制作服务", "parent": "5"},
651
+ "1091": {"name": "母婴服务", "parent": "510"},
652
+ "1092": {"name": "家具电子商务", "parent": "3"},
653
+ "1093": {"name": "汽车配件电子商务", "parent": "3"},
654
+ "1094": {"name": "输配电设备", "parent": "777"},
655
+ "1095": {"name": "矿山设备", "parent": "727"},
656
+ "1096": {"name": "机床机械", "parent": "726"},
657
+ "1097": {"name": "农产品电商", "parent": "3"},
658
+ "1098": {"name": "陶瓷装饰材料", "parent": "746"},
659
+ "1099": {"name": "车载联网设备", "parent": "487"},
660
+ "1100": {"name": "汽车销售电子商务", "parent": "3"},
661
+ "1101": {"name": "石油设备", "parent": "730"},
662
+ "1102": {"name": "智能家居", "parent": "487"},
663
+ "1103": {"name": "散热器", "parent": "751"},
664
+ "1104": {"name": "电力工程", "parent": "775"},
665
+ "1105": {"name": "生鲜电商", "parent": "3"},
666
+ "1106": {"name": "互联网数据服务", "parent": "490"},
667
+ "1107": {"name": "房车、商务车销售", "parent": "749"},
668
+ "1108": {"name": "茶叶电子商务", "parent": "3"},
669
+ "1109": {"name": "酒类电子商务", "parent": "3"},
670
+ "1110": {"name": "阀门", "parent": "730"},
671
+ "1111": {"name": "食品电商", "parent": "3"},
672
+ "1112": {"name": "儿童摄影", "parent": "871"},
673
+ "1113": {"name": "广告摄影", "parent": "871"},
674
+ "1114": {"name": "婚纱摄影", "parent": "871"},
675
+ "1115": {"name": "模具制造", "parent": "620"},
676
+ "1116": {"name": "汽车模具", "parent": "629"},
677
+ "1117": {"name": "认证咨询", "parent": "567"},
678
+ "1118": {"name": "数字视觉制作服务", "parent": "590"},
679
+ "1119": {"name": "牙科及医疗器械", "parent": "646"},
680
+ "1120": {"name": "猎头招聘", "parent": "560"},
681
+ "1121": {"name": "家居", "parent": "518"},
682
+ "1122": {"name": "收藏品", "parent": "518"},
683
+ "1123": {"name": "首饰", "parent": "518"},
684
+ "1124": {"name": "工艺品", "parent": "518"},
685
+ "1125": {"name": "财务", "parent": "515"},
686
+ "1126": {"name": "税务", "parent": "515"},
687
+ "1127": {"name": "分类信息", "parent": "2"},
688
+ "1128": {"name": "宠物", "parent": "0"},
689
+ "1129": {"name": "快消品", "parent": "518"},
690
+ "1130": {"name": "人工智能", "parent": "2"},
691
+ "1131": {"name": "农/林/牧/渔", "parent": "0"},
692
  }
693
 
694
+
695
  def get_names(id):
696
  id = str(id)
697
  nms = []
698
  d = TBL.get(id)
699
+ if not d:
700
+ return []
701
  nms.append(d["name"])
702
  p = get_names(d["parent"])
703
+ if p:
704
+ nms.extend(p)
705
  return nms
706
 
707
+
708
  if __name__ == "__main__":
709
  print(get_names("1119"))
deepdoc/parser/resume/entities/regions.py CHANGED
@@ -10,766 +10,776 @@
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
 
13
 
14
  TBL = {
15
- "2":{"name":"北京","parent":"1"},
16
- "3":{"name":"天津","parent":"1"},
17
- "4":{"name":"河北","parent":"1"},
18
- "5":{"name":"山西","parent":"1"},
19
- "6":{"name":"内蒙古","parent":"1"},
20
- "7":{"name":"辽宁","parent":"1"},
21
- "8":{"name":"吉林","parent":"1"},
22
- "9":{"name":"黑龙江","parent":"1"},
23
- "10":{"name":"上海","parent":"1"},
24
- "11":{"name":"江苏","parent":"1"},
25
- "12":{"name":"浙江","parent":"1"},
26
- "13":{"name":"安徽","parent":"1"},
27
- "14":{"name":"福建","parent":"1"},
28
- "15":{"name":"江西","parent":"1"},
29
- "16":{"name":"山东","parent":"1"},
30
- "17":{"name":"河南","parent":"1"},
31
- "18":{"name":"湖北","parent":"1"},
32
- "19":{"name":"湖南","parent":"1"},
33
- "20":{"name":"广东","parent":"1"},
34
- "21":{"name":"广西","parent":"1"},
35
- "22":{"name":"海南","parent":"1"},
36
- "23":{"name":"重庆","parent":"1"},
37
- "24":{"name":"四川","parent":"1"},
38
- "25":{"name":"贵州","parent":"1"},
39
- "26":{"name":"云南","parent":"1"},
40
- "27":{"name":"西藏","parent":"1"},
41
- "28":{"name":"陕西","parent":"1"},
42
- "29":{"name":"甘肃","parent":"1"},
43
- "30":{"name":"青海","parent":"1"},
44
- "31":{"name":"宁夏","parent":"1"},
45
- "32":{"name":"新疆","parent":"1"},
46
- "33":{"name":"北京市","parent":"2"},
47
- "34":{"name":"天津市","parent":"3"},
48
- "35":{"name":"石家庄市","parent":"4"},
49
- "36":{"name":"唐山市","parent":"4"},
50
- "37":{"name":"秦皇岛市","parent":"4"},
51
- "38":{"name":"邯郸市","parent":"4"},
52
- "39":{"name":"邢台市","parent":"4"},
53
- "40":{"name":"保定市","parent":"4"},
54
- "41":{"name":"张家口市","parent":"4"},
55
- "42":{"name":"承德市","parent":"4"},
56
- "43":{"name":"沧州市","parent":"4"},
57
- "44":{"name":"廊坊市","parent":"4"},
58
- "45":{"name":"衡水市","parent":"4"},
59
- "46":{"name":"太原市","parent":"5"},
60
- "47":{"name":"大同市","parent":"5"},
61
- "48":{"name":"阳泉市","parent":"5"},
62
- "49":{"name":"长治市","parent":"5"},
63
- "50":{"name":"晋城市","parent":"5"},
64
- "51":{"name":"朔州市","parent":"5"},
65
- "52":{"name":"晋中市","parent":"5"},
66
- "53":{"name":"运城市","parent":"5"},
67
- "54":{"name":"忻州市","parent":"5"},
68
- "55":{"name":"临汾市","parent":"5"},
69
- "56":{"name":"吕梁市","parent":"5"},
70
- "57":{"name":"呼和浩特市","parent":"6"},
71
- "58":{"name":"包头市","parent":"6"},
72
- "59":{"name":"乌海市","parent":"6"},
73
- "60":{"name":"赤峰市","parent":"6"},
74
- "61":{"name":"通辽市","parent":"6"},
75
- "62":{"name":"鄂尔多斯市","parent":"6"},
76
- "63":{"name":"呼伦贝尔市","parent":"6"},
77
- "64":{"name":"巴彦淖尔市","parent":"6"},
78
- "65":{"name":"乌兰察布市","parent":"6"},
79
- "66":{"name":"兴安盟","parent":"6"},
80
- "67":{"name":"锡林郭勒盟","parent":"6"},
81
- "68":{"name":"阿拉善盟","parent":"6"},
82
- "69":{"name":"沈阳市","parent":"7"},
83
- "70":{"name":"大连市","parent":"7"},
84
- "71":{"name":"鞍山市","parent":"7"},
85
- "72":{"name":"抚顺市","parent":"7"},
86
- "73":{"name":"本溪市","parent":"7"},
87
- "74":{"name":"丹东市","parent":"7"},
88
- "75":{"name":"锦州市","parent":"7"},
89
- "76":{"name":"营口市","parent":"7"},
90
- "77":{"name":"阜新市","parent":"7"},
91
- "78":{"name":"辽阳市","parent":"7"},
92
- "79":{"name":"盘锦市","parent":"7"},
93
- "80":{"name":"铁岭市","parent":"7"},
94
- "81":{"name":"朝阳市","parent":"7"},
95
- "82":{"name":"葫芦岛市","parent":"7"},
96
- "83":{"name":"长春市","parent":"8"},
97
- "84":{"name":"吉林市","parent":"8"},
98
- "85":{"name":"四平市","parent":"8"},
99
- "86":{"name":"辽源市","parent":"8"},
100
- "87":{"name":"通化市","parent":"8"},
101
- "88":{"name":"白山市","parent":"8"},
102
- "89":{"name":"松原市","parent":"8"},
103
- "90":{"name":"白城市","parent":"8"},
104
- "91":{"name":"延边朝鲜族自治州","parent":"8"},
105
- "92":{"name":"哈尔滨市","parent":"9"},
106
- "93":{"name":"齐齐哈尔市","parent":"9"},
107
- "94":{"name":"鸡西市","parent":"9"},
108
- "95":{"name":"鹤岗市","parent":"9"},
109
- "96":{"name":"双鸭山市","parent":"9"},
110
- "97":{"name":"大庆市","parent":"9"},
111
- "98":{"name":"伊春市","parent":"9"},
112
- "99":{"name":"佳木斯市","parent":"9"},
113
- "100":{"name":"七台河市","parent":"9"},
114
- "101":{"name":"牡丹江市","parent":"9"},
115
- "102":{"name":"黑河市","parent":"9"},
116
- "103":{"name":"绥化市","parent":"9"},
117
- "104":{"name":"大兴安岭地区","parent":"9"},
118
- "105":{"name":"上海市","parent":"10"},
119
- "106":{"name":"南京市","parent":"11"},
120
- "107":{"name":"无锡市","parent":"11"},
121
- "108":{"name":"徐州市","parent":"11"},
122
- "109":{"name":"常州市","parent":"11"},
123
- "110":{"name":"苏州市","parent":"11"},
124
- "111":{"name":"南通市","parent":"11"},
125
- "112":{"name":"连云港市","parent":"11"},
126
- "113":{"name":"淮安市","parent":"11"},
127
- "114":{"name":"盐城市","parent":"11"},
128
- "115":{"name":"扬州市","parent":"11"},
129
- "116":{"name":"镇江市","parent":"11"},
130
- "117":{"name":"泰州市","parent":"11"},
131
- "118":{"name":"宿迁市","parent":"11"},
132
- "119":{"name":"杭州市","parent":"12"},
133
- "120":{"name":"宁波市","parent":"12"},
134
- "121":{"name":"温州市","parent":"12"},
135
- "122":{"name":"嘉兴市","parent":"12"},
136
- "123":{"name":"湖州市","parent":"12"},
137
- "124":{"name":"绍兴市","parent":"12"},
138
- "125":{"name":"金华市","parent":"12"},
139
- "126":{"name":"衢州市","parent":"12"},
140
- "127":{"name":"舟山市","parent":"12"},
141
- "128":{"name":"台州市","parent":"12"},
142
- "129":{"name":"丽水市","parent":"12"},
143
- "130":{"name":"合肥市","parent":"13"},
144
- "131":{"name":"芜湖市","parent":"13"},
145
- "132":{"name":"蚌埠市","parent":"13"},
146
- "133":{"name":"淮南市","parent":"13"},
147
- "134":{"name":"马鞍山市","parent":"13"},
148
- "135":{"name":"淮北市","parent":"13"},
149
- "136":{"name":"铜陵市","parent":"13"},
150
- "137":{"name":"安庆市","parent":"13"},
151
- "138":{"name":"黄山市","parent":"13"},
152
- "139":{"name":"滁州市","parent":"13"},
153
- "140":{"name":"阜阳市","parent":"13"},
154
- "141":{"name":"宿州市","parent":"13"},
155
- "143":{"name":"六安市","parent":"13"},
156
- "144":{"name":"亳州市","parent":"13"},
157
- "145":{"name":"池州市","parent":"13"},
158
- "146":{"name":"宣城市","parent":"13"},
159
- "147":{"name":"福州市","parent":"14"},
160
- "148":{"name":"厦门市","parent":"14"},
161
- "149":{"name":"莆田市","parent":"14"},
162
- "150":{"name":"三明市","parent":"14"},
163
- "151":{"name":"泉州市","parent":"14"},
164
- "152":{"name":"漳州市","parent":"14"},
165
- "153":{"name":"南平市","parent":"14"},
166
- "154":{"name":"龙岩市","parent":"14"},
167
- "155":{"name":"宁德市","parent":"14"},
168
- "156":{"name":"南昌市","parent":"15"},
169
- "157":{"name":"景德镇市","parent":"15"},
170
- "158":{"name":"萍乡市","parent":"15"},
171
- "159":{"name":"九江市","parent":"15"},
172
- "160":{"name":"新余市","parent":"15"},
173
- "161":{"name":"鹰潭市","parent":"15"},
174
- "162":{"name":"赣州市","parent":"15"},
175
- "163":{"name":"吉安市","parent":"15"},
176
- "164":{"name":"宜春市","parent":"15"},
177
- "165":{"name":"抚州市","parent":"15"},
178
- "166":{"name":"上饶市","parent":"15"},
179
- "167":{"name":"济南市","parent":"16"},
180
- "168":{"name":"青岛市","parent":"16"},
181
- "169":{"name":"淄博市","parent":"16"},
182
- "170":{"name":"枣庄市","parent":"16"},
183
- "171":{"name":"东营市","parent":"16"},
184
- "172":{"name":"烟台市","parent":"16"},
185
- "173":{"name":"潍坊市","parent":"16"},
186
- "174":{"name":"济宁市","parent":"16"},
187
- "175":{"name":"泰安市","parent":"16"},
188
- "176":{"name":"威海市","parent":"16"},
189
- "177":{"name":"日照市","parent":"16"},
190
- "179":{"name":"临沂市","parent":"16"},
191
- "180":{"name":"德州市","parent":"16"},
192
- "181":{"name":"聊城市","parent":"16"},
193
- "182":{"name":"滨州市","parent":"16"},
194
- "183":{"name":"菏泽市","parent":"16"},
195
- "184":{"name":"郑州市","parent":"17"},
196
- "185":{"name":"开封市","parent":"17"},
197
- "186":{"name":"洛阳市","parent":"17"},
198
- "187":{"name":"平顶山市","parent":"17"},
199
- "188":{"name":"安阳市","parent":"17"},
200
- "189":{"name":"鹤壁市","parent":"17"},
201
- "190":{"name":"新乡市","parent":"17"},
202
- "191":{"name":"焦作市","parent":"17"},
203
- "192":{"name":"濮阳市","parent":"17"},
204
- "193":{"name":"许昌市","parent":"17"},
205
- "194":{"name":"漯河市","parent":"17"},
206
- "195":{"name":"三门峡市","parent":"17"},
207
- "196":{"name":"南阳市","parent":"17"},
208
- "197":{"name":"商丘市","parent":"17"},
209
- "198":{"name":"信阳市","parent":"17"},
210
- "199":{"name":"周口市","parent":"17"},
211
- "200":{"name":"驻马店市","parent":"17"},
212
- "201":{"name":"武汉市","parent":"18"},
213
- "202":{"name":"黄石市","parent":"18"},
214
- "203":{"name":"十堰市","parent":"18"},
215
- "204":{"name":"宜昌市","parent":"18"},
216
- "205":{"name":"襄阳市","parent":"18"},
217
- "206":{"name":"鄂州市","parent":"18"},
218
- "207":{"name":"荆门市","parent":"18"},
219
- "208":{"name":"孝感市","parent":"18"},
220
- "209":{"name":"荆州市","parent":"18"},
221
- "210":{"name":"黄冈市","parent":"18"},
222
- "211":{"name":"咸宁市","parent":"18"},
223
- "212":{"name":"随州市","parent":"18"},
224
- "213":{"name":"恩施土家族苗族自治州","parent":"18"},
225
- "215":{"name":"长沙市","parent":"19"},
226
- "216":{"name":"株洲市","parent":"19"},
227
- "217":{"name":"湘潭市","parent":"19"},
228
- "218":{"name":"衡阳市","parent":"19"},
229
- "219":{"name":"邵阳市","parent":"19"},
230
- "220":{"name":"岳阳市","parent":"19"},
231
- "221":{"name":"常德市","parent":"19"},
232
- "222":{"name":"张家界市","parent":"19"},
233
- "223":{"name":"益阳市","parent":"19"},
234
- "224":{"name":"郴州市","parent":"19"},
235
- "225":{"name":"永州市","parent":"19"},
236
- "226":{"name":"怀化市","parent":"19"},
237
- "227":{"name":"娄底市","parent":"19"},
238
- "228":{"name":"湘西土家族苗族自治州","parent":"19"},
239
- "229":{"name":"广州市","parent":"20"},
240
- "230":{"name":"韶关市","parent":"20"},
241
- "231":{"name":"深圳市","parent":"20"},
242
- "232":{"name":"珠海市","parent":"20"},
243
- "233":{"name":"汕头市","parent":"20"},
244
- "234":{"name":"佛山市","parent":"20"},
245
- "235":{"name":"江门市","parent":"20"},
246
- "236":{"name":"湛江市","parent":"20"},
247
- "237":{"name":"茂名市","parent":"20"},
248
- "238":{"name":"肇庆市","parent":"20"},
249
- "239":{"name":"惠州市","parent":"20"},
250
- "240":{"name":"梅州市","parent":"20"},
251
- "241":{"name":"汕尾市","parent":"20"},
252
- "242":{"name":"河源市","parent":"20"},
253
- "243":{"name":"阳江市","parent":"20"},
254
- "244":{"name":"清远市","parent":"20"},
255
- "245":{"name":"东莞市","parent":"20"},
256
- "246":{"name":"中山市","parent":"20"},
257
- "247":{"name":"潮州市","parent":"20"},
258
- "248":{"name":"揭阳市","parent":"20"},
259
- "249":{"name":"云浮市","parent":"20"},
260
- "250":{"name":"南宁市","parent":"21"},
261
- "251":{"name":"柳州市","parent":"21"},
262
- "252":{"name":"桂林市","parent":"21"},
263
- "253":{"name":"梧州市","parent":"21"},
264
- "254":{"name":"北海市","parent":"21"},
265
- "255":{"name":"防城港市","parent":"21"},
266
- "256":{"name":"钦州市","parent":"21"},
267
- "257":{"name":"贵港市","parent":"21"},
268
- "258":{"name":"玉林市","parent":"21"},
269
- "259":{"name":"百色市","parent":"21"},
270
- "260":{"name":"贺州市","parent":"21"},
271
- "261":{"name":"河池市","parent":"21"},
272
- "262":{"name":"来宾市","parent":"21"},
273
- "263":{"name":"崇左市","parent":"21"},
274
- "264":{"name":"海口市","parent":"22"},
275
- "265":{"name":"三亚市","parent":"22"},
276
- "267":{"name":"重庆市","parent":"23"},
277
- "268":{"name":"成都市","parent":"24"},
278
- "269":{"name":"自贡市","parent":"24"},
279
- "270":{"name":"攀枝花市","parent":"24"},
280
- "271":{"name":"泸州市","parent":"24"},
281
- "272":{"name":"德阳市","parent":"24"},
282
- "273":{"name":"绵阳市","parent":"24"},
283
- "274":{"name":"广元市","parent":"24"},
284
- "275":{"name":"遂宁市","parent":"24"},
285
- "276":{"name":"内江市","parent":"24"},
286
- "277":{"name":"乐山市","parent":"24"},
287
- "278":{"name":"南充市","parent":"24"},
288
- "279":{"name":"眉山市","parent":"24"},
289
- "280":{"name":"宜宾市","parent":"24"},
290
- "281":{"name":"广安市","parent":"24"},
291
- "282":{"name":"达州市","parent":"24"},
292
- "283":{"name":"雅安市","parent":"24"},
293
- "284":{"name":"巴中市","parent":"24"},
294
- "285":{"name":"资阳市","parent":"24"},
295
- "286":{"name":"阿坝藏族羌族自治州","parent":"24"},
296
- "287":{"name":"甘孜藏族自治州","parent":"24"},
297
- "288":{"name":"凉山彝族自治州","parent":"24"},
298
- "289":{"name":"贵阳市","parent":"25"},
299
- "290":{"name":"六盘水市","parent":"25"},
300
- "291":{"name":"遵义市","parent":"25"},
301
- "292":{"name":"安顺市","parent":"25"},
302
- "293":{"name":"铜仁市","parent":"25"},
303
- "294":{"name":"黔西南布依族苗族自治州","parent":"25"},
304
- "295":{"name":"毕节市","parent":"25"},
305
- "296":{"name":"黔东南苗族侗族自治州","parent":"25"},
306
- "297":{"name":"黔南布依族苗族自治州","parent":"25"},
307
- "298":{"name":"昆明市","parent":"26"},
308
- "299":{"name":"曲靖市","parent":"26"},
309
- "300":{"name":"玉溪市","parent":"26"},
310
- "301":{"name":"保山市","parent":"26"},
311
- "302":{"name":"昭通市","parent":"26"},
312
- "303":{"name":"丽江市","parent":"26"},
313
- "304":{"name":"普洱市","parent":"26"},
314
- "305":{"name":"临沧市","parent":"26"},
315
- "306":{"name":"楚雄彝族自治州","parent":"26"},
316
- "307":{"name":"红河哈尼族彝族自治州","parent":"26"},
317
- "308":{"name":"文山壮族苗族自治州","parent":"26"},
318
- "309":{"name":"西双版纳傣族自治州","parent":"26"},
319
- "310":{"name":"大理白族自治州","parent":"26"},
320
- "311":{"name":"德宏傣族景颇族自治州","parent":"26"},
321
- "312":{"name":"怒江傈僳族自治州","parent":"26"},
322
- "313":{"name":"迪庆藏族自治州","parent":"26"},
323
- "314":{"name":"拉萨市","parent":"27"},
324
- "315":{"name":"昌都市","parent":"27"},
325
- "316":{"name":"山南市","parent":"27"},
326
- "317":{"name":"日喀则市","parent":"27"},
327
- "318":{"name":"那曲市","parent":"27"},
328
- "319":{"name":"阿里地区","parent":"27"},
329
- "320":{"name":"林芝市","parent":"27"},
330
- "321":{"name":"西安市","parent":"28"},
331
- "322":{"name":"铜川市","parent":"28"},
332
- "323":{"name":"宝鸡市","parent":"28"},
333
- "324":{"name":"咸阳市","parent":"28"},
334
- "325":{"name":"渭南市","parent":"28"},
335
- "326":{"name":"延安市","parent":"28"},
336
- "327":{"name":"汉中市","parent":"28"},
337
- "328":{"name":"榆林市","parent":"28"},
338
- "329":{"name":"安康市","parent":"28"},
339
- "330":{"name":"商洛市","parent":"28"},
340
- "331":{"name":"兰州市","parent":"29"},
341
- "332":{"name":"嘉峪关市","parent":"29"},
342
- "333":{"name":"金昌市","parent":"29"},
343
- "334":{"name":"白银市","parent":"29"},
344
- "335":{"name":"天水市","parent":"29"},
345
- "336":{"name":"武威市","parent":"29"},
346
- "337":{"name":"张掖市","parent":"29"},
347
- "338":{"name":"平凉市","parent":"29"},
348
- "339":{"name":"酒泉市","parent":"29"},
349
- "340":{"name":"庆阳市","parent":"29"},
350
- "341":{"name":"定西市","parent":"29"},
351
- "342":{"name":"陇南市","parent":"29"},
352
- "343":{"name":"临夏回族自治州","parent":"29"},
353
- "344":{"name":"甘南藏族自治州","parent":"29"},
354
- "345":{"name":"西宁市","parent":"30"},
355
- "346":{"name":"海东市","parent":"30"},
356
- "347":{"name":"海北藏族自治州","parent":"30"},
357
- "348":{"name":"黄南藏族自治州","parent":"30"},
358
- "349":{"name":"海南藏族自治州","parent":"30"},
359
- "350":{"name":"果洛藏族自治州","parent":"30"},
360
- "351":{"name":"玉树藏族自治州","parent":"30"},
361
- "352":{"name":"海西蒙古族藏族自治州","parent":"30"},
362
- "353":{"name":"银川市","parent":"31"},
363
- "354":{"name":"石嘴山市","parent":"31"},
364
- "355":{"name":"吴忠市","parent":"31"},
365
- "356":{"name":"固原市","parent":"31"},
366
- "357":{"name":"中卫市","parent":"31"},
367
- "358":{"name":"乌鲁木齐市","parent":"32"},
368
- "359":{"name":"克拉玛依市","parent":"32"},
369
- "360":{"name":"吐鲁番市","parent":"32"},
370
- "361":{"name":"哈密市","parent":"32"},
371
- "362":{"name":"昌吉回族自治州","parent":"32"},
372
- "363":{"name":"博尔塔拉蒙古自治州","parent":"32"},
373
- "364":{"name":"巴音郭楞蒙古自治州","parent":"32"},
374
- "365":{"name":"阿克苏地区","parent":"32"},
375
- "366":{"name":"克孜勒苏柯尔克孜自治州","parent":"32"},
376
- "367":{"name":"喀什地区","parent":"32"},
377
- "368":{"name":"和田地区","parent":"32"},
378
- "369":{"name":"伊犁哈萨克自治州","parent":"32"},
379
- "370":{"name":"塔城地区","parent":"32"},
380
- "371":{"name":"阿勒泰地区","parent":"32"},
381
- "372":{"name":"新疆省直辖行政单位","parent":"32"},
382
- "373":{"name":"可克达拉市","parent":"32"},
383
- "374":{"name":"昆玉市","parent":"32"},
384
- "375":{"name":"胡杨河市","parent":"32"},
385
- "376":{"name":"双河市","parent":"32"},
386
- "3560":{"name":"北票市","parent":"7"},
387
- "3615":{"name":"高州市","parent":"20"},
388
- "3651":{"name":"济源市","parent":"17"},
389
- "3662":{"name":"胶南市","parent":"16"},
390
- "3683":{"name":"老河口市","parent":"18"},
391
- "3758":{"name":"沙河市","parent":"4"},
392
- "3822":{"name":"宜城市","parent":"18"},
393
- "3842":{"name":"枣阳市","parent":"18"},
394
- "3850":{"name":"肇东市","parent":"9"},
395
- "3905":{"name":"澳门","parent":"1"},
396
- "3906":{"name":"澳门","parent":"3905"},
397
- "3907":{"name":"香港","parent":"1"},
398
- "3908":{"name":"香港","parent":"3907"},
399
- "3947":{"name":"仙桃市","parent":"18"},
400
- "3954":{"name":"台湾","parent":"1"},
401
- "3955":{"name":"台湾","parent":"3954"},
402
- "3956":{"name":"海外","parent":"1"},
403
- "3957":{"name":"海外","parent":"3956"},
404
- "3958":{"name":"美国","parent":"3956"},
405
- "3959":{"name":"加拿大","parent":"3956"},
406
- "3961":{"name":"日本","parent":"3956"},
407
- "3962":{"name":"韩国","parent":"3956"},
408
- "3963":{"name":"德国","parent":"3956"},
409
- "3964":{"name":"英国","parent":"3956"},
410
- "3965":{"name":"意大利","parent":"3956"},
411
- "3966":{"name":"西班牙","parent":"3956"},
412
- "3967":{"name":"法国","parent":"3956"},
413
- "3968":{"name":"澳大利亚","parent":"3956"},
414
- "3969":{"name":"东城区","parent":"2"},
415
- "3970":{"name":"西城区","parent":"2"},
416
- "3971":{"name":"崇文区","parent":"2"},
417
- "3972":{"name":"宣武区","parent":"2"},
418
- "3973":{"name":"朝阳区","parent":"2"},
419
- "3974":{"name":"海淀区","parent":"2"},
420
- "3975":{"name":"丰台区","parent":"2"},
421
- "3976":{"name":"石景山区","parent":"2"},
422
- "3977":{"name":"门头沟区","parent":"2"},
423
- "3978":{"name":"房山区","parent":"2"},
424
- "3979":{"name":"通州区","parent":"2"},
425
- "3980":{"name":"顺义区","parent":"2"},
426
- "3981":{"name":"昌平区","parent":"2"},
427
- "3982":{"name":"大兴区","parent":"2"},
428
- "3983":{"name":"平谷区","parent":"2"},
429
- "3984":{"name":"怀柔区","parent":"2"},
430
- "3985":{"name":"密云区","parent":"2"},
431
- "3986":{"name":"延庆区","parent":"2"},
432
- "3987":{"name":"黄浦区","parent":"10"},
433
- "3988":{"name":"徐汇区","parent":"10"},
434
- "3989":{"name":"长宁区","parent":"10"},
435
- "3990":{"name":"静安区","parent":"10"},
436
- "3991":{"name":"普陀区","parent":"10"},
437
- "3992":{"name":"闸北区","parent":"10"},
438
- "3993":{"name":"虹口区","parent":"10"},
439
- "3994":{"name":"杨浦区","parent":"10"},
440
- "3995":{"name":"宝山区","parent":"10"},
441
- "3996":{"name":"闵行区","parent":"10"},
442
- "3997":{"name":"嘉定区","parent":"10"},
443
- "3998":{"name":"浦东新区","parent":"10"},
444
- "3999":{"name":"松江区","parent":"10"},
445
- "4000":{"name":"金山区","parent":"10"},
446
- "4001":{"name":"青浦区","parent":"10"},
447
- "4002":{"name":"奉贤区","parent":"10"},
448
- "4003":{"name":"崇明区","parent":"10"},
449
- "4004":{"name":"和平区","parent":"3"},
450
- "4005":{"name":"河东区","parent":"3"},
451
- "4006":{"name":"河西区","parent":"3"},
452
- "4007":{"name":"南开区","parent":"3"},
453
- "4008":{"name":"红桥区","parent":"3"},
454
- "4009":{"name":"河北区","parent":"3"},
455
- "4010":{"name":"滨海新区","parent":"3"},
456
- "4011":{"name":"东丽区","parent":"3"},
457
- "4012":{"name":"西青区","parent":"3"},
458
- "4013":{"name":"北辰区","parent":"3"},
459
- "4014":{"name":"津南区","parent":"3"},
460
- "4015":{"name":"武清区","parent":"3"},
461
- "4016":{"name":"宝坻区","parent":"3"},
462
- "4017":{"name":"静海区","parent":"3"},
463
- "4018":{"name":"宁河区","parent":"3"},
464
- "4019":{"name":"蓟州区","parent":"3"},
465
- "4020":{"name":"渝中区","parent":"23"},
466
- "4021":{"name":"江北区","parent":"23"},
467
- "4022":{"name":"南岸区","parent":"23"},
468
- "4023":{"name":"沙坪坝区","parent":"23"},
469
- "4024":{"name":"九龙坡区","parent":"23"},
470
- "4025":{"name":"大渡口区","parent":"23"},
471
- "4026":{"name":"渝北区","parent":"23"},
472
- "4027":{"name":"巴南区","parent":"23"},
473
- "4028":{"name":"北碚区","parent":"23"},
474
- "4029":{"name":"万州区","parent":"23"},
475
- "4030":{"name":"黔江区","parent":"23"},
476
- "4031":{"name":"永川区","parent":"23"},
477
- "4032":{"name":"涪陵区","parent":"23"},
478
- "4033":{"name":"江津区","parent":"23"},
479
- "4034":{"name":"合川区","parent":"23"},
480
- "4035":{"name":"双桥区","parent":"23"},
481
- "4036":{"name":"万盛区","parent":"23"},
482
- "4037":{"name":"荣昌区","parent":"23"},
483
- "4038":{"name":"大足区","parent":"23"},
484
- "4039":{"name":"璧山区","parent":"23"},
485
- "4040":{"name":"铜梁区","parent":"23"},
486
- "4041":{"name":"潼南区","parent":"23"},
487
- "4042":{"name":"綦江区","parent":"23"},
488
- "4043":{"name":"忠县","parent":"23"},
489
- "4044":{"name":"开州区","parent":"23"},
490
- "4045":{"name":"云阳县","parent":"23"},
491
- "4046":{"name":"梁平区","parent":"23"},
492
- "4047":{"name":"垫江县","parent":"23"},
493
- "4048":{"name":"丰都县","parent":"23"},
494
- "4049":{"name":"奉节县","parent":"23"},
495
- "4050":{"name":"巫山县","parent":"23"},
496
- "4051":{"name":"巫溪县","parent":"23"},
497
- "4052":{"name":"城口县","parent":"23"},
498
- "4053":{"name":"武隆区","parent":"23"},
499
- "4054":{"name":"石柱土家族自治县","parent":"23"},
500
- "4055":{"name":"秀山土家族苗族自治县","parent":"23"},
501
- "4056":{"name":"酉阳土家族苗族自治县","parent":"23"},
502
- "4057":{"name":"彭水苗族土家族自治县","parent":"23"},
503
- "4058":{"name":"潜江市","parent":"18"},
504
- "4059":{"name":"三沙市","parent":"22"},
505
- "4060":{"name":"石河子市","parent":"32"},
506
- "4061":{"name":"阿拉尔市","parent":"32"},
507
- "4062":{"name":"图木舒克市","parent":"32"},
508
- "4063":{"name":"五家渠市","parent":"32"},
509
- "4064":{"name":"北屯市","parent":"32"},
510
- "4065":{"name":"铁门关市","parent":"32"},
511
- "4066":{"name":"儋州市","parent":"22"},
512
- "4067":{"name":"五指山市","parent":"22"},
513
- "4068":{"name":"文昌市","parent":"22"},
514
- "4069":{"name":"琼海市","parent":"22"},
515
- "4070":{"name":"万宁市","parent":"22"},
516
- "4072":{"name":"定安县","parent":"22"},
517
- "4073":{"name":"屯昌县","parent":"22"},
518
- "4074":{"name":"澄迈县","parent":"22"},
519
- "4075":{"name":"临高县","parent":"22"},
520
- "4076":{"name":"琼中黎族苗族自治县","parent":"22"},
521
- "4077":{"name":"保亭黎族苗族自治县","parent":"22"},
522
- "4078":{"name":"白沙黎族自治县","parent":"22"},
523
- "4079":{"name":"昌江黎族自治县","parent":"22"},
524
- "4080":{"name":"乐东黎族自治县","parent":"22"},
525
- "4081":{"name":"陵水黎族自治县","parent":"22"},
526
- "4082":{"name":"马来西亚","parent":"3956"},
527
- "6047":{"name":"长寿区","parent":"23"},
528
- "6857":{"name":"阿富汗","parent":"3956"},
529
- "6858":{"name":"阿尔巴尼亚","parent":"3956"},
530
- "6859":{"name":"阿尔及利亚","parent":"3956"},
531
- "6860":{"name":"美属萨摩亚","parent":"3956"},
532
- "6861":{"name":"安道尔","parent":"3956"},
533
- "6862":{"name":"安哥拉","parent":"3956"},
534
- "6863":{"name":"安圭拉","parent":"3956"},
535
- "6864":{"name":"南极洲","parent":"3956"},
536
- "6865":{"name":"安提瓜和巴布达","parent":"3956"},
537
- "6866":{"name":"阿根廷","parent":"3956"},
538
- "6867":{"name":"亚美尼亚","parent":"3956"},
539
- "6869":{"name":"奥地利","parent":"3956"},
540
- "6870":{"name":"阿塞拜疆","parent":"3956"},
541
- "6871":{"name":"巴哈马","parent":"3956"},
542
- "6872":{"name":"巴林","parent":"3956"},
543
- "6873":{"name":"孟加拉国","parent":"3956"},
544
- "6874":{"name":"巴巴多斯","parent":"3956"},
545
- "6875":{"name":"白俄罗斯","parent":"3956"},
546
- "6876":{"name":"比利时","parent":"3956"},
547
- "6877":{"name":"伯利兹","parent":"3956"},
548
- "6878":{"name":"贝宁","parent":"3956"},
549
- "6879":{"name":"百慕大","parent":"3956"},
550
- "6880":{"name":"不丹","parent":"3956"},
551
- "6881":{"name":"玻利维亚","parent":"3956"},
552
- "6882":{"name":"波黑","parent":"3956"},
553
- "6883":{"name":"博茨瓦纳","parent":"3956"},
554
- "6884":{"name":"布维岛","parent":"3956"},
555
- "6885":{"name":"巴西","parent":"3956"},
556
- "6886":{"name":"英属印度洋领土","parent":"3956"},
557
- "6887":{"name":"文莱","parent":"3956"},
558
- "6888":{"name":"保加利亚","parent":"3956"},
559
- "6889":{"name":"布基纳法索","parent":"3956"},
560
- "6890":{"name":"布隆迪","parent":"3956"},
561
- "6891":{"name":"柬埔寨","parent":"3956"},
562
- "6892":{"name":"喀麦隆","parent":"3956"},
563
- "6893":{"name":"佛得角","parent":"3956"},
564
- "6894":{"name":"开曼群岛","parent":"3956"},
565
- "6895":{"name":"中非","parent":"3956"},
566
- "6896":{"name":"乍得","parent":"3956"},
567
- "6897":{"name":"智利","parent":"3956"},
568
- "6898":{"name":"圣诞岛","parent":"3956"},
569
- "6899":{"name":"科科斯(基林)群岛","parent":"3956"},
570
- "6900":{"name":"哥伦比亚","parent":"3956"},
571
- "6901":{"name":"科摩罗","parent":"3956"},
572
- "6902":{"name":"刚果(布)","parent":"3956"},
573
- "6903":{"name":"刚果(金)","parent":"3956"},
574
- "6904":{"name":"库克群岛","parent":"3956"},
575
- "6905":{"name":"哥斯达黎加","parent":"3956"},
576
- "6906":{"name":"科特迪瓦","parent":"3956"},
577
- "6907":{"name":"克罗地亚","parent":"3956"},
578
- "6908":{"name":"古巴","parent":"3956"},
579
- "6909":{"name":"塞浦路斯","parent":"3956"},
580
- "6910":{"name":"捷克","parent":"3956"},
581
- "6911":{"name":"丹麦","parent":"3956"},
582
- "6912":{"name":"吉布提","parent":"3956"},
583
- "6913":{"name":"多米尼克","parent":"3956"},
584
- "6914":{"name":"多米尼加共和国","parent":"3956"},
585
- "6915":{"name":"东帝汶","parent":"3956"},
586
- "6916":{"name":"厄瓜多尔","parent":"3956"},
587
- "6917":{"name":"埃及","parent":"3956"},
588
- "6918":{"name":"萨尔瓦多","parent":"3956"},
589
- "6919":{"name":"赤道几内亚","parent":"3956"},
590
- "6920":{"name":"厄立特里亚","parent":"3956"},
591
- "6921":{"name":"爱沙尼亚","parent":"3956"},
592
- "6922":{"name":"埃塞俄比亚","parent":"3956"},
593
- "6923":{"name":"福克兰群岛(马尔维纳斯)","parent":"3956"},
594
- "6924":{"name":"法罗群岛","parent":"3956"},
595
- "6925":{"name":"斐济","parent":"3956"},
596
- "6926":{"name":"芬兰","parent":"3956"},
597
- "6927":{"name":"法属圭亚那","parent":"3956"},
598
- "6928":{"name":"法属波利尼西亚","parent":"3956"},
599
- "6929":{"name":"法属南部领土","parent":"3956"},
600
- "6930":{"name":"加蓬","parent":"3956"},
601
- "6931":{"name":"冈比亚","parent":"3956"},
602
- "6932":{"name":"格鲁吉亚","parent":"3956"},
603
- "6933":{"name":"加纳","parent":"3956"},
604
- "6934":{"name":"直布罗陀","parent":"3956"},
605
- "6935":{"name":"希腊","parent":"3956"},
606
- "6936":{"name":"格陵兰","parent":"3956"},
607
- "6937":{"name":"格林纳达","parent":"3956"},
608
- "6938":{"name":"瓜德罗普","parent":"3956"},
609
- "6939":{"name":"关岛","parent":"3956"},
610
- "6940":{"name":"危地马拉","parent":"3956"},
611
- "6941":{"name":"几内亚","parent":"3956"},
612
- "6942":{"name":"几内亚比绍","parent":"3956"},
613
- "6943":{"name":"圭亚那","parent":"3956"},
614
- "6944":{"name":"海地","parent":"3956"},
615
- "6945":{"name":"赫德岛和麦克唐纳岛","parent":"3956"},
616
- "6946":{"name":"洪都拉斯","parent":"3956"},
617
- "6947":{"name":"匈牙利","parent":"3956"},
618
- "6948":{"name":"冰岛","parent":"3956"},
619
- "6949":{"name":"印度","parent":"3956"},
620
- "6950":{"name":"印度尼西亚","parent":"3956"},
621
- "6951":{"name":"伊朗","parent":"3956"},
622
- "6952":{"name":"伊拉克","parent":"3956"},
623
- "6953":{"name":"爱尔兰","parent":"3956"},
624
- "6954":{"name":"以色列","parent":"3956"},
625
- "6955":{"name":"牙买加","parent":"3956"},
626
- "6956":{"name":"约旦","parent":"3956"},
627
- "6957":{"name":"哈萨克斯坦","parent":"3956"},
628
- "6958":{"name":"肯尼亚","parent":"3956"},
629
- "6959":{"name":"基里巴斯","parent":"3956"},
630
- "6960":{"name":"朝鲜","parent":"3956"},
631
- "6961":{"name":"科威特","parent":"3956"},
632
- "6962":{"name":"吉尔吉斯斯坦","parent":"3956"},
633
- "6963":{"name":"老挝","parent":"3956"},
634
- "6964":{"name":"拉脱维亚","parent":"3956"},
635
- "6965":{"name":"黎巴嫩","parent":"3956"},
636
- "6966":{"name":"莱索托","parent":"3956"},
637
- "6967":{"name":"利比里亚","parent":"3956"},
638
- "6968":{"name":"利比亚","parent":"3956"},
639
- "6969":{"name":"列支敦士登","parent":"3956"},
640
- "6970":{"name":"立陶宛","parent":"3956"},
641
- "6971":{"name":"卢森堡","parent":"3956"},
642
- "6972":{"name":"前南马其顿","parent":"3956"},
643
- "6973":{"name":"马达加斯加","parent":"3956"},
644
- "6974":{"name":"马拉维","parent":"3956"},
645
- "6975":{"name":"马尔代夫","parent":"3956"},
646
- "6976":{"name":"马里","parent":"3956"},
647
- "6977":{"name":"马耳他","parent":"3956"},
648
- "6978":{"name":"马绍尔群岛","parent":"3956"},
649
- "6979":{"name":"马提尼克","parent":"3956"},
650
- "6980":{"name":"毛里塔尼亚","parent":"3956"},
651
- "6981":{"name":"毛里求斯","parent":"3956"},
652
- "6982":{"name":"马约特","parent":"3956"},
653
- "6983":{"name":"墨西哥","parent":"3956"},
654
- "6984":{"name":"密克罗尼西亚联邦","parent":"3956"},
655
- "6985":{"name":"摩尔多瓦","parent":"3956"},
656
- "6986":{"name":"摩纳哥","parent":"3956"},
657
- "6987":{"name":"蒙古","parent":"3956"},
658
- "6988":{"name":"蒙特塞拉特","parent":"3956"},
659
- "6989":{"name":"摩洛哥","parent":"3956"},
660
- "6990":{"name":"莫桑比克","parent":"3956"},
661
- "6991":{"name":"缅甸","parent":"3956"},
662
- "6992":{"name":"纳米比亚","parent":"3956"},
663
- "6993":{"name":"瑙鲁","parent":"3956"},
664
- "6994":{"name":"尼泊尔","parent":"3956"},
665
- "6995":{"name":"荷兰","parent":"3956"},
666
- "6996":{"name":"荷属安的列斯","parent":"3956"},
667
- "6997":{"name":"新喀里多尼亚","parent":"3956"},
668
- "6998":{"name":"新西兰","parent":"3956"},
669
- "6999":{"name":"尼加拉瓜","parent":"3956"},
670
- "7000":{"name":"尼日尔","parent":"3956"},
671
- "7001":{"name":"尼日利亚","parent":"3956"},
672
- "7002":{"name":"纽埃","parent":"3956"},
673
- "7003":{"name":"诺福克岛","parent":"3956"},
674
- "7004":{"name":"北马里亚纳","parent":"3956"},
675
- "7005":{"name":"挪威","parent":"3956"},
676
- "7006":{"name":"阿曼","parent":"3956"},
677
- "7007":{"name":"巴基斯坦","parent":"3956"},
678
- "7008":{"name":"帕劳","parent":"3956"},
679
- "7009":{"name":"巴勒斯坦","parent":"3956"},
680
- "7010":{"name":"巴拿马","parent":"3956"},
681
- "7011":{"name":"巴布亚新几内亚","parent":"3956"},
682
- "7012":{"name":"巴拉圭","parent":"3956"},
683
- "7013":{"name":"秘鲁","parent":"3956"},
684
- "7014":{"name":"菲律宾","parent":"3956"},
685
- "7015":{"name":"皮特凯恩群岛","parent":"3956"},
686
- "7016":{"name":"波兰","parent":"3956"},
687
- "7017":{"name":"葡萄牙","parent":"3956"},
688
- "7018":{"name":"波多黎各","parent":"3956"},
689
- "7019":{"name":"卡塔尔","parent":"3956"},
690
- "7020":{"name":"留尼汪","parent":"3956"},
691
- "7021":{"name":"罗马尼亚","parent":"3956"},
692
- "7022":{"name":"俄罗斯联邦","parent":"3956"},
693
- "7023":{"name":"卢旺达","parent":"3956"},
694
- "7024":{"name":"圣赫勒拿","parent":"3956"},
695
- "7025":{"name":"圣基茨和尼维斯","parent":"3956"},
696
- "7026":{"name":"圣卢西亚","parent":"3956"},
697
- "7027":{"name":"圣皮埃尔和密克隆","parent":"3956"},
698
- "7028":{"name":"圣文森特和格林纳丁斯","parent":"3956"},
699
- "7029":{"name":"萨摩亚","parent":"3956"},
700
- "7030":{"name":"圣马力诺","parent":"3956"},
701
- "7031":{"name":"圣多美和普林西比","parent":"3956"},
702
- "7032":{"name":"沙特阿拉伯","parent":"3956"},
703
- "7033":{"name":"塞内加尔","parent":"3956"},
704
- "7034":{"name":"塞舌尔","parent":"3956"},
705
- "7035":{"name":"塞拉利昂","parent":"3956"},
706
- "7036":{"name":"新加坡","parent":"3956"},
707
- "7037":{"name":"斯洛伐克","parent":"3956"},
708
- "7038":{"name":"斯洛文尼亚","parent":"3956"},
709
- "7039":{"name":"所罗门群岛","parent":"3956"},
710
- "7040":{"name":"索马里","parent":"3956"},
711
- "7041":{"name":"南非","parent":"3956"},
712
- "7042":{"name":"南乔治亚岛和南桑德韦奇岛","parent":"3956"},
713
- "7043":{"name":"斯里兰卡","parent":"3956"},
714
- "7044":{"name":"苏丹","parent":"3956"},
715
- "7045":{"name":"苏里南","parent":"3956"},
716
- "7046":{"name":"斯瓦尔巴群岛","parent":"3956"},
717
- "7047":{"name":"斯威士兰","parent":"3956"},
718
- "7048":{"name":"瑞典","parent":"3956"},
719
- "7049":{"name":"瑞士","parent":"3956"},
720
- "7050":{"name":"叙利亚","parent":"3956"},
721
- "7051":{"name":"塔吉克斯坦","parent":"3956"},
722
- "7052":{"name":"坦桑尼亚","parent":"3956"},
723
- "7053":{"name":"泰国","parent":"3956"},
724
- "7054":{"name":"多哥","parent":"3956"},
725
- "7055":{"name":"托克劳","parent":"3956"},
726
- "7056":{"name":"汤加","parent":"3956"},
727
- "7057":{"name":"特立尼达和多巴哥","parent":"3956"},
728
- "7058":{"name":"突尼斯","parent":"3956"},
729
- "7059":{"name":"土耳其","parent":"3956"},
730
- "7060":{"name":"土库曼斯坦","parent":"3956"},
731
- "7061":{"name":"特克斯科斯群岛","parent":"3956"},
732
- "7062":{"name":"图瓦卢","parent":"3956"},
733
- "7063":{"name":"乌干达","parent":"3956"},
734
- "7064":{"name":"乌克兰","parent":"3956"},
735
- "7065":{"name":"阿联酋","parent":"3956"},
736
- "7066":{"name":"美国本土外小岛屿","parent":"3956"},
737
- "7067":{"name":"乌拉圭","parent":"3956"},
738
- "7068":{"name":"乌兹别克斯坦","parent":"3956"},
739
- "7069":{"name":"瓦努阿图","parent":"3956"},
740
- "7070":{"name":"梵蒂冈","parent":"3956"},
741
- "7071":{"name":"委内瑞拉","parent":"3956"},
742
- "7072":{"name":"越南","parent":"3956"},
743
- "7073":{"name":"英属维尔京群岛","parent":"3956"},
744
- "7074":{"name":"美属维尔京群岛","parent":"3956"},
745
- "7075":{"name":"瓦利斯和富图纳","parent":"3956"},
746
- "7076":{"name":"西撒哈拉","parent":"3956"},
747
- "7077":{"name":"也门","parent":"3956"},
748
- "7078":{"name":"南斯拉夫","parent":"3956"},
749
- "7079":{"name":"赞比亚","parent":"3956"},
750
- "7080":{"name":"津巴布韦","parent":"3956"},
751
- "7081":{"name":"塞尔维亚","parent":"3956"},
752
- "7082":{"name":"雄安新区","parent":"4"},
753
- "7084":{"name":"天门市","parent":"18"}
754
  }
755
 
756
# Every region name that appears in TBL, collected once for O(1) membership
# tests; built directly as a set from the values (the keys are not needed).
NM_SET = {v["name"] for v in TBL.values()}
 
757
 
758
  def get_names(id):
759
- if not id or str(id).lower() == "none":return []
 
760
  id = str(id)
761
- if not re.match("[0-9]+$", id.strip()):return [id]
 
762
  nms = []
763
  d = TBL.get(id)
764
- if not d:return[]
 
765
  nms.append(d["name"])
766
  p = get_names(d["parent"])
767
- if p: nms.extend(p)
 
768
  return nms
769
 
770
- import re
 
771
  def isName(nm):
772
- if nm in NM_SET:return True
773
- if nm + "市" in NM_SET:return True
774
- if re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET:return True
 
 
 
775
  return False
 
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
+ import re
14
 
15
  TBL = {
16
+ "2": {"name": "北京", "parent": "1"},
17
+ "3": {"name": "天津", "parent": "1"},
18
+ "4": {"name": "河北", "parent": "1"},
19
+ "5": {"name": "山西", "parent": "1"},
20
+ "6": {"name": "内蒙古", "parent": "1"},
21
+ "7": {"name": "辽宁", "parent": "1"},
22
+ "8": {"name": "吉林", "parent": "1"},
23
+ "9": {"name": "黑龙江", "parent": "1"},
24
+ "10": {"name": "上海", "parent": "1"},
25
+ "11": {"name": "江苏", "parent": "1"},
26
+ "12": {"name": "浙江", "parent": "1"},
27
+ "13": {"name": "安徽", "parent": "1"},
28
+ "14": {"name": "福建", "parent": "1"},
29
+ "15": {"name": "江西", "parent": "1"},
30
+ "16": {"name": "山东", "parent": "1"},
31
+ "17": {"name": "河南", "parent": "1"},
32
+ "18": {"name": "湖北", "parent": "1"},
33
+ "19": {"name": "湖南", "parent": "1"},
34
+ "20": {"name": "广东", "parent": "1"},
35
+ "21": {"name": "广西", "parent": "1"},
36
+ "22": {"name": "海南", "parent": "1"},
37
+ "23": {"name": "重庆", "parent": "1"},
38
+ "24": {"name": "四川", "parent": "1"},
39
+ "25": {"name": "贵州", "parent": "1"},
40
+ "26": {"name": "云南", "parent": "1"},
41
+ "27": {"name": "西藏", "parent": "1"},
42
+ "28": {"name": "陕西", "parent": "1"},
43
+ "29": {"name": "甘肃", "parent": "1"},
44
+ "30": {"name": "青海", "parent": "1"},
45
+ "31": {"name": "宁夏", "parent": "1"},
46
+ "32": {"name": "新疆", "parent": "1"},
47
+ "33": {"name": "北京市", "parent": "2"},
48
+ "34": {"name": "天津市", "parent": "3"},
49
+ "35": {"name": "石家庄市", "parent": "4"},
50
+ "36": {"name": "唐山市", "parent": "4"},
51
+ "37": {"name": "秦皇岛市", "parent": "4"},
52
+ "38": {"name": "邯郸市", "parent": "4"},
53
+ "39": {"name": "邢台市", "parent": "4"},
54
+ "40": {"name": "保定市", "parent": "4"},
55
+ "41": {"name": "张家口市", "parent": "4"},
56
+ "42": {"name": "承德市", "parent": "4"},
57
+ "43": {"name": "沧州市", "parent": "4"},
58
+ "44": {"name": "廊坊市", "parent": "4"},
59
+ "45": {"name": "衡水市", "parent": "4"},
60
+ "46": {"name": "太原市", "parent": "5"},
61
+ "47": {"name": "大同市", "parent": "5"},
62
+ "48": {"name": "阳泉市", "parent": "5"},
63
+ "49": {"name": "长治市", "parent": "5"},
64
+ "50": {"name": "晋城市", "parent": "5"},
65
+ "51": {"name": "朔州市", "parent": "5"},
66
+ "52": {"name": "晋中市", "parent": "5"},
67
+ "53": {"name": "运城市", "parent": "5"},
68
+ "54": {"name": "忻州市", "parent": "5"},
69
+ "55": {"name": "临汾市", "parent": "5"},
70
+ "56": {"name": "吕梁市", "parent": "5"},
71
+ "57": {"name": "呼和浩特市", "parent": "6"},
72
+ "58": {"name": "包头市", "parent": "6"},
73
+ "59": {"name": "乌海市", "parent": "6"},
74
+ "60": {"name": "赤峰市", "parent": "6"},
75
+ "61": {"name": "通辽市", "parent": "6"},
76
+ "62": {"name": "鄂尔多斯市", "parent": "6"},
77
+ "63": {"name": "呼伦贝尔市", "parent": "6"},
78
+ "64": {"name": "巴彦淖尔市", "parent": "6"},
79
+ "65": {"name": "乌兰察布市", "parent": "6"},
80
+ "66": {"name": "兴安盟", "parent": "6"},
81
+ "67": {"name": "锡林郭勒盟", "parent": "6"},
82
+ "68": {"name": "阿拉善盟", "parent": "6"},
83
+ "69": {"name": "沈阳市", "parent": "7"},
84
+ "70": {"name": "大连市", "parent": "7"},
85
+ "71": {"name": "鞍山市", "parent": "7"},
86
+ "72": {"name": "抚顺市", "parent": "7"},
87
+ "73": {"name": "本溪市", "parent": "7"},
88
+ "74": {"name": "丹东市", "parent": "7"},
89
+ "75": {"name": "锦州市", "parent": "7"},
90
+ "76": {"name": "营口市", "parent": "7"},
91
+ "77": {"name": "阜新市", "parent": "7"},
92
+ "78": {"name": "辽阳市", "parent": "7"},
93
+ "79": {"name": "盘锦市", "parent": "7"},
94
+ "80": {"name": "铁岭市", "parent": "7"},
95
+ "81": {"name": "朝阳市", "parent": "7"},
96
+ "82": {"name": "葫芦岛市", "parent": "7"},
97
+ "83": {"name": "长春市", "parent": "8"},
98
+ "84": {"name": "吉林市", "parent": "8"},
99
+ "85": {"name": "四平市", "parent": "8"},
100
+ "86": {"name": "辽源市", "parent": "8"},
101
+ "87": {"name": "通化市", "parent": "8"},
102
+ "88": {"name": "白山市", "parent": "8"},
103
+ "89": {"name": "松原市", "parent": "8"},
104
+ "90": {"name": "白城市", "parent": "8"},
105
+ "91": {"name": "延边朝鲜族自治州", "parent": "8"},
106
+ "92": {"name": "哈尔滨市", "parent": "9"},
107
+ "93": {"name": "齐齐哈尔市", "parent": "9"},
108
+ "94": {"name": "鸡西市", "parent": "9"},
109
+ "95": {"name": "鹤岗市", "parent": "9"},
110
+ "96": {"name": "双鸭山市", "parent": "9"},
111
+ "97": {"name": "大庆市", "parent": "9"},
112
+ "98": {"name": "伊春市", "parent": "9"},
113
+ "99": {"name": "佳木斯市", "parent": "9"},
114
+ "100": {"name": "七台河市", "parent": "9"},
115
+ "101": {"name": "牡丹江市", "parent": "9"},
116
+ "102": {"name": "黑河市", "parent": "9"},
117
+ "103": {"name": "绥化市", "parent": "9"},
118
+ "104": {"name": "大兴安岭地区", "parent": "9"},
119
+ "105": {"name": "上海市", "parent": "10"},
120
+ "106": {"name": "南京市", "parent": "11"},
121
+ "107": {"name": "无锡市", "parent": "11"},
122
+ "108": {"name": "徐州市", "parent": "11"},
123
+ "109": {"name": "常州市", "parent": "11"},
124
+ "110": {"name": "苏州市", "parent": "11"},
125
+ "111": {"name": "南通市", "parent": "11"},
126
+ "112": {"name": "连云港市", "parent": "11"},
127
+ "113": {"name": "淮安市", "parent": "11"},
128
+ "114": {"name": "盐城市", "parent": "11"},
129
+ "115": {"name": "扬州市", "parent": "11"},
130
+ "116": {"name": "镇江市", "parent": "11"},
131
+ "117": {"name": "泰州市", "parent": "11"},
132
+ "118": {"name": "宿迁市", "parent": "11"},
133
+ "119": {"name": "杭州市", "parent": "12"},
134
+ "120": {"name": "宁波市", "parent": "12"},
135
+ "121": {"name": "温州市", "parent": "12"},
136
+ "122": {"name": "嘉兴市", "parent": "12"},
137
+ "123": {"name": "湖州市", "parent": "12"},
138
+ "124": {"name": "绍兴市", "parent": "12"},
139
+ "125": {"name": "金华市", "parent": "12"},
140
+ "126": {"name": "衢州市", "parent": "12"},
141
+ "127": {"name": "舟山市", "parent": "12"},
142
+ "128": {"name": "台州市", "parent": "12"},
143
+ "129": {"name": "丽水市", "parent": "12"},
144
+ "130": {"name": "合肥市", "parent": "13"},
145
+ "131": {"name": "芜湖市", "parent": "13"},
146
+ "132": {"name": "蚌埠市", "parent": "13"},
147
+ "133": {"name": "淮南市", "parent": "13"},
148
+ "134": {"name": "马鞍山市", "parent": "13"},
149
+ "135": {"name": "淮北市", "parent": "13"},
150
+ "136": {"name": "铜陵市", "parent": "13"},
151
+ "137": {"name": "安庆市", "parent": "13"},
152
+ "138": {"name": "黄山市", "parent": "13"},
153
+ "139": {"name": "滁州市", "parent": "13"},
154
+ "140": {"name": "阜阳市", "parent": "13"},
155
+ "141": {"name": "宿州市", "parent": "13"},
156
+ "143": {"name": "六安市", "parent": "13"},
157
+ "144": {"name": "亳州市", "parent": "13"},
158
+ "145": {"name": "池州市", "parent": "13"},
159
+ "146": {"name": "宣城市", "parent": "13"},
160
+ "147": {"name": "福州市", "parent": "14"},
161
+ "148": {"name": "厦门市", "parent": "14"},
162
+ "149": {"name": "莆田市", "parent": "14"},
163
+ "150": {"name": "三明市", "parent": "14"},
164
+ "151": {"name": "泉州市", "parent": "14"},
165
+ "152": {"name": "漳州市", "parent": "14"},
166
+ "153": {"name": "南平市", "parent": "14"},
167
+ "154": {"name": "龙岩市", "parent": "14"},
168
+ "155": {"name": "宁德市", "parent": "14"},
169
+ "156": {"name": "南昌市", "parent": "15"},
170
+ "157": {"name": "景德镇市", "parent": "15"},
171
+ "158": {"name": "萍乡市", "parent": "15"},
172
+ "159": {"name": "九江市", "parent": "15"},
173
+ "160": {"name": "新余市", "parent": "15"},
174
+ "161": {"name": "鹰潭市", "parent": "15"},
175
+ "162": {"name": "赣州市", "parent": "15"},
176
+ "163": {"name": "吉安市", "parent": "15"},
177
+ "164": {"name": "宜春市", "parent": "15"},
178
+ "165": {"name": "抚州市", "parent": "15"},
179
+ "166": {"name": "上饶市", "parent": "15"},
180
+ "167": {"name": "济南市", "parent": "16"},
181
+ "168": {"name": "青岛市", "parent": "16"},
182
+ "169": {"name": "淄博市", "parent": "16"},
183
+ "170": {"name": "枣庄市", "parent": "16"},
184
+ "171": {"name": "东营市", "parent": "16"},
185
+ "172": {"name": "烟台市", "parent": "16"},
186
+ "173": {"name": "潍坊市", "parent": "16"},
187
+ "174": {"name": "济宁市", "parent": "16"},
188
+ "175": {"name": "泰安市", "parent": "16"},
189
+ "176": {"name": "威海市", "parent": "16"},
190
+ "177": {"name": "日照市", "parent": "16"},
191
+ "179": {"name": "临沂市", "parent": "16"},
192
+ "180": {"name": "德州市", "parent": "16"},
193
+ "181": {"name": "聊城市", "parent": "16"},
194
+ "182": {"name": "滨州市", "parent": "16"},
195
+ "183": {"name": "菏泽市", "parent": "16"},
196
+ "184": {"name": "郑州市", "parent": "17"},
197
+ "185": {"name": "开封市", "parent": "17"},
198
+ "186": {"name": "洛阳市", "parent": "17"},
199
+ "187": {"name": "平顶山市", "parent": "17"},
200
+ "188": {"name": "安阳市", "parent": "17"},
201
+ "189": {"name": "鹤壁市", "parent": "17"},
202
+ "190": {"name": "新乡市", "parent": "17"},
203
+ "191": {"name": "焦作市", "parent": "17"},
204
+ "192": {"name": "濮阳市", "parent": "17"},
205
+ "193": {"name": "许昌市", "parent": "17"},
206
+ "194": {"name": "漯河市", "parent": "17"},
207
+ "195": {"name": "三门峡市", "parent": "17"},
208
+ "196": {"name": "南阳市", "parent": "17"},
209
+ "197": {"name": "商丘市", "parent": "17"},
210
+ "198": {"name": "信阳市", "parent": "17"},
211
+ "199": {"name": "周口市", "parent": "17"},
212
+ "200": {"name": "驻马店市", "parent": "17"},
213
+ "201": {"name": "武汉市", "parent": "18"},
214
+ "202": {"name": "黄石市", "parent": "18"},
215
+ "203": {"name": "十堰市", "parent": "18"},
216
+ "204": {"name": "宜昌市", "parent": "18"},
217
+ "205": {"name": "襄阳市", "parent": "18"},
218
+ "206": {"name": "鄂州市", "parent": "18"},
219
+ "207": {"name": "荆门市", "parent": "18"},
220
+ "208": {"name": "孝感市", "parent": "18"},
221
+ "209": {"name": "荆州市", "parent": "18"},
222
+ "210": {"name": "黄冈市", "parent": "18"},
223
+ "211": {"name": "咸宁市", "parent": "18"},
224
+ "212": {"name": "随州市", "parent": "18"},
225
+ "213": {"name": "恩施土家族苗族自治州", "parent": "18"},
226
+ "215": {"name": "长沙市", "parent": "19"},
227
+ "216": {"name": "株洲市", "parent": "19"},
228
+ "217": {"name": "湘潭市", "parent": "19"},
229
+ "218": {"name": "衡阳市", "parent": "19"},
230
+ "219": {"name": "邵阳市", "parent": "19"},
231
+ "220": {"name": "岳阳市", "parent": "19"},
232
+ "221": {"name": "常德市", "parent": "19"},
233
+ "222": {"name": "张家界市", "parent": "19"},
234
+ "223": {"name": "益阳市", "parent": "19"},
235
+ "224": {"name": "郴州市", "parent": "19"},
236
+ "225": {"name": "永州市", "parent": "19"},
237
+ "226": {"name": "怀化市", "parent": "19"},
238
+ "227": {"name": "娄底市", "parent": "19"},
239
+ "228": {"name": "湘西土家族苗族自治州", "parent": "19"},
240
+ "229": {"name": "广州市", "parent": "20"},
241
+ "230": {"name": "韶关市", "parent": "20"},
242
+ "231": {"name": "深圳市", "parent": "20"},
243
+ "232": {"name": "珠海市", "parent": "20"},
244
+ "233": {"name": "汕头市", "parent": "20"},
245
+ "234": {"name": "佛山市", "parent": "20"},
246
+ "235": {"name": "江门市", "parent": "20"},
247
+ "236": {"name": "湛江市", "parent": "20"},
248
+ "237": {"name": "茂名市", "parent": "20"},
249
+ "238": {"name": "肇庆市", "parent": "20"},
250
+ "239": {"name": "惠州市", "parent": "20"},
251
+ "240": {"name": "梅州市", "parent": "20"},
252
+ "241": {"name": "汕尾市", "parent": "20"},
253
+ "242": {"name": "河源市", "parent": "20"},
254
+ "243": {"name": "阳江市", "parent": "20"},
255
+ "244": {"name": "清远市", "parent": "20"},
256
+ "245": {"name": "东莞市", "parent": "20"},
257
+ "246": {"name": "中山市", "parent": "20"},
258
+ "247": {"name": "潮州市", "parent": "20"},
259
+ "248": {"name": "揭阳市", "parent": "20"},
260
+ "249": {"name": "云浮市", "parent": "20"},
261
+ "250": {"name": "南宁市", "parent": "21"},
262
+ "251": {"name": "柳州市", "parent": "21"},
263
+ "252": {"name": "桂林市", "parent": "21"},
264
+ "253": {"name": "梧州市", "parent": "21"},
265
+ "254": {"name": "北海市", "parent": "21"},
266
+ "255": {"name": "防城港市", "parent": "21"},
267
+ "256": {"name": "钦州市", "parent": "21"},
268
+ "257": {"name": "贵港市", "parent": "21"},
269
+ "258": {"name": "玉林市", "parent": "21"},
270
+ "259": {"name": "百色市", "parent": "21"},
271
+ "260": {"name": "贺州市", "parent": "21"},
272
+ "261": {"name": "河池市", "parent": "21"},
273
+ "262": {"name": "来宾市", "parent": "21"},
274
+ "263": {"name": "崇左市", "parent": "21"},
275
+ "264": {"name": "海口市", "parent": "22"},
276
+ "265": {"name": "三亚市", "parent": "22"},
277
+ "267": {"name": "重庆市", "parent": "23"},
278
+ "268": {"name": "成都市", "parent": "24"},
279
+ "269": {"name": "自贡市", "parent": "24"},
280
+ "270": {"name": "攀枝花市", "parent": "24"},
281
+ "271": {"name": "泸州市", "parent": "24"},
282
+ "272": {"name": "德阳市", "parent": "24"},
283
+ "273": {"name": "绵阳市", "parent": "24"},
284
+ "274": {"name": "广元市", "parent": "24"},
285
+ "275": {"name": "遂宁市", "parent": "24"},
286
+ "276": {"name": "内江市", "parent": "24"},
287
+ "277": {"name": "乐山市", "parent": "24"},
288
+ "278": {"name": "南充市", "parent": "24"},
289
+ "279": {"name": "眉山市", "parent": "24"},
290
+ "280": {"name": "宜宾市", "parent": "24"},
291
+ "281": {"name": "广安市", "parent": "24"},
292
+ "282": {"name": "达州市", "parent": "24"},
293
+ "283": {"name": "雅安市", "parent": "24"},
294
+ "284": {"name": "巴中市", "parent": "24"},
295
+ "285": {"name": "资阳市", "parent": "24"},
296
+ "286": {"name": "阿坝藏族羌族自治州", "parent": "24"},
297
+ "287": {"name": "甘孜藏族自治州", "parent": "24"},
298
+ "288": {"name": "凉山彝族自治州", "parent": "24"},
299
+ "289": {"name": "贵阳市", "parent": "25"},
300
+ "290": {"name": "六盘水市", "parent": "25"},
301
+ "291": {"name": "遵义市", "parent": "25"},
302
+ "292": {"name": "安顺市", "parent": "25"},
303
+ "293": {"name": "铜仁市", "parent": "25"},
304
+ "294": {"name": "黔西南布依族苗族自治州", "parent": "25"},
305
+ "295": {"name": "毕节市", "parent": "25"},
306
+ "296": {"name": "黔东南苗族侗族自治州", "parent": "25"},
307
+ "297": {"name": "黔南布依族苗族自治州", "parent": "25"},
308
+ "298": {"name": "昆明市", "parent": "26"},
309
+ "299": {"name": "曲靖市", "parent": "26"},
310
+ "300": {"name": "玉溪市", "parent": "26"},
311
+ "301": {"name": "保山市", "parent": "26"},
312
+ "302": {"name": "昭通市", "parent": "26"},
313
+ "303": {"name": "丽江市", "parent": "26"},
314
+ "304": {"name": "普洱市", "parent": "26"},
315
+ "305": {"name": "临沧市", "parent": "26"},
316
+ "306": {"name": "楚雄彝族自治州", "parent": "26"},
317
+ "307": {"name": "红河哈尼族彝族自治州", "parent": "26"},
318
+ "308": {"name": "文山壮族苗族自治州", "parent": "26"},
319
+ "309": {"name": "西双版纳傣族自治州", "parent": "26"},
320
+ "310": {"name": "大理白族自治州", "parent": "26"},
321
+ "311": {"name": "德宏傣族景颇族自治州", "parent": "26"},
322
+ "312": {"name": "怒江傈僳族自治州", "parent": "26"},
323
+ "313": {"name": "迪庆藏族自治州", "parent": "26"},
324
+ "314": {"name": "拉萨市", "parent": "27"},
325
+ "315": {"name": "昌都市", "parent": "27"},
326
+ "316": {"name": "山南市", "parent": "27"},
327
+ "317": {"name": "日喀则市", "parent": "27"},
328
+ "318": {"name": "那曲市", "parent": "27"},
329
+ "319": {"name": "阿里地区", "parent": "27"},
330
+ "320": {"name": "林芝市", "parent": "27"},
331
+ "321": {"name": "西安市", "parent": "28"},
332
+ "322": {"name": "铜川市", "parent": "28"},
333
+ "323": {"name": "宝鸡市", "parent": "28"},
334
+ "324": {"name": "咸阳市", "parent": "28"},
335
+ "325": {"name": "渭南市", "parent": "28"},
336
+ "326": {"name": "延安市", "parent": "28"},
337
+ "327": {"name": "汉中市", "parent": "28"},
338
+ "328": {"name": "榆林市", "parent": "28"},
339
+ "329": {"name": "安康市", "parent": "28"},
340
+ "330": {"name": "商洛市", "parent": "28"},
341
+ "331": {"name": "兰州市", "parent": "29"},
342
+ "332": {"name": "嘉峪关市", "parent": "29"},
343
+ "333": {"name": "金昌市", "parent": "29"},
344
+ "334": {"name": "白银市", "parent": "29"},
345
+ "335": {"name": "天水市", "parent": "29"},
346
+ "336": {"name": "武威市", "parent": "29"},
347
+ "337": {"name": "张掖市", "parent": "29"},
348
+ "338": {"name": "平凉市", "parent": "29"},
349
+ "339": {"name": "酒泉市", "parent": "29"},
350
+ "340": {"name": "庆阳市", "parent": "29"},
351
+ "341": {"name": "定西市", "parent": "29"},
352
+ "342": {"name": "陇南市", "parent": "29"},
353
+ "343": {"name": "临夏回族自治州", "parent": "29"},
354
+ "344": {"name": "甘南藏族自治州", "parent": "29"},
355
+ "345": {"name": "西宁市", "parent": "30"},
356
+ "346": {"name": "海东市", "parent": "30"},
357
+ "347": {"name": "海北藏族自治州", "parent": "30"},
358
+ "348": {"name": "黄南藏族自治州", "parent": "30"},
359
+ "349": {"name": "海南藏族自治州", "parent": "30"},
360
+ "350": {"name": "果洛藏族自治州", "parent": "30"},
361
+ "351": {"name": "玉树藏族自治州", "parent": "30"},
362
+ "352": {"name": "海西蒙古族藏族自治州", "parent": "30"},
363
+ "353": {"name": "银川市", "parent": "31"},
364
+ "354": {"name": "石嘴山市", "parent": "31"},
365
+ "355": {"name": "吴忠市", "parent": "31"},
366
+ "356": {"name": "固原市", "parent": "31"},
367
+ "357": {"name": "中卫市", "parent": "31"},
368
+ "358": {"name": "乌鲁木齐市", "parent": "32"},
369
+ "359": {"name": "克拉玛依市", "parent": "32"},
370
+ "360": {"name": "吐鲁番市", "parent": "32"},
371
+ "361": {"name": "哈密市", "parent": "32"},
372
+ "362": {"name": "昌吉回族自治州", "parent": "32"},
373
+ "363": {"name": "博尔塔拉蒙古自治州", "parent": "32"},
374
+ "364": {"name": "巴音郭楞蒙古自治州", "parent": "32"},
375
+ "365": {"name": "阿克苏地区", "parent": "32"},
376
+ "366": {"name": "克孜勒苏柯尔克孜自治州", "parent": "32"},
377
+ "367": {"name": "喀什地区", "parent": "32"},
378
+ "368": {"name": "和田地区", "parent": "32"},
379
+ "369": {"name": "伊犁哈萨克自治州", "parent": "32"},
380
+ "370": {"name": "塔城地区", "parent": "32"},
381
+ "371": {"name": "阿勒泰地区", "parent": "32"},
382
+ "372": {"name": "新疆省直辖行政单位", "parent": "32"},
383
+ "373": {"name": "可克达拉市", "parent": "32"},
384
+ "374": {"name": "昆玉市", "parent": "32"},
385
+ "375": {"name": "胡杨河市", "parent": "32"},
386
+ "376": {"name": "双河市", "parent": "32"},
387
+ "3560": {"name": "北票市", "parent": "7"},
388
+ "3615": {"name": "高州市", "parent": "20"},
389
+ "3651": {"name": "济源市", "parent": "17"},
390
+ "3662": {"name": "胶南市", "parent": "16"},
391
+ "3683": {"name": "老河口市", "parent": "18"},
392
+ "3758": {"name": "沙河市", "parent": "4"},
393
+ "3822": {"name": "宜城市", "parent": "18"},
394
+ "3842": {"name": "枣阳市", "parent": "18"},
395
+ "3850": {"name": "肇东市", "parent": "9"},
396
+ "3905": {"name": "澳门", "parent": "1"},
397
+ "3906": {"name": "澳门", "parent": "3905"},
398
+ "3907": {"name": "香港", "parent": "1"},
399
+ "3908": {"name": "香港", "parent": "3907"},
400
+ "3947": {"name": "仙桃市", "parent": "18"},
401
+ "3954": {"name": "台湾", "parent": "1"},
402
+ "3955": {"name": "台湾", "parent": "3954"},
403
+ "3956": {"name": "海外", "parent": "1"},
404
+ "3957": {"name": "海外", "parent": "3956"},
405
+ "3958": {"name": "美国", "parent": "3956"},
406
+ "3959": {"name": "加拿大", "parent": "3956"},
407
+ "3961": {"name": "日本", "parent": "3956"},
408
+ "3962": {"name": "韩国", "parent": "3956"},
409
+ "3963": {"name": "德国", "parent": "3956"},
410
+ "3964": {"name": "英国", "parent": "3956"},
411
+ "3965": {"name": "意大利", "parent": "3956"},
412
+ "3966": {"name": "西班牙", "parent": "3956"},
413
+ "3967": {"name": "法国", "parent": "3956"},
414
+ "3968": {"name": "澳大利亚", "parent": "3956"},
415
+ "3969": {"name": "东城区", "parent": "2"},
416
+ "3970": {"name": "西城区", "parent": "2"},
417
+ "3971": {"name": "崇文区", "parent": "2"},
418
+ "3972": {"name": "宣武区", "parent": "2"},
419
+ "3973": {"name": "朝阳区", "parent": "2"},
420
+ "3974": {"name": "海淀区", "parent": "2"},
421
+ "3975": {"name": "丰台区", "parent": "2"},
422
+ "3976": {"name": "石景山区", "parent": "2"},
423
+ "3977": {"name": "门头沟区", "parent": "2"},
424
+ "3978": {"name": "房山区", "parent": "2"},
425
+ "3979": {"name": "通州区", "parent": "2"},
426
+ "3980": {"name": "顺义区", "parent": "2"},
427
+ "3981": {"name": "昌平区", "parent": "2"},
428
+ "3982": {"name": "大兴区", "parent": "2"},
429
+ "3983": {"name": "平谷区", "parent": "2"},
430
+ "3984": {"name": "怀柔区", "parent": "2"},
431
+ "3985": {"name": "密云区", "parent": "2"},
432
+ "3986": {"name": "延庆区", "parent": "2"},
433
+ "3987": {"name": "黄浦区", "parent": "10"},
434
+ "3988": {"name": "徐汇区", "parent": "10"},
435
+ "3989": {"name": "长宁区", "parent": "10"},
436
+ "3990": {"name": "静安区", "parent": "10"},
437
+ "3991": {"name": "普陀区", "parent": "10"},
438
+ "3992": {"name": "闸北区", "parent": "10"},
439
+ "3993": {"name": "虹口区", "parent": "10"},
440
+ "3994": {"name": "杨浦区", "parent": "10"},
441
+ "3995": {"name": "宝山区", "parent": "10"},
442
+ "3996": {"name": "闵行区", "parent": "10"},
443
+ "3997": {"name": "嘉定区", "parent": "10"},
444
+ "3998": {"name": "浦东新区", "parent": "10"},
445
+ "3999": {"name": "松江区", "parent": "10"},
446
+ "4000": {"name": "金山区", "parent": "10"},
447
+ "4001": {"name": "青浦区", "parent": "10"},
448
+ "4002": {"name": "奉贤区", "parent": "10"},
449
+ "4003": {"name": "崇明区", "parent": "10"},
450
+ "4004": {"name": "和平区", "parent": "3"},
451
+ "4005": {"name": "河东区", "parent": "3"},
452
+ "4006": {"name": "河西区", "parent": "3"},
453
+ "4007": {"name": "南开区", "parent": "3"},
454
+ "4008": {"name": "红桥区", "parent": "3"},
455
+ "4009": {"name": "河北区", "parent": "3"},
456
+ "4010": {"name": "滨海新区", "parent": "3"},
457
+ "4011": {"name": "东丽区", "parent": "3"},
458
+ "4012": {"name": "西青区", "parent": "3"},
459
+ "4013": {"name": "北辰区", "parent": "3"},
460
+ "4014": {"name": "津南区", "parent": "3"},
461
+ "4015": {"name": "武清区", "parent": "3"},
462
+ "4016": {"name": "宝坻区", "parent": "3"},
463
+ "4017": {"name": "静海区", "parent": "3"},
464
+ "4018": {"name": "宁河区", "parent": "3"},
465
+ "4019": {"name": "蓟州区", "parent": "3"},
466
+ "4020": {"name": "渝中区", "parent": "23"},
467
+ "4021": {"name": "江北区", "parent": "23"},
468
+ "4022": {"name": "南岸区", "parent": "23"},
469
+ "4023": {"name": "沙坪坝区", "parent": "23"},
470
+ "4024": {"name": "九龙坡区", "parent": "23"},
471
+ "4025": {"name": "大渡口区", "parent": "23"},
472
+ "4026": {"name": "渝北区", "parent": "23"},
473
+ "4027": {"name": "巴南区", "parent": "23"},
474
+ "4028": {"name": "北碚区", "parent": "23"},
475
+ "4029": {"name": "万州区", "parent": "23"},
476
+ "4030": {"name": "黔江区", "parent": "23"},
477
+ "4031": {"name": "永川区", "parent": "23"},
478
+ "4032": {"name": "涪陵区", "parent": "23"},
479
+ "4033": {"name": "江津区", "parent": "23"},
480
+ "4034": {"name": "合川区", "parent": "23"},
481
+ "4035": {"name": "双桥区", "parent": "23"},
482
+ "4036": {"name": "万盛区", "parent": "23"},
483
+ "4037": {"name": "荣昌区", "parent": "23"},
484
+ "4038": {"name": "大足区", "parent": "23"},
485
+ "4039": {"name": "璧山区", "parent": "23"},
486
+ "4040": {"name": "铜梁区", "parent": "23"},
487
+ "4041": {"name": "潼南区", "parent": "23"},
488
+ "4042": {"name": "綦江区", "parent": "23"},
489
+ "4043": {"name": "忠县", "parent": "23"},
490
+ "4044": {"name": "开州区", "parent": "23"},
491
+ "4045": {"name": "云阳县", "parent": "23"},
492
+ "4046": {"name": "梁平区", "parent": "23"},
493
+ "4047": {"name": "垫江县", "parent": "23"},
494
+ "4048": {"name": "丰都县", "parent": "23"},
495
+ "4049": {"name": "奉节县", "parent": "23"},
496
+ "4050": {"name": "巫山县", "parent": "23"},
497
+ "4051": {"name": "巫溪县", "parent": "23"},
498
+ "4052": {"name": "城口县", "parent": "23"},
499
+ "4053": {"name": "武隆区", "parent": "23"},
500
+ "4054": {"name": "石柱土家族自治县", "parent": "23"},
501
+ "4055": {"name": "秀山土家族苗族自治县", "parent": "23"},
502
+ "4056": {"name": "酉阳土家族苗族自治县", "parent": "23"},
503
+ "4057": {"name": "彭水苗族土家族自治县", "parent": "23"},
504
+ "4058": {"name": "潜江市", "parent": "18"},
505
+ "4059": {"name": "三沙市", "parent": "22"},
506
+ "4060": {"name": "石河子市", "parent": "32"},
507
+ "4061": {"name": "阿拉尔市", "parent": "32"},
508
+ "4062": {"name": "图木舒克市", "parent": "32"},
509
+ "4063": {"name": "五家渠市", "parent": "32"},
510
+ "4064": {"name": "北屯市", "parent": "32"},
511
+ "4065": {"name": "铁门关市", "parent": "32"},
512
+ "4066": {"name": "儋州市", "parent": "22"},
513
+ "4067": {"name": "五指山市", "parent": "22"},
514
+ "4068": {"name": "文昌市", "parent": "22"},
515
+ "4069": {"name": "琼海市", "parent": "22"},
516
+ "4070": {"name": "万宁市", "parent": "22"},
517
+ "4072": {"name": "定安县", "parent": "22"},
518
+ "4073": {"name": "屯昌县", "parent": "22"},
519
+ "4074": {"name": "澄迈县", "parent": "22"},
520
+ "4075": {"name": "临高县", "parent": "22"},
521
+ "4076": {"name": "琼中黎族苗族自治县", "parent": "22"},
522
+ "4077": {"name": "保亭黎族苗族自治县", "parent": "22"},
523
+ "4078": {"name": "白沙黎族自治县", "parent": "22"},
524
+ "4079": {"name": "昌江黎族自治县", "parent": "22"},
525
+ "4080": {"name": "乐东黎族自治县", "parent": "22"},
526
+ "4081": {"name": "陵水黎族自治县", "parent": "22"},
527
+ "4082": {"name": "马来西亚", "parent": "3956"},
528
+ "6047": {"name": "长寿区", "parent": "23"},
529
+ "6857": {"name": "阿富汗", "parent": "3956"},
530
+ "6858": {"name": "阿尔巴尼亚", "parent": "3956"},
531
+ "6859": {"name": "阿尔及利亚", "parent": "3956"},
532
+ "6860": {"name": "美属萨摩亚", "parent": "3956"},
533
+ "6861": {"name": "安道尔", "parent": "3956"},
534
+ "6862": {"name": "安哥拉", "parent": "3956"},
535
+ "6863": {"name": "安圭拉", "parent": "3956"},
536
+ "6864": {"name": "南极洲", "parent": "3956"},
537
+ "6865": {"name": "安提瓜和巴布达", "parent": "3956"},
538
+ "6866": {"name": "阿根廷", "parent": "3956"},
539
+ "6867": {"name": "亚美尼亚", "parent": "3956"},
540
+ "6869": {"name": "奥地利", "parent": "3956"},
541
+ "6870": {"name": "阿塞拜疆", "parent": "3956"},
542
+ "6871": {"name": "巴哈马", "parent": "3956"},
543
+ "6872": {"name": "巴林", "parent": "3956"},
544
+ "6873": {"name": "孟加拉国", "parent": "3956"},
545
+ "6874": {"name": "巴巴多斯", "parent": "3956"},
546
+ "6875": {"name": "白俄罗斯", "parent": "3956"},
547
+ "6876": {"name": "比利时", "parent": "3956"},
548
+ "6877": {"name": "伯利兹", "parent": "3956"},
549
+ "6878": {"name": "贝宁", "parent": "3956"},
550
+ "6879": {"name": "百慕大", "parent": "3956"},
551
+ "6880": {"name": "不丹", "parent": "3956"},
552
+ "6881": {"name": "玻利维亚", "parent": "3956"},
553
+ "6882": {"name": "波黑", "parent": "3956"},
554
+ "6883": {"name": "博茨瓦纳", "parent": "3956"},
555
+ "6884": {"name": "布维岛", "parent": "3956"},
556
+ "6885": {"name": "巴西", "parent": "3956"},
557
+ "6886": {"name": "英属印度洋领土", "parent": "3956"},
558
+ "6887": {"name": "文莱", "parent": "3956"},
559
+ "6888": {"name": "保加利亚", "parent": "3956"},
560
+ "6889": {"name": "布基纳法索", "parent": "3956"},
561
+ "6890": {"name": "布隆迪", "parent": "3956"},
562
+ "6891": {"name": "柬埔寨", "parent": "3956"},
563
+ "6892": {"name": "喀麦隆", "parent": "3956"},
564
+ "6893": {"name": "佛得角", "parent": "3956"},
565
+ "6894": {"name": "开曼群岛", "parent": "3956"},
566
+ "6895": {"name": "中非", "parent": "3956"},
567
+ "6896": {"name": "乍得", "parent": "3956"},
568
+ "6897": {"name": "智利", "parent": "3956"},
569
+ "6898": {"name": "圣诞岛", "parent": "3956"},
570
+ "6899": {"name": "科科斯(基林)群岛", "parent": "3956"},
571
+ "6900": {"name": "哥伦比亚", "parent": "3956"},
572
+ "6901": {"name": "科摩罗", "parent": "3956"},
573
+ "6902": {"name": "刚果(布)", "parent": "3956"},
574
+ "6903": {"name": "刚果(金)", "parent": "3956"},
575
+ "6904": {"name": "库克群岛", "parent": "3956"},
576
+ "6905": {"name": "哥斯达黎加", "parent": "3956"},
577
+ "6906": {"name": "科特迪瓦", "parent": "3956"},
578
+ "6907": {"name": "克罗地亚", "parent": "3956"},
579
+ "6908": {"name": "古巴", "parent": "3956"},
580
+ "6909": {"name": "塞浦路斯", "parent": "3956"},
581
+ "6910": {"name": "捷克", "parent": "3956"},
582
+ "6911": {"name": "丹麦", "parent": "3956"},
583
+ "6912": {"name": "吉布提", "parent": "3956"},
584
+ "6913": {"name": "多米尼克", "parent": "3956"},
585
+ "6914": {"name": "多米尼加共和国", "parent": "3956"},
586
+ "6915": {"name": "东帝汶", "parent": "3956"},
587
+ "6916": {"name": "厄瓜多尔", "parent": "3956"},
588
+ "6917": {"name": "埃及", "parent": "3956"},
589
+ "6918": {"name": "萨尔瓦多", "parent": "3956"},
590
+ "6919": {"name": "赤道几内亚", "parent": "3956"},
591
+ "6920": {"name": "厄立特里亚", "parent": "3956"},
592
+ "6921": {"name": "爱沙尼亚", "parent": "3956"},
593
+ "6922": {"name": "埃塞俄比亚", "parent": "3956"},
594
+ "6923": {"name": "福克兰群岛(马尔维纳斯)", "parent": "3956"},
595
+ "6924": {"name": "法罗群岛", "parent": "3956"},
596
+ "6925": {"name": "斐济", "parent": "3956"},
597
+ "6926": {"name": "芬兰", "parent": "3956"},
598
+ "6927": {"name": "法属圭亚那", "parent": "3956"},
599
+ "6928": {"name": "法属波利尼西亚", "parent": "3956"},
600
+ "6929": {"name": "法属南部领土", "parent": "3956"},
601
+ "6930": {"name": "加蓬", "parent": "3956"},
602
+ "6931": {"name": "冈比亚", "parent": "3956"},
603
+ "6932": {"name": "格鲁吉亚", "parent": "3956"},
604
+ "6933": {"name": "加纳", "parent": "3956"},
605
+ "6934": {"name": "直布罗陀", "parent": "3956"},
606
+ "6935": {"name": "希腊", "parent": "3956"},
607
+ "6936": {"name": "格陵兰", "parent": "3956"},
608
+ "6937": {"name": "格林纳达", "parent": "3956"},
609
+ "6938": {"name": "瓜德罗普", "parent": "3956"},
610
+ "6939": {"name": "关岛", "parent": "3956"},
611
+ "6940": {"name": "危地马拉", "parent": "3956"},
612
+ "6941": {"name": "几内亚", "parent": "3956"},
613
+ "6942": {"name": "几内亚比绍", "parent": "3956"},
614
+ "6943": {"name": "圭亚那", "parent": "3956"},
615
+ "6944": {"name": "海地", "parent": "3956"},
616
+ "6945": {"name": "赫德岛和麦克唐纳岛", "parent": "3956"},
617
+ "6946": {"name": "洪都拉斯", "parent": "3956"},
618
+ "6947": {"name": "匈牙利", "parent": "3956"},
619
+ "6948": {"name": "冰岛", "parent": "3956"},
620
+ "6949": {"name": "印度", "parent": "3956"},
621
+ "6950": {"name": "印度尼西亚", "parent": "3956"},
622
+ "6951": {"name": "伊朗", "parent": "3956"},
623
+ "6952": {"name": "伊拉克", "parent": "3956"},
624
+ "6953": {"name": "爱尔兰", "parent": "3956"},
625
+ "6954": {"name": "以色列", "parent": "3956"},
626
+ "6955": {"name": "牙买加", "parent": "3956"},
627
+ "6956": {"name": "约旦", "parent": "3956"},
628
+ "6957": {"name": "哈萨克斯坦", "parent": "3956"},
629
+ "6958": {"name": "肯尼亚", "parent": "3956"},
630
+ "6959": {"name": "基里巴斯", "parent": "3956"},
631
+ "6960": {"name": "朝鲜", "parent": "3956"},
632
+ "6961": {"name": "科威特", "parent": "3956"},
633
+ "6962": {"name": "吉尔吉斯斯坦", "parent": "3956"},
634
+ "6963": {"name": "老挝", "parent": "3956"},
635
+ "6964": {"name": "拉脱维亚", "parent": "3956"},
636
+ "6965": {"name": "黎巴嫩", "parent": "3956"},
637
+ "6966": {"name": "莱索托", "parent": "3956"},
638
+ "6967": {"name": "利比里亚", "parent": "3956"},
639
+ "6968": {"name": "利比亚", "parent": "3956"},
640
+ "6969": {"name": "列支敦士登", "parent": "3956"},
641
+ "6970": {"name": "立陶宛", "parent": "3956"},
642
+ "6971": {"name": "卢森堡", "parent": "3956"},
643
+ "6972": {"name": "前南马其顿", "parent": "3956"},
644
+ "6973": {"name": "马达加斯加", "parent": "3956"},
645
+ "6974": {"name": "马拉维", "parent": "3956"},
646
+ "6975": {"name": "马尔代夫", "parent": "3956"},
647
+ "6976": {"name": "马里", "parent": "3956"},
648
+ "6977": {"name": "马耳他", "parent": "3956"},
649
+ "6978": {"name": "马绍尔群岛", "parent": "3956"},
650
+ "6979": {"name": "马提尼克", "parent": "3956"},
651
+ "6980": {"name": "毛里塔尼亚", "parent": "3956"},
652
+ "6981": {"name": "毛里求斯", "parent": "3956"},
653
+ "6982": {"name": "马约特", "parent": "3956"},
654
+ "6983": {"name": "墨西哥", "parent": "3956"},
655
+ "6984": {"name": "密克罗尼西亚联邦", "parent": "3956"},
656
+ "6985": {"name": "摩尔多瓦", "parent": "3956"},
657
+ "6986": {"name": "摩纳哥", "parent": "3956"},
658
+ "6987": {"name": "蒙古", "parent": "3956"},
659
+ "6988": {"name": "蒙特塞拉特", "parent": "3956"},
660
+ "6989": {"name": "摩洛哥", "parent": "3956"},
661
+ "6990": {"name": "莫桑比克", "parent": "3956"},
662
+ "6991": {"name": "缅甸", "parent": "3956"},
663
+ "6992": {"name": "纳米比亚", "parent": "3956"},
664
+ "6993": {"name": "瑙鲁", "parent": "3956"},
665
+ "6994": {"name": "尼泊尔", "parent": "3956"},
666
+ "6995": {"name": "荷兰", "parent": "3956"},
667
+ "6996": {"name": "荷属安的列斯", "parent": "3956"},
668
+ "6997": {"name": "新喀里多尼亚", "parent": "3956"},
669
+ "6998": {"name": "新西兰", "parent": "3956"},
670
+ "6999": {"name": "尼加拉瓜", "parent": "3956"},
671
+ "7000": {"name": "尼日尔", "parent": "3956"},
672
+ "7001": {"name": "尼日利亚", "parent": "3956"},
673
+ "7002": {"name": "纽埃", "parent": "3956"},
674
+ "7003": {"name": "诺福克岛", "parent": "3956"},
675
+ "7004": {"name": "北马里亚纳", "parent": "3956"},
676
+ "7005": {"name": "挪威", "parent": "3956"},
677
+ "7006": {"name": "阿曼", "parent": "3956"},
678
+ "7007": {"name": "巴基斯坦", "parent": "3956"},
679
+ "7008": {"name": "帕劳", "parent": "3956"},
680
+ "7009": {"name": "巴勒斯坦", "parent": "3956"},
681
+ "7010": {"name": "巴拿马", "parent": "3956"},
682
+ "7011": {"name": "巴布亚新几内亚", "parent": "3956"},
683
+ "7012": {"name": "巴拉圭", "parent": "3956"},
684
+ "7013": {"name": "秘鲁", "parent": "3956"},
685
+ "7014": {"name": "菲律宾", "parent": "3956"},
686
+ "7015": {"name": "皮特凯恩群岛", "parent": "3956"},
687
+ "7016": {"name": "波兰", "parent": "3956"},
688
+ "7017": {"name": "葡萄牙", "parent": "3956"},
689
+ "7018": {"name": "波多黎各", "parent": "3956"},
690
+ "7019": {"name": "卡塔尔", "parent": "3956"},
691
+ "7020": {"name": "留尼汪", "parent": "3956"},
692
+ "7021": {"name": "罗马尼亚", "parent": "3956"},
693
+ "7022": {"name": "俄罗斯联邦", "parent": "3956"},
694
+ "7023": {"name": "卢旺达", "parent": "3956"},
695
+ "7024": {"name": "圣赫勒拿", "parent": "3956"},
696
+ "7025": {"name": "圣基茨和尼维斯", "parent": "3956"},
697
+ "7026": {"name": "圣卢西亚", "parent": "3956"},
698
+ "7027": {"name": "圣皮埃尔和密克隆", "parent": "3956"},
699
+ "7028": {"name": "圣文森特和格林纳丁斯", "parent": "3956"},
700
+ "7029": {"name": "萨摩亚", "parent": "3956"},
701
+ "7030": {"name": "圣马力诺", "parent": "3956"},
702
+ "7031": {"name": "圣多美和普林西比", "parent": "3956"},
703
+ "7032": {"name": "沙特阿拉伯", "parent": "3956"},
704
+ "7033": {"name": "塞内加尔", "parent": "3956"},
705
+ "7034": {"name": "塞舌尔", "parent": "3956"},
706
+ "7035": {"name": "塞拉利昂", "parent": "3956"},
707
+ "7036": {"name": "新加坡", "parent": "3956"},
708
+ "7037": {"name": "斯洛伐克", "parent": "3956"},
709
+ "7038": {"name": "斯洛文尼亚", "parent": "3956"},
710
+ "7039": {"name": "所罗门群岛", "parent": "3956"},
711
+ "7040": {"name": "索马里", "parent": "3956"},
712
+ "7041": {"name": "南非", "parent": "3956"},
713
+ "7042": {"name": "南乔治亚岛和南桑德韦奇岛", "parent": "3956"},
714
+ "7043": {"name": "斯里兰卡", "parent": "3956"},
715
+ "7044": {"name": "苏丹", "parent": "3956"},
716
+ "7045": {"name": "苏里南", "parent": "3956"},
717
+ "7046": {"name": "斯瓦尔巴群岛", "parent": "3956"},
718
+ "7047": {"name": "斯威士兰", "parent": "3956"},
719
+ "7048": {"name": "瑞典", "parent": "3956"},
720
+ "7049": {"name": "瑞士", "parent": "3956"},
721
+ "7050": {"name": "叙利亚", "parent": "3956"},
722
+ "7051": {"name": "塔吉克斯坦", "parent": "3956"},
723
+ "7052": {"name": "坦桑尼亚", "parent": "3956"},
724
+ "7053": {"name": "泰国", "parent": "3956"},
725
+ "7054": {"name": "多哥", "parent": "3956"},
726
+ "7055": {"name": "托克劳", "parent": "3956"},
727
+ "7056": {"name": "汤加", "parent": "3956"},
728
+ "7057": {"name": "特立尼达和多巴哥", "parent": "3956"},
729
+ "7058": {"name": "突尼斯", "parent": "3956"},
730
+ "7059": {"name": "土耳其", "parent": "3956"},
731
+ "7060": {"name": "土库曼斯坦", "parent": "3956"},
732
+ "7061": {"name": "特克斯科斯群岛", "parent": "3956"},
733
+ "7062": {"name": "图瓦卢", "parent": "3956"},
734
+ "7063": {"name": "乌干达", "parent": "3956"},
735
+ "7064": {"name": "乌克兰", "parent": "3956"},
736
+ "7065": {"name": "阿联酋", "parent": "3956"},
737
+ "7066": {"name": "美国本土外小岛屿", "parent": "3956"},
738
+ "7067": {"name": "乌拉圭", "parent": "3956"},
739
+ "7068": {"name": "乌兹别克斯坦", "parent": "3956"},
740
+ "7069": {"name": "瓦努阿图", "parent": "3956"},
741
+ "7070": {"name": "梵蒂冈", "parent": "3956"},
742
+ "7071": {"name": "委内瑞拉", "parent": "3956"},
743
+ "7072": {"name": "越南", "parent": "3956"},
744
+ "7073": {"name": "英属维尔京群岛", "parent": "3956"},
745
+ "7074": {"name": "美属维尔京群岛", "parent": "3956"},
746
+ "7075": {"name": "瓦利斯和富图纳", "parent": "3956"},
747
+ "7076": {"name": "西撒哈拉", "parent": "3956"},
748
+ "7077": {"name": "也门", "parent": "3956"},
749
+ "7078": {"name": "南斯拉夫", "parent": "3956"},
750
+ "7079": {"name": "赞比亚", "parent": "3956"},
751
+ "7080": {"name": "津巴布韦", "parent": "3956"},
752
+ "7081": {"name": "塞尔维亚", "parent": "3956"},
753
+ "7082": {"name": "雄安新区", "parent": "4"},
754
+ "7084": {"name": "天门市", "parent": "18"},
755
  }
756
 
757
# Every region name in TBL, for constant-time membership tests in isName().
NM_SET = {v["name"] for v in TBL.values()}
758
+
759
 
760
  def get_names(id):
761
+ if not id or str(id).lower() == "none":
762
+ return []
763
  id = str(id)
764
+ if not re.match("[0-9]+$", id.strip()):
765
+ return [id]
766
  nms = []
767
  d = TBL.get(id)
768
+ if not d:
769
+ return []
770
  nms.append(d["name"])
771
  p = get_names(d["parent"])
772
+ if p:
773
+ nms.extend(p)
774
  return nms
775
 
776
+
777
+
778
  def isName(nm):
779
+ if nm in NM_SET:
780
+ return True
781
+ if nm + "" in NM_SET:
782
+ return True
783
+ if re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET:
784
+ return True
785
  return False
deepdoc/parser/resume/entities/schools.py CHANGED
@@ -16,8 +16,11 @@ import json
16
  import re
17
  import copy
18
  import pandas as pd
 
19
  current_file_path = os.path.dirname(os.path.abspath(__file__))
20
- TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("")
 
 
21
  TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
22
  GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r"))
23
  GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
@@ -26,14 +29,15 @@ GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
26
  def loadRank(fnm):
27
  global TBL
28
  TBL["rank"] = 1000000
29
- with open(fnm, "r", encoding='utf-8') as f:
30
  while True:
31
- l = f.readline()
32
- if not l:break
33
- l = l.strip("\n").split(",")
 
34
  try:
35
- nm,rk = l[0].strip(),int(l[1])
36
- #assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
37
  TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
38
  except Exception:
39
  pass
@@ -44,27 +48,35 @@ loadRank(os.path.join(current_file_path, "res/school.rank.csv"))
44
 
45
  def split(txt):
46
  tks = []
47
- for t in re.sub(r"[ \t]+", " ",txt).split():
48
- if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \
49
- re.match(r"[a-zA-Z]", t) and tks:
 
 
 
 
50
  tks[-1] = tks[-1] + " " + t
51
- else:tks.append(t)
 
52
  return tks
53
 
54
 
55
  def select(nm):
56
  global TBL
57
- if not nm:return
58
- if isinstance(nm, list):nm = str(nm[0])
 
 
59
  nm = split(nm)[0]
60
  nm = str(nm).lower().strip()
61
  nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
62
  nm = re.sub(r"(^the |[,.&()();;·]+|^(英国|美国|瑞士))", "", nm)
63
  nm = re.sub(r"大学.*学院", "大学", nm)
64
  tbl = copy.deepcopy(TBL)
65
- tbl["hit_alias"] = tbl["alias"].map(lambda x:nm in set(x.split("+")))
66
- res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | (tbl.hit_alias == True))]
67
- if res.empty:return
 
68
 
69
  return json.loads(res.to_json(orient="records"))[0]
70
 
@@ -74,4 +86,3 @@ def is_good(nm):
74
  nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
75
  nm = re.sub(r"[''`‘’“”,. &()();;]+", "", nm)
76
  return nm in GOOD_SCH
77
-
 
16
  import re
17
  import copy
18
  import pandas as pd
19
+
20
  current_file_path = os.path.dirname(os.path.abspath(__file__))
21
+ TBL = pd.read_csv(
22
+ os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0
23
+ ).fillna("")
24
  TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
25
  GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r"))
26
  GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
 
29
  def loadRank(fnm):
30
  global TBL
31
  TBL["rank"] = 1000000
32
+ with open(fnm, "r", encoding="utf-8") as f:
33
  while True:
34
+ line = f.readline()
35
+ if not line:
36
+ break
37
+ line = line.strip("\n").split(",")
38
  try:
39
+ nm, rk = line[0].strip(), int(line[1])
40
+ # assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
41
  TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
42
  except Exception:
43
  pass
 
48
 
49
  def split(txt):
50
  tks = []
51
+ for t in re.sub(r"[ \t]+", " ", txt).split():
52
+ if (
53
+ tks
54
+ and re.match(r".*[a-zA-Z]$", tks[-1])
55
+ and re.match(r"[a-zA-Z]", t)
56
+ and tks
57
+ ):
58
  tks[-1] = tks[-1] + " " + t
59
+ else:
60
+ tks.append(t)
61
  return tks
62
 
63
 
64
  def select(nm):
65
  global TBL
66
+ if not nm:
67
+ return
68
+ if isinstance(nm, list):
69
+ nm = str(nm[0])
70
  nm = split(nm)[0]
71
  nm = str(nm).lower().strip()
72
  nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
73
  nm = re.sub(r"(^the |[,.&()();;·]+|^(英国|美国|瑞士))", "", nm)
74
  nm = re.sub(r"大学.*学院", "大学", nm)
75
  tbl = copy.deepcopy(TBL)
76
+ tbl["hit_alias"] = tbl["alias"].map(lambda x: nm in set(x.split("+")))
77
+ res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | tbl.hit_alias)]
78
+ if res.empty:
79
+ return
80
 
81
  return json.loads(res.to_json(orient="records"))[0]
82
 
 
86
  nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
87
  nm = re.sub(r"[''`‘’“”,. &()();;]+", "", nm)
88
  return nm in GOOD_SCH
 
deepdoc/parser/resume/step_two.py CHANGED
@@ -25,7 +25,8 @@ from xpinyin import Pinyin
25
  from contextlib import contextmanager
26
 
27
 
28
class TimeoutException(Exception):
    """Signals that an operation exceeded its time limit.

    NOTE(review): presumably raised by the timeout context manager defined
    later in this module - confirm against the full file.
    """
 
29
 
30
 
31
  @contextmanager
@@ -50,8 +51,10 @@ def rmHtmlTag(line):
50
 
51
 
52
  def highest_degree(dg):
53
- if not dg: return ""
54
- if type(dg) == type(""): dg = [dg]
 
 
55
  m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
56
  return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0]
57
 
@@ -68,10 +71,12 @@ def forEdu(cv):
68
  for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))):
69
  e = {}
70
  if n.get("end_time"):
71
- if n["end_time"] > edu_end_dt: edu_end_dt = n["end_time"]
 
72
  try:
73
  dt = n["end_time"]
74
- if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt)
 
75
  y, m, d = getYMD(dt)
76
  ed_dt.append(str(y))
77
  e["end_dt_kwd"] = str(y)
@@ -80,7 +85,8 @@ def forEdu(cv):
80
  if n.get("start_time"):
81
  try:
82
  dt = n["start_time"]
83
- if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt)
 
84
  y, m, d = getYMD(dt)
85
  st_dt.append(str(y))
86
  e["start_dt_kwd"] = str(y)
@@ -89,13 +95,20 @@ def forEdu(cv):
89
 
90
  r = schools.select(n.get("school_name", ""))
91
  if r:
92
- if str(r.get("type", "")) == "1": fea.append("211")
93
- if str(r.get("type", "")) == "2": fea.append("211")
94
- if str(r.get("is_abroad", "")) == "1": fea.append("留学")
95
- if str(r.get("is_double_first", "")) == "1": fea.append("双一流")
96
- if str(r.get("is_985", "")) == "1": fea.append("985")
97
- if str(r.get("is_world_known", "")) == "1": fea.append("海外知名")
98
- if r.get("rank") and cv["school_rank_int"] > r["rank"]: cv["school_rank_int"] = r["rank"]
 
 
 
 
 
 
 
99
 
100
  if n.get("school_name") and isinstance(n["school_name"], str):
101
  sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"]))
@@ -106,22 +119,25 @@ def forEdu(cv):
106
  maj.append(n["discipline_name"])
107
  e["major_kwd"] = n["discipline_name"]
108
 
109
- if not n.get("degree") and "985" in fea and not first_fea: n["degree"] = "1"
 
110
 
111
  if n.get("degree"):
112
  d = degrees.get_name(n["degree"])
113
- if d: e["degree_kwd"] = d
114
- if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)",
115
- n.get(
116
- "school_name",
117
- ""))): d = "专升本"
118
- if d: deg.append(d)
119
 
120
  # for first degree
121
  if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
122
  fdeg = [d]
123
- if n.get("school_name"): fsch = [n["school_name"]]
124
- if n.get("discipline_name"): fmaj = [n["discipline_name"]]
 
 
125
  first_fea = copy.deepcopy(fea)
126
 
127
  edu_nst.append(e)
@@ -140,16 +156,26 @@ def forEdu(cv):
140
  else:
141
  cv["sch_rank_kwd"].append("一般学校")
142
 
143
- if edu_nst: cv["edu_nst"] = edu_nst
144
- if fea: cv["edu_fea_kwd"] = list(set(fea))
145
- if first_fea: cv["edu_first_fea_kwd"] = list(set(first_fea))
146
- if maj: cv["major_kwd"] = maj
147
- if fsch: cv["first_school_name_kwd"] = fsch
148
- if fdeg: cv["first_degree_kwd"] = fdeg
149
- if fmaj: cv["first_major_kwd"] = fmaj
150
- if st_dt: cv["edu_start_kwd"] = st_dt
151
- if ed_dt: cv["edu_end_kwd"] = ed_dt
152
- if ed_dt: cv["edu_end_int"] = max([int(t) for t in ed_dt])
 
 
 
 
 
 
 
 
 
 
153
  if deg:
154
  if "本科" in deg and "专科" in deg:
155
  deg.append("专升本")
@@ -158,8 +184,10 @@ def forEdu(cv):
158
  cv["highest_degree_kwd"] = highest_degree(deg)
159
  if edu_end_dt:
160
  try:
161
- if re.match(r"[0-9]{9,}", edu_end_dt): edu_end_dt = turnTm2Dt(edu_end_dt)
162
- if edu_end_dt.strip("\n") == "至今": edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
 
 
163
  y, m, d = getYMD(edu_end_dt)
164
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
165
  except Exception as e:
@@ -171,7 +199,8 @@ def forEdu(cv):
171
  or not cv.get("degree_kwd"):
172
  for c in sch:
173
  if schools.is_good(c):
174
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
 
175
  cv["tag_kwd"].append("好学校")
176
  cv["tag_kwd"].append("好学历")
177
  break
@@ -180,28 +209,39 @@ def forEdu(cv):
180
  any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
181
  or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
182
  or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
183
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
184
- if "好学历" not in cv["tag_kwd"]: cv["tag_kwd"].append("好学历")
185
-
186
- if cv.get("major_kwd"): cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
187
- if cv.get("school_name_kwd"): cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
188
- if cv.get("first_school_name_kwd"): cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
189
- if cv.get("first_major_kwd"): cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
 
 
 
 
 
 
190
 
191
  return cv
192
 
193
 
194
  def forProj(cv):
195
- if not cv.get("project_obj"): return cv
 
196
 
197
  pro_nms, desc = [], []
198
  for i, n in enumerate(
199
- sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "",
200
  reverse=True)):
201
- if n.get("name"): pro_nms.append(n["name"])
202
- if n.get("describe"): desc.append(str(n["describe"]))
203
- if n.get("responsibilities"): desc.append(str(n["responsibilities"]))
204
- if n.get("achivement"): desc.append(str(n["achivement"]))
 
 
 
 
205
 
206
  if pro_nms:
207
  # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms))
@@ -233,15 +273,16 @@ def forWork(cv):
233
  work_st_tm = ""
234
  corp_tags = []
235
  for i, n in enumerate(
236
- sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "",
237
  reverse=True)):
238
- if type(n) == type(""):
239
  try:
240
  n = json_loads(n)
241
  except Exception:
242
  continue
243
 
244
- if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"]
 
245
  for c in flds:
246
  if not n.get(c) or str(n[c]) == '0':
247
  fea[c].append("")
@@ -262,14 +303,18 @@ def forWork(cv):
262
  fea[c].append(rmHtmlTag(str(n[c]).lower()))
263
 
264
  y, m, d = getYMD(n.get("start_time"))
265
- if not y or not m: continue
 
266
  st = "%s-%02d-%02d" % (y, int(m), int(d))
267
  latest_job_tm = st
268
 
269
  y, m, d = getYMD(n.get("end_time"))
270
- if (not y or not m) and i > 0: continue
271
- if not y or not m or int(y) > 2022: y, m, d = getYMD(str(n.get("updated_at", "")))
272
- if not y or not m: continue
 
 
 
273
  ed = "%s-%02d-%02d" % (y, int(m), int(d))
274
 
275
  try:
@@ -279,22 +324,28 @@ def forWork(cv):
279
 
280
  if n.get("scale"):
281
  r = re.search(r"^([0-9]+)", str(n["scale"]))
282
- if r: scales.append(int(r.group(1)))
 
283
 
284
  if goodcorp:
285
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
 
286
  cv["tag_kwd"].append("好公司")
287
  if goodcorp_:
288
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
 
289
  cv["tag_kwd"].append("好公司(曾)")
290
 
291
  if corp_tags:
292
- if "tag_kwd" not in cv: cv["tag_kwd"] = []
 
293
  cv["tag_kwd"].extend(corp_tags)
294
  cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]
295
 
296
- if latest_job_tm: cv["latest_job_dt"] = latest_job_tm
297
- if fea["corporation_id"]: cv["corporation_id"] = fea["corporation_id"]
 
 
298
 
299
  if fea["position_name"]:
300
  cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0])
@@ -317,18 +368,23 @@ def forWork(cv):
317
  cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0])
318
  cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:]))
319
 
320
- if fea["subordinates_count"]: fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
 
321
  re.match(r"[^0-9]+$", str(i))]
322
- if fea["subordinates_count"]: cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
 
323
 
324
- if type(cv.get("corporation_id")) == type(1): cv["corporation_id"] = [str(cv["corporation_id"])]
325
- if not cv.get("corporation_id"): cv["corporation_id"] = []
 
 
326
  for i in cv.get("corporation_id", []):
327
  cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)
328
 
329
  if work_st_tm:
330
  try:
331
- if re.match(r"[0-9]{9,}", work_st_tm): work_st_tm = turnTm2Dt(work_st_tm)
 
332
  y, m, d = getYMD(work_st_tm)
333
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
334
  except Exception as e:
@@ -339,28 +395,37 @@ def forWork(cv):
339
  cv["dua_flt"] = np.mean(duas)
340
  cv["cur_dua_int"] = duas[0]
341
  cv["job_num_int"] = len(duas)
342
- if scales: cv["scale_flt"] = np.max(scales)
 
343
  return cv
344
 
345
 
346
  def turnTm2Dt(b):
347
- if not b: return
 
348
  b = str(b).strip()
349
- if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
 
350
  return b
351
 
352
 
353
  def getYMD(b):
354
  y, m, d = "", "", "01"
355
- if not b: return (y, m, d)
 
356
  b = turnTm2Dt(b)
357
- if re.match(r"[0-9]{4}", b): y = int(b[:4])
 
358
  r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
359
- if r: m = r.group(1)
 
360
  r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b)
361
- if r: d = r.group(1)
362
- if not d or int(d) == 0 or int(d) > 31: d = "1"
363
- if not m or int(m) > 12 or int(m) < 1: m = "1"
 
 
 
364
  return (y, m, d)
365
 
366
 
@@ -369,7 +434,8 @@ def birth(cv):
369
  cv["integerity_flt"] *= 0.9
370
  return cv
371
  y, m, d = getYMD(cv["birth"])
372
- if not m or not y: return cv
 
373
  b = "%s-%02d-%02d" % (y, int(m), int(d))
374
  cv["birth_dt"] = b
375
  cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d))
@@ -380,7 +446,8 @@ def birth(cv):
380
 
381
  def parse(cv):
382
  for k in cv.keys():
383
- if cv[k] == '\\N': cv[k] = ''
 
384
  # cv = cv.asDict()
385
  tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
386
  "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
@@ -402,9 +469,12 @@ def parse(cv):
402
 
403
  rmkeys = []
404
  for k in cv.keys():
405
- if cv[k] is None: rmkeys.append(k)
406
- if (type(cv[k]) == type([]) or type(cv[k]) == type("")) and len(cv[k]) == 0: rmkeys.append(k)
407
- for k in rmkeys: del cv[k]
 
 
 
408
 
409
  integerity = 0.
410
  flds_num = 0.
@@ -414,7 +484,8 @@ def parse(cv):
414
  flds_num += len(flds)
415
  for f in flds:
416
  v = str(cv.get(f, ""))
417
- if len(v) > 0 and v != '0' and v != '[]': integerity += 1
 
418
 
419
  hasValues(tks_fld)
420
  hasValues(small_tks_fld)
@@ -433,7 +504,8 @@ def parse(cv):
433
  (r"[ ()\(\)人/·0-9-]+", ""),
434
  (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
435
  cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
436
- if len(cv["corporation_type"]) < 2: del cv["corporation_type"]
 
437
 
438
  if cv.get("political_status"):
439
  for p, r in [
@@ -441,9 +513,11 @@ def parse(cv):
441
  (r".*(无党派|公民).*", "群众"),
442
  (r".*团员.*", "团员")]:
443
  cv["political_status"] = re.sub(p, r, cv["political_status"])
444
- if not re.search(r"[党团群]", cv["political_status"]): del cv["political_status"]
 
445
 
446
- if cv.get("phone"): cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
 
447
 
448
  keys = list(cv.keys())
449
  for k in keys:
@@ -454,9 +528,11 @@ def parse(cv):
454
  cv[k] = [a for _, a in cv[k].items()]
455
  nms = []
456
  for n in cv[k]:
457
- if type(n) != type({}) or "name" not in n or not n.get("name"): continue
 
458
  n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
459
- if not n["name"]: continue
 
460
  nms.append(n["name"])
461
  if nms:
462
  t = k[:-4]
@@ -469,15 +545,18 @@ def parse(cv):
469
  # tokenize fields
470
  if k in tks_fld:
471
  cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
472
- if k in small_tks_fld: cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
 
473
 
474
  # keyword fields
475
- if k in kwd_fld: cv[f"{k}_kwd"] = [n.lower()
 
476
  for n in re.split(r"[\t,,;;. ]",
477
  re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
478
  ) if n]
479
 
480
- if k in num_fld and cv.get(k): cv[f"{k}_int"] = cv[k]
 
481
 
482
  cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
483
  # for name field
@@ -501,10 +580,12 @@ def parse(cv):
501
  cv["name_py_pref0_tks"] = ""
502
  cv["name_py_pref_tks"] = ""
503
  for py in PY.get_pinyins(nm[:20], ''):
504
- for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i]
 
505
  for py in PY.get_pinyins(nm[:20], ' '):
506
  py = py.split()
507
- for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i])
 
508
 
509
  cv["name_kwd"] = name
510
  cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
@@ -526,22 +607,30 @@ def parse(cv):
526
  cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
527
  else:
528
  y, m, d = getYMD(str(cv.get("updated_at", "")))
529
- if not y: y = "2012"
530
- if not m: m = "01"
531
- if not d: d = "01"
 
 
 
532
  cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
533
  # long text tokenize
534
 
535
- if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
 
536
 
537
  # for yes or no field
538
  fea = []
539
  for f, y, n in is_fld:
540
- if f not in cv: continue
541
- if cv[f] == '是': fea.append(y)
542
- if cv[f] == '': fea.append(n)
 
 
 
543
 
544
- if fea: cv["tag_kwd"] = fea
 
545
 
546
  cv = forEdu(cv)
547
  cv = forProj(cv)
@@ -550,9 +639,11 @@ def parse(cv):
550
 
551
  cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
552
  for i in range(len(cv["corp_proj_sch_deg_kwd"])):
553
- for j in cv.get("sch_rank_kwd", []): cv["corp_proj_sch_deg_kwd"][i] += "+" + j
 
554
  for i in range(len(cv["corp_proj_sch_deg_kwd"])):
555
- if cv.get("highest_degree_kwd"): cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
 
556
 
557
  try:
558
  if not cv.get("work_exp_flt") and cv.get("work_start_time"):
@@ -565,17 +656,21 @@ def parse(cv):
565
  cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
566
  except Exception as e:
567
  logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
568
- if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
 
569
 
570
  keys = list(cv.keys())
571
  for k in keys:
572
- if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): del cv[k]
 
573
  for k in cv.keys():
574
- if not re.search("_(kwd|id)$", k) or type(cv[k]) != type([]): continue
 
575
  cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
576
  keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
577
  for k in keys:
578
- if cv[k] <= 0: del cv[k]
 
579
 
580
  cv["tob_resume_id"] = str(cv["tob_resume_id"])
581
  cv["id"] = cv["tob_resume_id"]
@@ -592,5 +687,6 @@ def dealWithInt64(d):
592
  if isinstance(d, list):
593
  d = [dealWithInt64(t) for t in d]
594
 
595
- if isinstance(d, np.integer): d = int(d)
 
596
  return d
 
25
  from contextlib import contextmanager
26
 
27
 
28
class TimeoutException(Exception):
    """Exception subclass that adds no state or behaviour of its own.

    NOTE(review): presumably raised by the timeout helper defined right
    after it in this module -- confirm against the full file.
    """
30
 
31
 
32
  @contextmanager
 
51
 
52
 
53
  def highest_degree(dg):
54
+ if not dg:
55
+ return ""
56
+ if isinstance(dg, str):
57
+ dg = [dg]
58
  m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
59
  return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0]
60
 
 
71
  for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))):
72
  e = {}
73
  if n.get("end_time"):
74
+ if n["end_time"] > edu_end_dt:
75
+ edu_end_dt = n["end_time"]
76
  try:
77
  dt = n["end_time"]
78
+ if re.match(r"[0-9]{9,}", dt):
79
+ dt = turnTm2Dt(dt)
80
  y, m, d = getYMD(dt)
81
  ed_dt.append(str(y))
82
  e["end_dt_kwd"] = str(y)
 
85
  if n.get("start_time"):
86
  try:
87
  dt = n["start_time"]
88
+ if re.match(r"[0-9]{9,}", dt):
89
+ dt = turnTm2Dt(dt)
90
  y, m, d = getYMD(dt)
91
  st_dt.append(str(y))
92
  e["start_dt_kwd"] = str(y)
 
95
 
96
  r = schools.select(n.get("school_name", ""))
97
  if r:
98
+ if str(r.get("type", "")) == "1":
99
+ fea.append("211")
100
+ if str(r.get("type", "")) == "2":
101
+ fea.append("211")
102
+ if str(r.get("is_abroad", "")) == "1":
103
+ fea.append("留学")
104
+ if str(r.get("is_double_first", "")) == "1":
105
+ fea.append("双一流")
106
+ if str(r.get("is_985", "")) == "1":
107
+ fea.append("985")
108
+ if str(r.get("is_world_known", "")) == "1":
109
+ fea.append("海外知名")
110
+ if r.get("rank") and cv["school_rank_int"] > r["rank"]:
111
+ cv["school_rank_int"] = r["rank"]
112
 
113
  if n.get("school_name") and isinstance(n["school_name"], str):
114
  sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"]))
 
119
  maj.append(n["discipline_name"])
120
  e["major_kwd"] = n["discipline_name"]
121
 
122
+ if not n.get("degree") and "985" in fea and not first_fea:
123
+ n["degree"] = "1"
124
 
125
  if n.get("degree"):
126
  d = degrees.get_name(n["degree"])
127
+ if d:
128
+ e["degree_kwd"] = d
129
+ if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", n.get("school_name",""))):
130
+ d = "专升本"
131
+ if d:
132
+ deg.append(d)
133
 
134
  # for first degree
135
  if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
136
  fdeg = [d]
137
+ if n.get("school_name"):
138
+ fsch = [n["school_name"]]
139
+ if n.get("discipline_name"):
140
+ fmaj = [n["discipline_name"]]
141
  first_fea = copy.deepcopy(fea)
142
 
143
  edu_nst.append(e)
 
156
  else:
157
  cv["sch_rank_kwd"].append("一般学校")
158
 
159
+ if edu_nst:
160
+ cv["edu_nst"] = edu_nst
161
+ if fea:
162
+ cv["edu_fea_kwd"] = list(set(fea))
163
+ if first_fea:
164
+ cv["edu_first_fea_kwd"] = list(set(first_fea))
165
+ if maj:
166
+ cv["major_kwd"] = maj
167
+ if fsch:
168
+ cv["first_school_name_kwd"] = fsch
169
+ if fdeg:
170
+ cv["first_degree_kwd"] = fdeg
171
+ if fmaj:
172
+ cv["first_major_kwd"] = fmaj
173
+ if st_dt:
174
+ cv["edu_start_kwd"] = st_dt
175
+ if ed_dt:
176
+ cv["edu_end_kwd"] = ed_dt
177
+ if ed_dt:
178
+ cv["edu_end_int"] = max([int(t) for t in ed_dt])
179
  if deg:
180
  if "本科" in deg and "专科" in deg:
181
  deg.append("专升本")
 
184
  cv["highest_degree_kwd"] = highest_degree(deg)
185
  if edu_end_dt:
186
  try:
187
+ if re.match(r"[0-9]{9,}", edu_end_dt):
188
+ edu_end_dt = turnTm2Dt(edu_end_dt)
189
+ if edu_end_dt.strip("\n") == "至今":
190
+ edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
191
  y, m, d = getYMD(edu_end_dt)
192
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
193
  except Exception as e:
 
199
  or not cv.get("degree_kwd"):
200
  for c in sch:
201
  if schools.is_good(c):
202
+ if "tag_kwd" not in cv:
203
+ cv["tag_kwd"] = []
204
  cv["tag_kwd"].append("好学校")
205
  cv["tag_kwd"].append("好学历")
206
  break
 
209
  any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
210
  or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
211
  or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
212
+ if "tag_kwd" not in cv:
213
+ cv["tag_kwd"] = []
214
+ if "好学历" not in cv["tag_kwd"]:
215
+ cv["tag_kwd"].append("好学历")
216
+
217
+ if cv.get("major_kwd"):
218
+ cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
219
+ if cv.get("school_name_kwd"):
220
+ cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
221
+ if cv.get("first_school_name_kwd"):
222
+ cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
223
+ if cv.get("first_major_kwd"):
224
+ cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
225
 
226
  return cv
227
 
228
 
229
  def forProj(cv):
230
+ if not cv.get("project_obj"):
231
+ return cv
232
 
233
  pro_nms, desc = [], []
234
  for i, n in enumerate(
235
+ sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if isinstance(x, dict) else "",
236
  reverse=True)):
237
+ if n.get("name"):
238
+ pro_nms.append(n["name"])
239
+ if n.get("describe"):
240
+ desc.append(str(n["describe"]))
241
+ if n.get("responsibilities"):
242
+ desc.append(str(n["responsibilities"]))
243
+ if n.get("achivement"):
244
+ desc.append(str(n["achivement"]))
245
 
246
  if pro_nms:
247
  # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms))
 
273
  work_st_tm = ""
274
  corp_tags = []
275
  for i, n in enumerate(
276
+ sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if isinstance(x, dict) else "",
277
  reverse=True)):
278
+ if isinstance(n, str):
279
  try:
280
  n = json_loads(n)
281
  except Exception:
282
  continue
283
 
284
+ if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm):
285
+ work_st_tm = n["start_time"]
286
  for c in flds:
287
  if not n.get(c) or str(n[c]) == '0':
288
  fea[c].append("")
 
303
  fea[c].append(rmHtmlTag(str(n[c]).lower()))
304
 
305
  y, m, d = getYMD(n.get("start_time"))
306
+ if not y or not m:
307
+ continue
308
  st = "%s-%02d-%02d" % (y, int(m), int(d))
309
  latest_job_tm = st
310
 
311
  y, m, d = getYMD(n.get("end_time"))
312
+ if (not y or not m) and i > 0:
313
+ continue
314
+ if not y or not m or int(y) > 2022:
315
+ y, m, d = getYMD(str(n.get("updated_at", "")))
316
+ if not y or not m:
317
+ continue
318
  ed = "%s-%02d-%02d" % (y, int(m), int(d))
319
 
320
  try:
 
324
 
325
  if n.get("scale"):
326
  r = re.search(r"^([0-9]+)", str(n["scale"]))
327
+ if r:
328
+ scales.append(int(r.group(1)))
329
 
330
  if goodcorp:
331
+ if "tag_kwd" not in cv:
332
+ cv["tag_kwd"] = []
333
  cv["tag_kwd"].append("好公司")
334
  if goodcorp_:
335
+ if "tag_kwd" not in cv:
336
+ cv["tag_kwd"] = []
337
  cv["tag_kwd"].append("好公司(曾)")
338
 
339
  if corp_tags:
340
+ if "tag_kwd" not in cv:
341
+ cv["tag_kwd"] = []
342
  cv["tag_kwd"].extend(corp_tags)
343
  cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]
344
 
345
+ if latest_job_tm:
346
+ cv["latest_job_dt"] = latest_job_tm
347
+ if fea["corporation_id"]:
348
+ cv["corporation_id"] = fea["corporation_id"]
349
 
350
  if fea["position_name"]:
351
  cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0])
 
368
  cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0])
369
  cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:]))
370
 
371
+ if fea["subordinates_count"]:
372
+ fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
373
  re.match(r"[^0-9]+$", str(i))]
374
+ if fea["subordinates_count"]:
375
+ cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
376
 
377
+ if isinstance(cv.get("corporation_id"), int):
378
+ cv["corporation_id"] = [str(cv["corporation_id"])]
379
+ if not cv.get("corporation_id"):
380
+ cv["corporation_id"] = []
381
  for i in cv.get("corporation_id", []):
382
  cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)
383
 
384
  if work_st_tm:
385
  try:
386
+ if re.match(r"[0-9]{9,}", work_st_tm):
387
+ work_st_tm = turnTm2Dt(work_st_tm)
388
  y, m, d = getYMD(work_st_tm)
389
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
390
  except Exception as e:
 
395
  cv["dua_flt"] = np.mean(duas)
396
  cv["cur_dua_int"] = duas[0]
397
  cv["job_num_int"] = len(duas)
398
+ if scales:
399
+ cv["scale_flt"] = np.max(scales)
400
  return cv
401
 
402
 
403
  def turnTm2Dt(b):
404
+ if not b:
405
+ return
406
  b = str(b).strip()
407
+ if re.match(r"[0-9]{10,}", b):
408
+ b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
409
  return b
410
 
411
 
412
  def getYMD(b):
413
  y, m, d = "", "", "01"
414
+ if not b:
415
+ return (y, m, d)
416
  b = turnTm2Dt(b)
417
+ if re.match(r"[0-9]{4}", b):
418
+ y = int(b[:4])
419
  r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
420
+ if r:
421
+ m = r.group(1)
422
  r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b)
423
+ if r:
424
+ d = r.group(1)
425
+ if not d or int(d) == 0 or int(d) > 31:
426
+ d = "1"
427
+ if not m or int(m) > 12 or int(m) < 1:
428
+ m = "1"
429
  return (y, m, d)
430
 
431
 
 
434
  cv["integerity_flt"] *= 0.9
435
  return cv
436
  y, m, d = getYMD(cv["birth"])
437
+ if not m or not y:
438
+ return cv
439
  b = "%s-%02d-%02d" % (y, int(m), int(d))
440
  cv["birth_dt"] = b
441
  cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d))
 
446
 
447
  def parse(cv):
448
  for k in cv.keys():
449
+ if cv[k] == '\\N':
450
+ cv[k] = ''
451
  # cv = cv.asDict()
452
  tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
453
  "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
 
469
 
470
  rmkeys = []
471
  for k in cv.keys():
472
+ if cv[k] is None:
473
+ rmkeys.append(k)
474
+ if (isinstance(cv[k], list) or isinstance(cv[k], str)) and len(cv[k]) == 0:
475
+ rmkeys.append(k)
476
+ for k in rmkeys:
477
+ del cv[k]
478
 
479
  integerity = 0.
480
  flds_num = 0.
 
484
  flds_num += len(flds)
485
  for f in flds:
486
  v = str(cv.get(f, ""))
487
+ if len(v) > 0 and v != '0' and v != '[]':
488
+ integerity += 1
489
 
490
  hasValues(tks_fld)
491
  hasValues(small_tks_fld)
 
504
  (r"[ ()\(\)人/·0-9-]+", ""),
505
  (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
506
  cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
507
+ if len(cv["corporation_type"]) < 2:
508
+ del cv["corporation_type"]
509
 
510
  if cv.get("political_status"):
511
  for p, r in [
 
513
  (r".*(无党派|公民).*", "群众"),
514
  (r".*团员.*", "团员")]:
515
  cv["political_status"] = re.sub(p, r, cv["political_status"])
516
+ if not re.search(r"[党团群]", cv["political_status"]):
517
+ del cv["political_status"]
518
 
519
+ if cv.get("phone"):
520
+ cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
521
 
522
  keys = list(cv.keys())
523
  for k in keys:
 
528
  cv[k] = [a for _, a in cv[k].items()]
529
  nms = []
530
  for n in cv[k]:
531
+ if not isinstance(n, dict) or "name" not in n or not n.get("name"):
532
+ continue
533
  n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
534
+ if not n["name"]:
535
+ continue
536
  nms.append(n["name"])
537
  if nms:
538
  t = k[:-4]
 
545
  # tokenize fields
546
  if k in tks_fld:
547
  cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
548
+ if k in small_tks_fld:
549
+ cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
550
 
551
  # keyword fields
552
+ if k in kwd_fld:
553
+ cv[f"{k}_kwd"] = [n.lower()
554
  for n in re.split(r"[\t,,;;. ]",
555
  re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
556
  ) if n]
557
 
558
+ if k in num_fld and cv.get(k):
559
+ cv[f"{k}_int"] = cv[k]
560
 
561
  cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
562
  # for name field
 
580
  cv["name_py_pref0_tks"] = ""
581
  cv["name_py_pref_tks"] = ""
582
  for py in PY.get_pinyins(nm[:20], ''):
583
+ for i in range(2, len(py) + 1):
584
+ cv["name_py_pref_tks"] += " " + py[:i]
585
  for py in PY.get_pinyins(nm[:20], ' '):
586
  py = py.split()
587
+ for i in range(1, len(py) + 1):
588
+ cv["name_py_pref0_tks"] += " " + "".join(py[:i])
589
 
590
  cv["name_kwd"] = name
591
  cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
 
607
  cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
608
  else:
609
  y, m, d = getYMD(str(cv.get("updated_at", "")))
610
+ if not y:
611
+ y = "2012"
612
+ if not m:
613
+ m = "01"
614
+ if not d:
615
+ d = "01"
616
  cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
617
  # long text tokenize
618
 
619
+ if cv.get("responsibilities"):
620
+ cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
621
 
622
  # for yes or no field
623
  fea = []
624
  for f, y, n in is_fld:
625
+ if f not in cv:
626
+ continue
627
+ if cv[f] == '':
628
+ fea.append(y)
629
+ if cv[f] == '否':
630
+ fea.append(n)
631
 
632
+ if fea:
633
+ cv["tag_kwd"] = fea
634
 
635
  cv = forEdu(cv)
636
  cv = forProj(cv)
 
639
 
640
  cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
641
  for i in range(len(cv["corp_proj_sch_deg_kwd"])):
642
+ for j in cv.get("sch_rank_kwd", []):
643
+ cv["corp_proj_sch_deg_kwd"][i] += "+" + j
644
  for i in range(len(cv["corp_proj_sch_deg_kwd"])):
645
+ if cv.get("highest_degree_kwd"):
646
+ cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
647
 
648
  try:
649
  if not cv.get("work_exp_flt") and cv.get("work_start_time"):
 
656
  cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
657
  except Exception as e:
658
  logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
659
+ if "work_exp_flt" not in cv and cv.get("work_experience", 0):
660
+ cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
661
 
662
  keys = list(cv.keys())
663
  for k in keys:
664
+ if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k):
665
+ del cv[k]
666
  for k in cv.keys():
667
+ if not re.search("_(kwd|id)$", k) or not isinstance(cv[k], list):
668
+ continue
669
  cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
670
  keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
671
  for k in keys:
672
+ if cv[k] <= 0:
673
+ del cv[k]
674
 
675
  cv["tob_resume_id"] = str(cv["tob_resume_id"])
676
  cv["id"] = cv["tob_resume_id"]
 
687
  if isinstance(d, list):
688
  d = [dealWithInt64(t) for t in d]
689
 
690
+ if isinstance(d, np.integer):
691
+ d = int(d)
692
  return d