import re
import unicodedata

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Hugging Face model id of the Chinese token-classification (NER) checkpoint.
model_name = "ckiplab/bert-base-chinese-ner"

# Tokenizer and model are loaded once at import time and shared by all calls.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# aggregation_strategy="simple" merges word pieces into whole entities, so each
# result carries the "entity_group", "word", "start" and "end" keys that
# extract_conditions() reads.
ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")


# Zoning keyword -> (result key, caption printed when a value is found).
_PERCENT_FIELDS = {
    "建蔽率": ("BCR", "🏗️ 建蔽率"),
    "容積率": ("FAR", "🏗️ 容積率"),
    "容積獎勵": ("bonus_far", "🎁 容積獎勵"),
}

# Area units after NFKC normalization ("㎡" and "m²" both fold to "m2").
# "坪" comes first so it wins when several units are present.
_AREA_UNITS = ("坪", "平方公尺", "m2")

# Conversion factor used for 坪 -> square metres.
_SQM_PER_PING = 3.3058

# How many characters after an entity are inspected for a trailing unit.
_UNIT_LOOKAHEAD = 8


def _strip_whitespace(s):
    """Return *s* with every whitespace character removed."""
    return "".join(s.split())


def _closest_keyword(prefix):
    """Return the zoning keyword occurring last in *prefix*, or None."""
    best, best_pos = None, -1
    for keyword in _PERCENT_FIELDS:
        pos = prefix.rfind(keyword)
        if pos > best_pos:
            best, best_pos = keyword, pos
    return best


def _find_area_unit(word, following):
    """Return the area unit inside *word* or leading *following*, else None."""
    for unit in _AREA_UNITS:
        if unit in word:
            return unit
    for unit in _AREA_UNITS:
        if following.startswith(unit):
            return unit
    return None


def extract_conditions(text):
    """Extract zoning figures from free-form Chinese text.

    Runs the module-level ``ner`` pipeline on *text* and maps the numeric
    entities it returns onto a small set of keys:

    * ``"BCR"`` / ``"FAR"`` / ``"bonus_far"`` -- a percentage entity, assigned
      to whichever of 建蔽率 / 容積率 / 容積獎勵 appears closest before it.
    * ``"site_area"`` -- a ``QUANTITY`` entity carrying an area unit; values
      given in 坪 are converted to square metres and rounded.

    Args:
        text: The text to analyse.

    Returns:
        A dict holding only the keys that were detected. Values are ints.
        When a key is detected more than once, the last occurrence wins.
    """
    print("🧠 開始分析文字內容...")
    entities = ner(text)
    print("🧠 模型辨識到:", entities)

    result = {}

    for entity in entities:
        # NFKC folds full-width digits/percent signs and compatibility forms
        # such as "㎡"; the whitespace strip undoes token-joining spaces.
        word = _strip_whitespace(unicodedata.normalize("NFKC", entity["word"]))
        label = entity["entity_group"]
        start = entity.get("start")
        end = entity.get("end")

        # Only the first run of digits is used (a decimal part is ignored).
        match = re.search(r"\d+", word)
        if match is None:
            continue
        num = int(match.group())

        if "%" in word or label == "PERCENT":
            # Offsets refer to the original text, so slice that (not a
            # space-stripped copy) and strip whitespace afterwards.
            prefix = text if start is None else text[:start]
            keyword = _closest_keyword(_strip_whitespace(prefix))
            if keyword is not None:
                key, caption = _PERCENT_FIELDS[keyword]
                print(f"{caption}:{num}%")
                result[key] = num

        elif label == "QUANTITY":
            # The unit may sit inside the entity or directly after it in the
            # text; anything further away belongs to something else.
            tail = "" if end is None else text[end:end + _UNIT_LOOKAHEAD]
            following = _strip_whitespace(unicodedata.normalize("NFKC", tail))
            unit = _find_area_unit(word.lower(), following.lower())
            if unit == "坪":
                sqm = round(num * _SQM_PER_PING)
                print(f"📏 偵測到 {num} 坪 → {sqm} m²")
                result["site_area"] = sqm
            elif unit is not None:
                print(f"📏 偵測到 {num} 平方公尺")
                result["site_area"] = num

    print("🧠 NLP 萃取結果:", result)
    return result