from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import re

model_name = "ckiplab/bert-base-chinese-ner"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

# An integer, optionally written with thousands separators ("1,200").
_NUMBER_RE = re.compile(r"\d{1,3}(?:,\d{3})+|\d+")

# Keyword that precedes a percentage -> (key in the result dict, log label).
_PERCENT_FIELDS = {
    "建蔽率": ("BCR", "🏗️ 建蔽率"),
    "容積率": ("FAR", "🏗️ 容積率"),
    "容積獎勵": ("bonus_far", "🎁 容積獎勵"),
}

# Substrings that mark a quantity as an area.
_AREA_UNITS = ("坪", "平方公尺", "㎡", "m2", "m²")

# Conversion factor applied to values given in 坪 to obtain square metres.
_SQM_PER_PING = 3.3058


def _first_int(word):
    """Return the first integer found in *word*, or ``None`` if there is none.

    Spaces are removed first so that a number split apart inside the entity
    text (e.g. ``"1 , 200"``) is still read as one value; thousands
    separators are dropped before conversion.
    """
    match = _NUMBER_RE.search(word.replace(" ", ""))
    if match is None:
        return None
    return int(match.group().replace(",", ""))


def _nearest_keyword(prefix):
    """Return the keyword of ``_PERCENT_FIELDS`` occurring last in *prefix*.

    Returns an empty string when none of the keywords occurs.
    """
    best_pos, best_keyword = -1, ""
    for keyword in _PERCENT_FIELDS:
        pos = prefix.rfind(keyword)
        if pos > best_pos:
            best_pos, best_keyword = pos, keyword
    return best_keyword


def _is_adjacent(text, end, next_start):
    """Tell whether only whitespace separates two entity spans in *text*.

    When either offset is unavailable (``None``) the spans are treated as
    adjacent, because there is nothing to check against.
    """
    if end is None or next_start is None:
        return True
    return not text[end:next_start].strip()


def extract_conditions(text):
    """Extract building-regulation figures from free text via the NER model.

    Runs the module-level ``ner`` pipeline on *text* and inspects every
    entity that contains a number:

    * Percentages (entity contains a percent sign or is labelled
      ``PERCENT``) are assigned to the closest preceding keyword:
      建蔽率 -> ``"BCR"``, 容積率 -> ``"FAR"``, 容積獎勵 -> ``"bonus_far"``.
      A percentage with no preceding keyword is ignored.
    * ``QUANTITY`` entities carrying an area unit are stored as
      ``"site_area"``; values in 坪 are converted to square metres and
      rounded, other recognised units are stored as given.

    When the same field is detected more than once, the last detection wins.
    Progress and findings are printed to stdout.

    Args:
        text: The text to analyse.

    Returns:
        dict mapping the detected field names (a subset of ``"BCR"``,
        ``"FAR"``, ``"bonus_far"``, ``"site_area"``) to integer values.
    """
    print("🧠 開始分析文字內容...")
    entities = ner(text)
    print("🧠 模型辨識到:", entities)

    result = {}

    for i, entity in enumerate(entities):
        word = entity["word"]
        label = entity["entity_group"]
        start = entity["start"]
        end = entity["end"]

        # Only entities that carry a number are of interest.
        num = _first_int(word)
        if num is None:
            continue

        # Percentage types (building coverage ratio / floor area ratio / bonus).
        if "%" in word or "%" in word or label == "PERCENT":
            # The offsets refer to the original text, so the prefix has to be
            # cut from `text` itself; spaces are stripped only afterwards so
            # that a keyword broken up by spaces is still recognised.
            prefix = text[:start].replace(" ", "")
            keyword = _nearest_keyword(prefix)
            if keyword:
                key, title = _PERCENT_FIELDS[keyword]
                print(f"{title}:{num}%")
                result[key] = num

        # Area types.
        elif label == "QUANTITY":
            # Re-join a unit that was split off into the following entity,
            # but only if that entity directly follows this one; otherwise a
            # unit belonging to some later figure would be attached here.
            candidate = word
            if i + 1 < len(entities):
                following = entities[i + 1]
                if _is_adjacent(text, end, following["start"]):
                    candidate += following["word"]
            candidate = candidate.replace(" ", "")

            if any(unit in candidate for unit in _AREA_UNITS):
                if "坪" in candidate:
                    sqm = round(num * _SQM_PER_PING)
                    print(f"📏 偵測到 {num} 坪 → {sqm} m²")
                    result["site_area"] = sqm
                else:
                    print(f"📏 偵測到 {num} 平方公尺")
                    result["site_area"] = num

    print("🧠 NLP 萃取結果:", result)
    return result