Spaces:

Kaibo93
/

test

Sleeping

App Files Files Community

test / nlp_transform.py

Kaibo93

Upload 7 files

119f7b3 verified 22 days ago

raw

history blame contribute delete

2.75 kB

	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
	import re

	# One checkpoint name is shared by the model and its tokenizer.
	model_name = "ckiplab/bert-base-chinese-ner"
	model = AutoModelForTokenClassification.from_pretrained(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Module-level NER pipeline, built once at import time; extract_conditions
	# calls it and reads "word", "entity_group", "start" and "end" from each
	# returned entity.
	ner = pipeline(
	    task="ner",
	    model=model,
	    tokenizer=tokenizer,
	    aggregation_strategy="simple",
	)


	def extract_conditions(text, entities=None):
	    """Extract zoning figures (coverage, floor-area ratio, bonus, site area) from text.

	    Every recognised entity whose word contains digits is treated either as
	    a percentage or as an area quantity:

	    * A percentage (word contains "%" or the group is "PERCENT") is assigned
	      to whichever of the keywords 容積獎勵 / 容積率 / 建蔽率 occurs closest
	      before it in the text. Percentages with no preceding keyword are
	      ignored.
	    * A "QUANTITY" entity is stored as the site area when it, together with
	      the word of the following entity, contains a known area unit.

	    Only the first run of digits in an entity is used, so a decimal value
	    is reduced to its integer part.

	    Args:
	        text: The description to analyse.
	        entities: Optional pre-computed entity list. Each item is a mapping
	            with "word", "entity_group" and "start" (character offset of the
	            entity inside ``text``). When omitted, the module-level ``ner``
	            pipeline is run on ``text``.

	    Returns:
	        A dict holding any of the keys "BCR", "FAR", "bonus_far" (integer
	        percentages) and "site_area" (integer; values given in 坪 are
	        multiplied by 3.3058 and rounded, other units are stored as-is).
	        Later matches overwrite earlier ones for the same key.
	    """
	    print("🧠 開始分析文字內容...")
	    if entities is None:
	        entities = ner(text)
	    print("🧠 模型辨識到：", entities)

	    # Keyword found in the text -> (result key, icon used in the log line).
	    percent_fields = {
	        "容積獎勵": ("bonus_far", "🎁"),
	        "容積率": ("FAR", "🏗️"),
	        "建蔽率": ("BCR", "🏗️"),
	    }
	    unit_keywords = ("坪", "平方公尺", "㎡", "m2", "m²")

	    result = {}
	    for i, entity in enumerate(entities):
	        word = entity["word"]
	        label = entity["entity_group"]
	        start = entity["start"]

	        # Numeric value of the entity; entities without digits carry no value.
	        nums = re.findall(r"\d+", word)
	        if not nums:
	            continue
	        num = int(nums[0])

	        # Percentage types (building coverage ratio / floor area ratio / bonus).
	        if "%" in word or label == "PERCENT":
	            # `start` is an offset into the original text, so slice first and
	            # only then drop spaces; slicing a space-stripped copy with this
	            # offset would run past the entity and could pick up a keyword
	            # that appears after the number.
	            preceding = text[:start].replace(" ", "")

	            # Choose the keyword whose last occurrence is closest to the
	            # number; rfind() returns -1 for keywords that never occur.
	            keyword = max(percent_fields, key=preceding.rfind)
	            if preceding.rfind(keyword) == -1:
	                continue

	            key, icon = percent_fields[keyword]
	            print(f"{icon} {keyword}：{num}%")
	            result[key] = num

	        # Area types.
	        elif label == "QUANTITY":
	            # The unit may have been split off into the next entity, so look
	            # at both words together.
	            next_token = entities[i + 1]["word"] if i + 1 < len(entities) else ""
	            suffix_candidate = (word + next_token).replace(" ", "")

	            if any(unit in suffix_candidate for unit in unit_keywords):
	                if "坪" in suffix_candidate:
	                    sqm = round(num * 3.3058)
	                    print(f"📏 偵測到 {num} 坪 → {sqm} m²")
	                    result["site_area"] = sqm
	                else:
	                    print(f"📏 偵測到 {num} 平方公尺")
	                    result["site_area"] = num

	    print("🧠 NLP 萃取結果：", result)
	    return result