Spaces:

delwinn
/

ner

Sleeping

ner / app.py

remove chunks from response.

78f775e verified about 1 year ago

1.69 kB

	from fastapi import FastAPI
	from pydantic import BaseModel
	import spacy
	from contextlib import asynccontextmanager

	# Module-level handle to the loaded spaCy pipeline.
	# Starts as None; load_model() assigns the pipeline and cleanup() resets it to None.
	nlp = None


	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """
	    Lifespan context for the application.

	    Awaits load_model() before yielding control to the running application
	    and awaits cleanup() once the context is exited.
	    """
	    await load_model()
	    try:
	        yield
	    finally:
	        # Run cleanup even if an exception or cancellation is thrown in at
	        # the yield point; without the finally it would be skipped.
	        await cleanup()

	# Application instance; the lifespan context manager defined above is
	# registered so model loading and cleanup are tied to the app's lifetime.
	app = FastAPI(lifespan=lifespan)

	async def load_model():
	    """
	    Startup hook: load the "en_core_web_sm" spaCy pipeline and publish it
	    through the module-level ``nlp`` variable.
	    """
	    global nlp
	    pipeline = spacy.load("en_core_web_sm")
	    nlp = pipeline
	    print("NLP model loaded successfully.")


	async def cleanup():
	    """
	    Shutdown hook: drop the module-level reference to the NLP model by
	    resetting ``nlp`` to None.
	    """
	    global nlp
	    nlp = None
	    print("NLP model unloaded.")


	# Request body accepted by the POST /process endpoint.
	class NERRequest(BaseModel):
	    # Text chunks to scan for named entities; each chunk is run through
	    # the NLP pipeline as its own document.
	    chunks: list[str]


	@app.post("/process")
	async def process_text(request: NERRequest):
	    """
	    Extract named entities from the submitted text chunks.

	    Every chunk in ``request.chunks`` is run through the module-level spaCy
	    pipeline ``nlp``. Each distinct entity text is reported once, in order
	    of first appearance across all chunks; later occurrences of the same
	    text are skipped, even if they carry a different label.

	    Returns:
	        A dict ``{"metadata": [...]}`` whose records have the keys
	        ``personal_info`` (entity text), ``redaction_type`` (entity label)
	        and ``redacted_word`` (``REDACTED_<label>``).

	    Raises:
	        RuntimeError: If the NLP model has not been loaded.
	    """
	    if nlp is None:
	        # Fail with a clear message instead of "'NoneType' is not callable".
	        raise RuntimeError("NLP model is not loaded.")

	    metadata_records = []

	    # Entity texts already reported; only membership is ever checked,
	    # so a set is sufficient.
	    seen_entities = set()

	    # nlp.pipe streams the chunks through the pipeline in batches rather
	    # than invoking the full pipeline once per chunk.
	    for doc in nlp.pipe(request.chunks):
	        for ent in doc.ents:
	            # NOTE: entity text is personal data, so it is deliberately
	            # not printed or logged here.
	            if ent.text in seen_entities:
	                continue
	            seen_entities.add(ent.text)

	            metadata_records.append(
	                {
	                    "personal_info": ent.text,
	                    "redaction_type": ent.label_,
	                    "redacted_word": f"REDACTED_{ent.label_}",
	                }
	            )

	    return {
	        "metadata": metadata_records,
	    }