import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the PII-masking model and its tokenizer from the Hugging Face Hub.
model_name = "cameltech/japanese-gpt-1b-PII-masking"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Instruction prepended to every input; in English:
# "# Task\nMask the personal information in the input text\n\n# Input text\n"
instruction = "# タスク\n入力文中の個人情報をマスキングせよ\n\n# 入力文\n"

if torch.cuda.is_available():
    model = model.to("cuda")
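
# Optional (not in the original snippet): from_pretrained() already returns the
# model in eval mode, but calling eval() makes the inference-only intent explicit.
# model.eval()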


def preprocess(text):
    # The model was trained with newlines represented by a literal "<LB>" marker.
    return text.replace("\n", "<LB>")


def postprocess(text):
    # Map the "<LB>" markers in the model output back to real newlines.
    return text.replace("<LB>", "\n")

generation_config = {
    "max_new_tokens": 256,
    "num_beams": 3,
    "num_return_sequences": 1,
    "early_stopping": True,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
    "repetition_penalty": 3.0,
}
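
# Decoding uses beam search (num_beams=3) with early stopping. The unusually
# high repetition_penalty is presumably meant to prevent degenerate loops while
# the model restates the input with the PII masked.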


def generate(input_text):
    # Build the full prompt: instruction + text + EOS, with newlines encoded.
    input_text = instruction + input_text
    input_text += tokenizer.eos_token
    input_text = preprocess(input_text)

    with torch.no_grad():
        token_ids = tokenizer.encode(input_text, add_special_tokens=False, return_tensors="pt")
        output_ids = model.generate(
            token_ids.to(model.device),
            **generation_config,
        )

    # Decode only the newly generated tokens, skipping the prompt.
    output = tokenizer.decode(output_ids.tolist()[0][token_ids.size(1):], skip_special_tokens=True)
    return postprocess(output)
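
# Quick smoke test without the UI (the sample text is illustrative; the exact
# mask tags in the output depend on the model):
# print(generate("山田太郎です。電話番号は090-1234-5678です。"))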


# Expose generate() through a simple Gradio text-in/text-out interface.
iface = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
)

iface.launch()
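
# To get a shareable public URL instead of a local-only one (standard Gradio
# option, not used in the original snippet): iface.launch(share=True)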