Spaces:

ando55
/

clinical_segment_splitter

Runtime error

App Files Files Community

clinical_segment_splitter / run_segbot.py

ando55

Update run_segbot.py

e3ce53f over 1 year ago

raw history blame contribute delete

No virus

2.43 kB

	import re
	import pickle
	import numpy as np
	import random
	import torch
	from solver import TrainSolver

	from model import PointerNetworks
	import gensim
	import MeCab
	import pysbd
	import io



	class CPU_Unpickler(pickle.Unpickler):
	    """Unpickler that loads torch storages embedded in a pickle onto the CPU.

	    Only the lookup of ``torch.storage._load_from_bytes`` is intercepted;
	    every other global is resolved by ``pickle.Unpickler`` as usual.
	    """

	    def find_class(self, module, name):
	        """Resolve the global ``module.name`` referenced by the pickle stream.

	        For ``torch.storage._load_from_bytes`` a replacement callable is
	        returned that feeds the raw bytes to ``torch.load`` with
	        ``map_location='cpu'``. Anything else is delegated to the parent class.
	        """
	        is_storage_loader = (module == 'torch.storage'
	                             and name == '_load_from_bytes')
	        if not is_storage_loader:
	            return super().find_class(module, name)

	        def _load_on_cpu(raw_bytes):
	            # Wrap the bytes in a file-like object because torch.load reads
	            # from a stream; map_location pins the result to the CPU.
	            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

	        return _load_on_cpu


	def create_data(doc,fm,split_method):
	    """Split ``doc`` into sentences and encode each one for the model.

	    Args:
	        doc: Raw input text.
	        fm: Object exposing ``fm.vocab[token].index``; it must also contain
	            a ``"<unk>"`` entry, which is used for tokens that raise
	            ``KeyError``.  (Presumably a gensim word-vector model - confirm
	            against the pickled ``fm`` object.)
	        split_method: ``"pySBD"`` to split sentences with pySBD; any other
	            value splits after the Japanese full stops ``。`` and ``．``.

	    Returns:
	        A tuple ``(altex, allab, fukugenss)``.  Each element is a
	        one-element list (the document level) wrapping a per-sentence list:

	        * ``altex[0][k]``     - ``np.ndarray`` of vocabulary indices,
	        * ``allab[0][k]``     - ``np.ndarray`` of zeros with a 1 on the
	          last token of the sentence,
	        * ``fukugenss[0][k]`` - the sentence's token strings as produced by
	          MeCab.

	    Lines that are blank, or for which tokenisation yields no tokens, are
	    skipped.
	    """
	    wakati = MeCab.Tagger("-Owakati -b 81920 -r /etc/mecabrc -d /home/user/app/mecab-ipadic-neologd")

	    if split_method == "pySBD":
	        # Build the segmenter only when it is actually used.
	        seg = pysbd.Segmenter(language="ja", clean=False)
	        lines = seg.segment(doc)
	    else:
	        # Break after each full stop, then collapse runs of newlines so
	        # that no empty sentences are produced between them.
	        doc = doc.strip().replace("。","。\n").replace("．","．\n")
	        doc = re.sub("(\n)+","\n",doc)
	        lines = doc.split("\n")

	    labels, text, fukugens = [], [], []
	    for line in lines:
	        line = line.strip()
	        if line == "":
	            continue

	        # The last element of the split is dropped; presumably it is the
	        # line terminator MeCab appends to its wakati output - TODO confirm.
	        tokens = wakati.parse(line).split(" ")[:-1]
	        if not tokens:
	            # Nothing to encode; without this guard ``label[-1]`` below
	            # would raise IndexError.
	            continue

	        indices = []
	        for token in tokens:
	            try:
	                indices.append(fm.vocab[token].index)
	            except KeyError:
	                indices.append(fm.vocab["<unk>"].index)

	        # Every token is labelled 0 except the sentence-final one.
	        label = [0] * len(tokens)
	        label[-1] = 1

	        labels.append(np.array(label))
	        text.append(np.array(indices))
	        fukugens.append(list(tokens))

	    # The results are wrapped in one more list level: a batch holding this
	    # single document.
	    return [text], [labels], [fukugens]


	def generate(doc, mymodel, fm, index2word, split_method):
	    """Encode ``doc`` with ``create_data`` and run it through ``mymodel``.

	    ``doc``, ``fm`` and ``split_method`` are forwarded to ``create_data``.
	    Its three results are passed, together with ``index2word``, to
	    ``mymodel.check_accuracy``, whose return value is returned unchanged.
	    """
	    encoded_inputs, boundary_labels, surface_tokens = create_data(
	        doc, fm, split_method
	    )
	    return mymodel.check_accuracy(
	        encoded_inputs, boundary_labels, index2word, surface_tokens
	    )



	def setup():
	    """Load the pickled resources from the current working directory.

	    Reads ``index2word.pickle``, ``model.pickle`` and ``fm.pickle`` in that
	    order.  ``model.pickle`` is read through ``CPU_Unpickler``; the other
	    two use the standard unpickler.

	    Returns:
	        The tuple ``(mysolver, fm, index2word)``.
	    """
	    # NOTE: unpickling executes code embedded in the file; these files are
	    # assumed to be trusted artefacts shipped with the application.
	    def _unpickle(path, unpickler_cls=pickle.Unpickler):
	        with open(path, 'rb') as handle:
	            return unpickler_cls(handle).load()

	    index2word = _unpickle('index2word.pickle')
	    mysolver = _unpickle('model.pickle', CPU_Unpickler)
	    fm = _unpickle('fm.pickle')

	    return mysolver, fm, index2word