Spaces:

svjack
/

Harry-Potter-Knowledge-Question-Answer-in-Chinese

Runtime error

Harry-Potter-Knowledge-Question-Answer-in-Chinese

File size: 6,477 Bytes

03c1af1

#from conf import *
import os
import sys
import re
from rapidfuzz import fuzz
import requests
import json

#assert os.path.exists(flair_ner_model_path)
#loaded_model: SequenceTagger = SequenceTagger.load(os.path.join(flair_ner_model_path ,"best-model.pt"))

'''
def one_item_process(r, loaded_model):
    #assert type(r) == type(pd.Series())
    zh = r["question"]
    zh = zh.replace(" ", "").strip()
    sentence = Sentence(" ".join(list(zh)))
    loaded_model.predict(sentence)
    sentence_str = str(sentence)
    ask_spans = re.findall(r'\["(.+?)"/ASK\]', sentence_str)
    sentence = re.findall(r'Sentence: "(.+?)"', sentence_str)
    if ask_spans:
        ask_spans = ask_spans[0]
    else:
        ask_spans = ""
    if sentence:
        sentence = sentence[0]
    else:
        sentence = ""
    ask_spans, sentence = map(lambda x: x.replace(" ", "").strip(), [ask_spans, sentence])
    return ask_spans, sentence
'''

def one_item_process_by_request(r, timeout=60):
    """Ask the hosted extractor service for the question words of a question.

    All spaces are removed from ``r["question"]`` and the resulting text is
    POSTed as JSON to the ``svjack-question-words-extractor-zh`` endpoint.

    Args:
        r: Mapping with a ``"question"`` key holding the question text.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        A tuple ``(question_words, question)``. ``question`` is the
        space-stripped text that was sent. ``question_words`` is the
        ``"Question words"`` field of the first element of the response's
        ``"data"`` list, or ``""`` when that list is empty.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    zh = r["question"].replace(" ", "").strip()
    response = requests.post(
        "https://svjack-question-words-extractor-zh.hf.space/run/predict",
        json={"data": [zh]},
        timeout=timeout,
    )
    # Fail with an explicit HTTP error rather than an opaque KeyError or
    # JSON decode error further down when the service is unavailable.
    response.raise_for_status()
    data = response.json()["data"]
    if data:
        Question_words = data[0]["Question words"]
    else:
        Question_words = ""
    return Question_words, zh


def retrieve_sent_split(sent,
                        stops_split_pattern="|".join("\\" + mark for mark in ",." + "，。" + ":？? ")
                        ):
    """Split ``sent`` into chunks at every match of ``stops_split_pattern``.

    The default pattern is an alternation of backslash-escaped single
    characters: ASCII and full-width comma, period and question mark, the
    ASCII colon, and the space character.

    Args:
        sent: Text to split.
        stops_split_pattern: Regular expression passed to ``re.split``.

    Returns:
        ``[]`` when ``sent`` is empty or whitespace-only; otherwise the list
        produced by ``re.split``, which may contain empty strings where
        delimiters are adjacent or sit at either end of the text.
    """
    return re.split(stops_split_pattern, sent) if sent.strip() else []

def find_min_text_contain_entity_span(sent, entity_str, statement):
    """Return the chunk of ``sent`` containing ``entity_str`` that best matches ``statement``.

    ``sent`` is cut into chunks with ``retrieve_sent_split``; only chunks that
    contain ``entity_str`` are candidates. Despite the function name, the
    winner is chosen by the highest ``fuzz.ratio`` score against
    ``statement``, not by length.

    Args:
        sent: Text to search.
        entity_str: Substring a chunk must contain to be considered.
        statement: Text each candidate chunk is scored against.

    Returns:
        The best-scoring candidate chunk, or ``sent`` unchanged when no chunk
        contains ``entity_str``.
    """
    candidates = [chunk for chunk in retrieve_sent_split(sent) if entity_str in chunk]
    if not candidates:
        return sent
    # max() keeps the earliest chunk among equal scores, which is the same
    # element a stable descending sort would put first.
    return max(candidates, key=lambda chunk: fuzz.ratio(chunk, statement))

def to_statement(r):
    """Turn a question record into a declarative statement and its supporting chunk.

    The question word(s) in ``r["question"]`` are replaced by the answer
    entity, and ASCII and full-width question marks are removed. The context
    chunk that mentions the entity and best matches that statement is then
    looked up with ``find_min_text_contain_entity_span``.

    Args:
        r: Mapping with the keys ``"entity"``, ``"question"``, ``"head"``
            (the question words to substitute) and ``"context"``.

    Returns:
        A tuple ``(statement, top_chip)``.
    """
    entity = r["entity"]
    question = r["question"]
    head = r["head"]
    context = r["context"]
    # str.replace("", entity) would insert the entity between every character
    # of the question, so only substitute when question words were found.
    statement = question.replace(head, entity) if head else question
    statement = statement.replace("?", "").replace("？", "")
    top_chip = find_min_text_contain_entity_span(context, entity, statement)
    return statement, top_chip

'''
r = {'entity': '1901年',
 'question': '荷兰国会何时通过伦理政策?',
 'title': '爪哇岛',
 'context': '伊斯兰教被接受的同时，其教义也被融入了当地人长久以来的一些信仰，所以爪哇岛的伊斯兰教带有明显的本地特色  “荷兰东印度公司”在巴达维亚（今天的雅加达）建立了“贸易和行政管理总部” 在殖民统治时期，荷兰人将注意力集中在雅加达和其他一些海滨城市，例如三宝垄和泗水 荷兰殖民者还通过一些归顺的本土势力，间接对这个多山的岛屿进行统治，例如爪哇岛中部的马打兰王国  19世纪，荷兰政府从荷兰东印度公司手上接管了东印度群岛，1830年荷兰统治者开始实行所谓“耕种制”（荷兰语cultuurstelsel en cultuurprocenten）的变相奴役制度，导致了大范围的饥荒和贫困 随即发生了各种政治和社会反抗运动，其中一位名叫Multatuli的荷兰作家写了一本名叫《Max Havelaar》的小说，以抗议当时的社会状况 迫于各种反抗运动此起彼伏，1901年荷兰国会通过伦理政策（Etnisch beleid），客观上使一部分爪哇人接触到荷兰式教育，在这些人中，出现了很多杰出的印尼民族主义者，并且在二战后的印尼独立运动中起到了重要作用'}

qa_downstream_process(
    r["entity"],
    r["question"],
    r["context"]
)

{'entity': '1901年',
 'question': '荷兰国会何时通过伦理政策?',
 'context': '伊斯兰教被接受的同时，其教义也被融入了当地人长久以来的一些信仰，所以爪哇岛的伊斯兰教带有明显的本地特色  “荷兰东印度公司”在巴达维亚（今天的雅加达）建立了“贸易和行政管理总部” 在殖民统治时期，荷兰人将注意力集中在雅加达和其他一些海滨城市，例如三宝垄和泗水 荷兰殖民者还通过一些归顺的本土势力，间接对这个多山的岛屿进行统治，例如爪哇岛中部的马打兰王国  19世纪，荷兰政府从荷兰东印度公司手上接管了东印度群岛，1830年荷兰统治者开始实行所谓“耕种制”（荷兰语cultuurstelsel en cultuurprocenten）的变相奴役制度，导致了大范围的饥荒和贫困 随即发生了各种政治和社会反抗运动，其中一位名叫Multatuli的荷兰作家写了一本名叫《Max Havelaar》的小说，以抗议当时的社会状况 迫于各种反抗运动此起彼伏，1901年荷兰国会通过伦理政策（Etnisch beleid），客观上使一部分爪哇人接触到荷兰式教育，在这些人中，出现了很多杰出的印尼民族主义者，并且在二战后的印尼独立运动中起到了重要作用',
 'head': '何时',
 'statement': '荷兰国会1901年通过伦理政策',
 'top_chip': '1901年荷兰国会通过伦理政策（Etnisch'}
'''
#def qa_downstream_process(entity, question, context, loaded_model = loaded_model):
def qa_downstream_process(entity, question, context):
    """Build the full QA record for an answer entity found in a context.

    Args:
        entity: Answer text; it must occur in ``context``.
        question: The question text.
        context: Passage the answer was taken from.

    Returns:
        ``None`` when ``entity`` does not occur in ``context``. Otherwise a
        dict with the keys ``"entity"``, ``"question"``, ``"context"``,
        ``"head"`` (question words from ``one_item_process_by_request``),
        ``"statement"`` and ``"top_chip"`` (both from ``to_statement``).
    """
    if entity not in context:
        return None
    record = {"entity": entity, "question": question, "context": context}
    # The extractor returns (question_words, cleaned_question); only the
    # question words are kept.
    record["head"] = one_item_process_by_request(record)[0]
    record["statement"], record["top_chip"] = to_statement(record)
    return record

'''
@csrf_exempt
def qa_downstream_process_part(request):
    assert request.method == "POST"
    post_data = request.POST
    entity = post_data["entity"]
    question = post_data["question"]
    context = post_data["context"]
    output = qa_downstream_process(entity, question, context)
    if output is None:
        return HttpResponse(json.dumps(
            {"output": "No Answer"}
        ))
    assert type(output) == type({})
    req_str = json.dumps(output)
    return HttpResponse(
        req_str
    )
'''

# Running this file as a script performs no action; the guard body is empty.
if __name__ == "__main__":
    pass