document-qa / tests /test_grobid_processors.py
lfoppiano's picture
use paragraphs instead of sentences
5fd26bb
raw
history blame
570 Bytes
from bs4 import BeautifulSoup
from document_qa.grobid_processors import get_children_body
def test_get_children_paragraphs():
with open("resources/2312.07559.paragraphs.tei.xml", 'r') as fo:
soup = BeautifulSoup(fo, 'xml')
children = get_children_body(soup, use_paragraphs=True)
assert len(children) == 70
def test_get_children_sentences():
with open("resources/2312.07559.sentences.tei.xml", 'r') as fo:
soup = BeautifulSoup(fo, 'xml')
children = get_children_body(soup, use_paragraphs=False)
assert len(children) == 327