Spaces:
Running
Running
from bs4 import BeautifulSoup | |
from document_qa.grobid_processors import get_children_body | |
def test_get_children_paragraphs(): | |
with open("resources/2312.07559.paragraphs.tei.xml", 'r') as fo: | |
soup = BeautifulSoup(fo, 'xml') | |
children = get_children_body(soup, use_paragraphs=True) | |
assert len(children) == 70 | |
def test_get_children_sentences(): | |
with open("resources/2312.07559.sentences.tei.xml", 'r') as fo: | |
soup = BeautifulSoup(fo, 'xml') | |
children = get_children_body(soup, use_paragraphs=False) | |
assert len(children) == 327 | |