lfoppiano committed on
Commit
93bc8ec
1 Parent(s): d918754
tests/resources/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ import os
2
+
3
+ TEST_DATA_PATH = os.path.dirname(__file__)
tests/test_grobid_processors.py CHANGED
@@ -1,9 +1,12 @@
 
 
1
  from bs4 import BeautifulSoup
2
  from document_qa.grobid_processors import get_xml_nodes_body, get_xml_nodes_figures, get_xml_nodes_header
 
3
 
4
 
5
  def test_get_xml_nodes_body_paragraphs():
6
- with open("resources/2312.07559.paragraphs.tei.xml", 'r') as fo:
7
  soup = BeautifulSoup(fo, 'xml')
8
 
9
  nodes = get_xml_nodes_body(soup, use_paragraphs=True)
@@ -12,7 +15,7 @@ def test_get_xml_nodes_body_paragraphs():
12
 
13
 
14
  def test_get_xml_nodes_body_sentences():
15
- with open("resources/2312.07559.sentences.tei.xml", 'r') as fo:
16
  soup = BeautifulSoup(fo, 'xml')
17
 
18
  children = get_xml_nodes_body(soup, use_paragraphs=False)
@@ -21,7 +24,7 @@ def test_get_xml_nodes_body_sentences():
21
 
22
 
23
  def test_get_xml_nodes_figures():
24
- with open("resources/2312.07559.paragraphs.tei.xml", 'r') as fo:
25
  soup = BeautifulSoup(fo, 'xml')
26
 
27
  children = get_xml_nodes_figures(soup)
@@ -30,15 +33,16 @@ def test_get_xml_nodes_figures():
30
 
31
 
32
  def test_get_xml_nodes_header_paragraphs():
33
- with open("resources/2312.07559.paragraphs.tei.xml", 'r') as fo:
34
  soup = BeautifulSoup(fo, 'xml')
35
 
36
  children = get_xml_nodes_header(soup)
37
 
38
  assert len(children) == 8
39
 
 
40
  def test_get_xml_nodes_header_sentences():
41
- with open("resources/2312.07559.sentences.tei.xml", 'r') as fo:
42
  soup = BeautifulSoup(fo, 'xml')
43
 
44
  children = get_xml_nodes_header(soup, use_paragraphs=False)
 
1
+ import os
2
+
3
  from bs4 import BeautifulSoup
4
  from document_qa.grobid_processors import get_xml_nodes_body, get_xml_nodes_figures, get_xml_nodes_header
5
+ from tests.resources import TEST_DATA_PATH
6
 
7
 
8
  def test_get_xml_nodes_body_paragraphs():
9
+ with open(os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml"), 'r') as fo:
10
  soup = BeautifulSoup(fo, 'xml')
11
 
12
  nodes = get_xml_nodes_body(soup, use_paragraphs=True)
 
15
 
16
 
17
  def test_get_xml_nodes_body_sentences():
18
+ with open(os.path.join(TEST_DATA_PATH, "2312.07559.sentences.tei.xml"), 'r') as fo:
19
  soup = BeautifulSoup(fo, 'xml')
20
 
21
  children = get_xml_nodes_body(soup, use_paragraphs=False)
 
24
 
25
 
26
  def test_get_xml_nodes_figures():
27
+ with open(os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml"), 'r') as fo:
28
  soup = BeautifulSoup(fo, 'xml')
29
 
30
  children = get_xml_nodes_figures(soup)
 
33
 
34
 
35
  def test_get_xml_nodes_header_paragraphs():
36
+ with open(os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml"), 'r') as fo:
37
  soup = BeautifulSoup(fo, 'xml')
38
 
39
  children = get_xml_nodes_header(soup)
40
 
41
  assert len(children) == 8
42
 
43
+
44
  def test_get_xml_nodes_header_sentences():
45
+ with open(os.path.join(TEST_DATA_PATH, "2312.07559.sentences.tei.xml"), 'r') as fo:
46
  soup = BeautifulSoup(fo, 'xml')
47
 
48
  children = get_xml_nodes_header(soup, use_paragraphs=False)