Spaces:
Sleeping
Sleeping
update
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
from parse import parse_text
|
3 |
from nltk import Tree
|
4 |
import pandas as pd
|
5 |
import re
|
|
|
1 |
import streamlit as st
|
2 |
+
# from parse import parse_text
|
3 |
from nltk import Tree
|
4 |
import pandas as pd
|
5 |
import re
|
parse.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import sys
|
3 |
+
import benepar
|
4 |
+
from huggingface_hub import hf_hub_download
|
5 |
+
|
6 |
+
# Module-level parser setup: runs once, at import time.
# `model_path` is used below both as the `repo_id` for the Hub download and as
# the argument to `benepar.Parser`.
model_path = "ParserModels/ENHG/new-convbert-german-europeana0_dev=83.03.pt"
|
7 |
+
# NOTE(review): the return value of hf_hub_download is not assigned, so
# whatever it returns is never used by the code below.
# NOTE(review): `model_path` looks like a file path (two slashes, ends in
# ".pt") rather than a Hub repository id, and the `filename` requested here
# differs from the file name at the end of `model_path` -- confirm the
# intended repo id and file name.
hf_hub_download(repo_id=model_path, filename='german-delex-parser_dev=83.10.pt')
|
8 |
+
# NOTE(review): the parser is constructed from `model_path`, not from the
# result of the download above -- presumably the downloaded file was meant
# to be loaded here; verify.
parser = benepar.Parser(model_path)
|
9 |
+
|
10 |
+
def parse(words):
    """Parse a tokenised sentence and return its tree as one line of text.

    Round brackets inside the tokens are swapped for the placeholders
    ``-LRB-`` / ``-RRB-`` before the tokens are handed to the module-level
    ``parser``. The string form of the parser's result is then post-processed:
    bracket placeholders become backslash-escaped brackets, all whitespace
    runs are collapsed to single spaces, and the space in front of every
    opening parenthesis is removed.

    Args:
        words: iterable of token strings making up one sentence.

    Returns:
        The post-processed tree as a single-line string.
    """
    # Hide literal round brackets from the parser.
    escaped_tokens = []
    for token in words:
        escaped_tokens.append(token.replace('(', '-LRB-').replace(')', '-RRB-'))

    sentence = benepar.InputSentence(words=escaped_tokens)
    rendered = str(parser.parse(sentence))

    # Applied strictly in this order; the last rule rewrites the literal
    # sequence "($(-" (presumably a "$(" tag followed by "-" -- TODO confirm).
    substitutions = (
        ('-LRB-', '\\('),
        ('-RRB-', '\\)'),
        ('-LSB-', '\\['),
        ('-RSB-', '\\]'),
        ('($(-', '($\\(-'),
    )
    for placeholder, replacement in substitutions:
        rendered = rendered.replace(placeholder, replacement)

    # put the whole parse tree on a single line
    single_line = re.sub(r'\s+', ' ', rendered.strip())
    return re.sub(r' \(', '(', single_line)
|
19 |
+
|
20 |
+
|
21 |
+
def tokenize_line(line):
    """Split one raw text line into a list of tokens.

    A space is inserted between a non-space character and a following
    ``. , ; : ? ! ) "`` and between a leading ``"`` or ``(`` and the
    character after it; the result is then split on whitespace.

    NOTE: each match of the first pattern consumes the preceding character,
    so of two adjacent punctuation marks only the first is split off
    (``leise).`` yields ``leise`` and ``).``).

    Args:
        line: one line of input text; surrounding whitespace is ignored.

    Returns:
        List of token strings (empty for a blank line).
    """
    spaced = re.sub(r'(\S)([.,;:?!)"])', r'\1 \2', line.strip())
    spaced = re.sub(r'(["(])(\S)', r'\1 \2', spaced)
    return spaced.split()


def main(argv=None):
    """Parse every line of the file named on the command line.

    One tree is printed per input line, in input order.

    Args:
        argv: argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the text file to parse.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Give a readable message instead of a bare IndexError.
        sys.exit(f"usage: {argv[0] if argv else 'parse.py'} INPUT_FILE")

    # Explicit encoding so non-ASCII input does not depend on the locale.
    with open(argv[1], encoding="utf-8") as file:
        for line in file:
            print(parse(tokenize_line(line)))


# Guarded so that importing this module does not try to read sys.argv[1].
if __name__ == "__main__":
    main()
|