nikolasmoya
committed on
Commit
•
3e6cb23
1
Parent(s):
df3589a
Update README.md
Browse files
README.md
CHANGED
@@ -11,8 +11,35 @@ model-index:
|
|
11 |
results: []
|
12 |
---
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# c4-binary-english-grammar-checker
|
18 |
|
|
|
11 |
results: []
|
12 |
---
|
13 |
|
14 |
+
# Usage instructions:
|
15 |
+
|
16 |
+
We recommend splitting the text into sentences and evaluating each sentence individually. You can do the splitting with spaCy:
|
17 |
+
```python
|
18 |
+
import spacy
|
19 |
+
|
20 |
+
def clean_up_sentence(text: str) -> str:
|
21 |
+
text = text.replace("---", "")
|
22 |
+
text = text.replace("\n", " ")
|
23 |
+
text = text.strip()
|
24 |
+
if not text.endswith(('.', '!', '?', ":")):
|
25 |
+
# Since we are breaking a longer text into sentences ourselves, we should always end a sentence with a period.
|
26 |
+
text = text + "."
|
27 |
+
return text
|
28 |
+
|
29 |
+
sentence_splitter = spacy.load("en_core_web_sm")
|
30 |
+
spacy_document = sentence_splitter("This is a long text. It has two or more sentence. Spacy will break it down into sentences.")
|
31 |
+
results = []
|
32 |
+
for sentence in spacy_document.sents:
|
33 |
+
clean_text = clean_up_sentence(str(sentence))
|
34 |
+
classification = grammar_checker(clean_text)[0]
|
35 |
+
results.append({
|
36 |
+
"label": classification['label'],
|
37 |
+
"score": classification['score'],
|
38 |
+
"sentence": clean_text
|
39 |
+
})
|
40 |
+
pd.DataFrame.from_dict(results)
|
41 |
+
```
|
42 |
+
|
43 |
|
44 |
# c4-binary-english-grammar-checker
|
45 |
|