Keyven committed on
Commit
77de910
·
verified ·
1 Parent(s): 8527a08

Upload schemas/letter.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. schemas/letter.json +73 -0
schemas/letter.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "german-ocr-3/schemas/letter.json",
4
+ "title": "GermanOCR3 Letter (Brief)",
5
+ "description": "Schema fuer deutsche Geschaefts- oder Behoerdenbriefe (DIN-5008-naher Aufbau).",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": ["document_type", "language", "subject", "body"],
9
+ "properties": {
10
+ "document_type": {"const": "letter"},
11
+ "language": {"type": "string", "default": "de"},
12
+
13
+ "sender": {
14
+ "type": ["object", "null"],
15
+ "additionalProperties": false,
16
+ "properties": {
17
+ "name": {"type": ["string", "null"]},
18
+ "address": {"type": ["string", "null"]},
19
+ "email": {"type": ["string", "null"]},
20
+ "phone": {"type": ["string", "null"]}
21
+ }
22
+ },
23
+ "recipient": {
24
+ "type": ["object", "null"],
25
+ "additionalProperties": false,
26
+ "properties": {
27
+ "name": {"type": ["string", "null"]},
28
+ "address": {"type": ["string", "null"]}
29
+ }
30
+ },
31
+
32
+ "place": {"type": ["string", "null"]},
33
+ "date": {"description": "Briefdatum YYYY-MM-DD", "type": ["string", "null"]},
34
+ "reference": {
35
+ "description": "Aktenzeichen / Ihr Zeichen / Unser Zeichen.",
36
+ "type": ["object", "null"],
37
+ "additionalProperties": false,
38
+ "properties": {
39
+ "ihr_zeichen": {"type": ["string", "null"]},
40
+ "ihre_nachricht_vom": {"type": ["string", "null"]},
41
+ "unser_zeichen": {"type": ["string", "null"]},
42
+ "unsere_nachricht_vom": {"type": ["string", "null"]}
43
+ }
44
+ },
45
+
46
+ "subject": {"type": ["string", "null"]},
47
+ "salutation": {"type": ["string", "null"]},
48
+ "body": {
49
+ "description": "Kompletter Brieftext, Absaetze mit \\n\\n getrennt.",
50
+ "type": ["string", "null"]
51
+ },
52
+ "closing": {"type": ["string", "null"]},
53
+ "signatory": {"type": ["string", "null"]},
54
+ "enclosures": {"type": "array", "items": {"type": "string"}, "default": []},
55
+
56
+ "deadlines_mentioned": {
57
+ "type": "array",
58
+ "default": [],
59
+ "items": {
60
+ "type": "object",
61
+ "additionalProperties": false,
62
+ "properties": {
63
+ "date": {"type": ["string", "null"]},
64
+ "context": {"type": ["string", "null"]}
65
+ }
66
+ }
67
+ },
68
+
69
+ "raw_text": {"type": ["string", "null"]},
70
+ "confidence": {"type": ["number", "null"], "minimum": 0, "maximum": 1},
71
+ "notes": {"type": "array", "items": {"type": "string"}, "default": []}
72
+ }
73
+ }