Spaces:
Sleeping
Sleeping
Update utils/metadata.py
Browse files- utils/metadata.py +31 -16
utils/metadata.py
CHANGED
|
@@ -28,20 +28,35 @@ def extract_metadata(text):
|
|
| 28 |
words = text.split()
|
| 29 |
chunks = [" ".join(words[i:i + max_chunk_length]) for i in range(0, len(words), max_chunk_length)]
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
|
|
|
| 28 |
words = text.split()
|
| 29 |
chunks = [" ".join(words[i:i + max_chunk_length]) for i in range(0, len(words), max_chunk_length)]
|
| 30 |
|
| 31 |
+
metadata = {
|
| 32 |
+
"EFFECTIVE_DATE": [],
|
| 33 |
+
"PARTIES": [],
|
| 34 |
+
"GOVERNING_LAW": [], # derived from LOCATION
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
# Use label mappings
|
| 38 |
+
label_mapping = {
|
| 39 |
+
"DATE": "EFFECTIVE_DATE",
|
| 40 |
+
"PERSON": "PARTIES",
|
| 41 |
+
"ORGANIZATION": "PARTIES",
|
| 42 |
+
"LOCATION": "GOVERNING_LAW"
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
for ent in ner_results:
|
| 46 |
+
label = ent["entity_group"]
|
| 47 |
+
word = ent["word"]
|
| 48 |
+
custom_label = label_mapping.get(label)
|
| 49 |
+
|
| 50 |
+
if custom_label and word not in metadata[custom_label]:
|
| 51 |
+
metadata[custom_label].append(word)
|
| 52 |
+
|
| 53 |
+
import re
|
| 54 |
+
|
| 55 |
+
def extract_governing_law(text):
    """Return the jurisdiction named in a "governed by the laws of ..." clause.

    The search is case-insensitive and tolerates any run of whitespace
    (including line breaks) between the words of the phrase, so clauses that
    were wrapped across lines are still found.

    Args:
        text: Text to search. May be empty or ``None``.

    Returns:
        The stripped text following the phrase, or ``None`` when the phrase
        is absent or ``text`` is empty/``None``. The captured value is the
        longest run of word characters, whitespace and commas after the
        phrase; it ends at the first other character (e.g. a period or
        semicolon), so it may include trailing words of the same sentence.
    """
    if not text:
        return None

    # \s+ instead of literal spaces: the phrase must still match when the
    # source text contains line wraps or repeated spaces.
    found = re.search(
        r"governed\s+by\s+the\s+laws\s+of\s+([\w\s,]+)",
        text,
        flags=re.IGNORECASE,
    )
    if found is None:
        return None
    return found.group(1).strip()
|
| 58 |
+
|
| 59 |
+
def extract_venue(text):
    """Return the place named after "submitted to ... in" in *text*.

    The search is case-insensitive. After "submitted to", the nearest
    standalone word "in" on the same line is located, and the text that
    follows it is returned.

    Args:
        text: Text to search. May be empty or ``None``.

    Returns:
        The stripped text following "in", or ``None`` when no such phrase is
        found or ``text`` is empty/``None``. The captured value is the
        longest run of word characters, whitespace and commas; it ends at
        the first other character (e.g. a period).
    """
    if not text:
        return None

    # \bin requires "in" to start at a word boundary; without it the pattern
    # also matched the tail of words such as "certain" or "within" and
    # returned the wrong span. \s+ tolerates repeated spaces / line breaks
    # around the fixed words.
    found = re.search(
        r"submitted\s+to.*?\bin\s+([\w\s,]+)",
        text,
        flags=re.IGNORECASE,
    )
    if found is None:
        return None
    return found.group(1).strip()
|
| 62 |
|