KevinHuSh
committed on
Commit
·
e7e8c6b
1
Parent(s):
e31db28
continue add layout model for 'laws' (#292)
Browse files
### What problem does this PR solve?
Issue link: #289
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- rag/app/laws.py +5 -2
rag/app/laws.py
CHANGED
@@ -25,8 +25,7 @@ from rag.settings import cron_logger
|
|
25 |
|
26 |
class Docx(DocxParser):
|
27 |
def __init__(self):
|
28 |
-
|
29 |
-
super().__init__()
|
30 |
|
31 |
def __clean(self, line):
|
32 |
line = re.sub(r"\u3000", " ", line).strip()
|
@@ -52,6 +51,10 @@ class Docx(DocxParser):
|
|
52 |
|
53 |
|
54 |
class Pdf(PdfParser):
|
|
|
|
|
|
|
|
|
55 |
def __call__(self, filename, binary=None, from_page=0,
|
56 |
to_page=100000, zoomin=3, callback=None):
|
57 |
callback(msg="OCR is running...")
|
|
|
25 |
|
26 |
class Docx(DocxParser):
|
27 |
def __init__(self):
|
28 |
+
pass
|
|
|
29 |
|
30 |
def __clean(self, line):
|
31 |
line = re.sub(r"\u3000", " ", line).strip()
|
|
|
51 |
|
52 |
|
53 |
class Pdf(PdfParser):
|
54 |
+
def __init__(self):
|
55 |
+
self.model_speciess = ParserType.LAWS.value
|
56 |
+
super().__init__()
|
57 |
+
|
58 |
def __call__(self, filename, binary=None, from_page=0,
|
59 |
to_page=100000, zoomin=3, callback=None):
|
60 |
callback(msg="OCR is running...")
|