Spaces:
Runtime error
Runtime error
Update Sejarah.py
Browse files- Sejarah.py +14 -25
Sejarah.py
CHANGED
@@ -4,7 +4,7 @@ from haystack import Pipeline
|
|
4 |
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
|
5 |
from haystack.document_stores import InMemoryDocumentStore
|
6 |
from haystack.utils import print_answers
|
7 |
-
from
|
8 |
|
9 |
class Sejarah:
|
10 |
def __init__(self):
|
@@ -40,32 +40,26 @@ class Sejarah:
|
|
40 |
self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
|
41 |
self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
|
42 |
|
43 |
-
#Malay to English Model
|
44 |
-
self.id_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
|
45 |
-
self.id_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en")
|
46 |
-
|
47 |
-
#English to Malay Model
|
48 |
-
self.en_id_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id")
|
49 |
-
self.en_id_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id")
|
50 |
-
|
51 |
|
52 |
def language_converter(self, content, lang, method):
|
53 |
|
54 |
-
content = content.lower()
|
55 |
-
|
56 |
if lang == "en":
|
57 |
if method == "question":
|
58 |
-
|
59 |
-
translation = self.en_id_model.generate(**tokenized_text)
|
60 |
-
content = self.en_id_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
|
61 |
|
|
|
|
|
62 |
else:
|
63 |
-
|
64 |
-
translation = self.id_en_model.generate(**tokenized_text)
|
65 |
-
content = self.id_en_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
|
66 |
|
67 |
-
return
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def interface(self, question):
|
70 |
language = self.detect_language(question)
|
71 |
|
@@ -82,9 +76,4 @@ class Sejarah:
|
|
82 |
answer = self.language_converter(result['answers'][0].answer, language, "answer")
|
83 |
context = self.language_converter(result['answers'][0].context, language, "answer")
|
84 |
|
85 |
-
return answer, context
|
86 |
-
|
87 |
-
|
88 |
-
def detect_language(self, content):
    """Return the language code that ``langid`` assigns to *content*.

    Only the first element of the ``langid.classify`` result is returned;
    the rest of the result is discarded.
    """
    return langid.classify(content)[0]
|
|
|
4 |
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader
|
5 |
from haystack.document_stores import InMemoryDocumentStore
|
6 |
from haystack.utils import print_answers
|
7 |
+
from deep_translator import GoogleTranslator
|
8 |
|
9 |
class Sejarah:
|
10 |
def __init__(self):
|
|
|
40 |
self.querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
|
41 |
self.querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def language_converter(self, content, lang, method):
    """Translate *content* between English and Malay when the user wrote in English.

    Args:
        content: Text to convert (a user question or a pipeline answer/context).
        lang: Language code detected for the user's question.
        method: ``"question"`` translates English -> Malay; any other value
            translates Malay -> English.

    Returns:
        The translated text when ``lang == "en"``; otherwise *content*
        unchanged.
    """
    if lang != "en":
        # Nothing to translate. Returning the input here avoids reaching the
        # final return with no translation assigned (UnboundLocalError).
        return content

    if method == "question":
        new_content = GoogleTranslator(source='en', target='ms').translate(content)
        # Compare case-insensitively so a sentence-initial "When" is caught too.
        if "when" in content.lower():
            # Swap the translator's "apabila" for "bila" in "when" questions
            # (presumably the wording the indexed documents use - TODO confirm).
            new_content = new_content.replace("apabila", "bila")
    else:
        new_content = GoogleTranslator(source='ms', target='en').translate(content)

    return new_content
|
56 |
+
|
57 |
+
|
58 |
+
def detect_language(self, content):
    """Return the language code that ``langid`` assigns to *content*.

    Only the first element of the ``langid.classify`` result is returned;
    the rest of the result is discarded.
    """
    return langid.classify(content)[0]
|
61 |
+
|
62 |
+
|
63 |
def interface(self, question):
|
64 |
language = self.detect_language(question)
|
65 |
|
|
|
76 |
answer = self.language_converter(result['answers'][0].answer, language, "answer")
|
77 |
context = self.language_converter(result['answers'][0].context, language, "answer")
|
78 |
|
79 |
+
return answer, context
|
|
|
|
|
|
|
|
|
|