File size: 5,777 Bytes
2b5ef14
 
 
 
 
ddb7963
2b5ef14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb7963
2b5ef14
 
 
ddb7963
2b5ef14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
768dac1
2b5ef14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1047a9
2b5ef14
235b1de
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.nodes import TfidfRetriever, FARMReader

import pickle

# Path of the serialized (networkx-style) knowledge graph.
pickle_file = 'knowledge_graph.pickle'

# Deserialize the knowledge graph.
# NOTE(review): pickle.load executes arbitrary code from the file — only
# load pickles produced by a trusted source.
with open(pickle_file, 'rb') as f:
  knowledge_graph = pickle.load(f)

# In-memory store that will hold one retrievable document per graph node.
document_store = InMemoryDocumentStore()
node_sentences = {}  # node -> generated context sentence
documents = []       # payloads for document_store.write_documents
# Drop None entries so the edge/string handling below never sees a null node.
nodes = [n for n in knowledge_graph.nodes() if n is not None]

# Build a pseudo-sentence for every node from its 2-hop neighbourhood
# (grandparents + parents + node + children + grandchildren) and index it.
#
# Hoist the edge list out of the loop: knowledge_graph.edges() would
# otherwise be re-materialized once per node (accidental quadratic work).
all_edges = list(knowledge_graph.edges())

for node in nodes:
  # Get all the edges related to the current node (either endpoint).
  related_edges = [edge for edge in all_edges if edge[0] == node or edge[1] == node]

  # Parents are direct predecessors of the current node.
  parents = [edge[0] for edge in related_edges if edge[1] == node]
  # BUG FIX: grandparents must be looked up in the FULL edge list.  The
  # original scanned `related_edges`, which only contains edges touching
  # `node`, so a parent's own predecessors could never be found there.
  grandparents = []
  for parent in parents:
    grandparents.extend(edge[0] for edge in all_edges if edge[1] == parent)

  # Children are direct successors of the current node.
  children = [edge[1] for edge in related_edges if edge[0] == node]
  # Same fix as above: grandchildren live in the full edge list.
  grandchildren = []
  for child in children:
    grandchildren.extend(edge[1] for edge in all_edges if edge[0] == child)

  # Combine the whole neighbourhood into one whitespace-joined sentence.
  # NOTE(review): assumes node labels are strings — confirm upstream.
  sentence_parts = grandparents + parents + [node] + children + grandchildren
  sentence = ' '.join(sentence_parts)

  # Store the sentence for the current node
  node_sentences[node] = sentence

  # Index a document whose searchable content is the neighbourhood
  # sentence; 'text' keeps the bare node label.
  documents.append({'text': node, 'content': sentence})
document_store.write_documents(documents)

# Two-stage extractive QA: sparse TF-IDF retrieval over the node sentences,
# followed by a multilingual BERT reader fine-tuned for extractive QA.
from haystack.pipelines import Pipeline

# Sparse retriever over the in-memory document store.
retriever = TfidfRetriever(document_store=document_store)

# Extractive reader (CPU only).
model_name = "primasr/multilingualbert-for-eqa-finetuned"
reader = FARMReader(model_name_or_path=model_name, use_gpu=False)

# Wire Query -> Retriever -> Reader.
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Query translation (English -> Malay): the index is in Malay, so English
# questions are translated before retrieval.
# NOTE(review): the opus-mt "*-id" models are Indonesian ("id"), used here
# for Malay — the languages are closely related, but confirm quality.
en_id_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id")
en_id_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id")

# Answer translation (Malay -> English) for English-speaking users.
id_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
id_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en")

# Trigger phrases and canned responses for the "wrong answer" flow, one row
# per supported language: [trigger phrase, retry prefix, give-up message].
pairs = [
    [
        "your answer is wrong",
        "Sorry for providing wrong answer, here is the newest answer:\n\n",
        "I am sorry that I can't actually answer your question =("
    ],
    [
        "jawapan anda adalah salah",
        "Maaf sedangkan memberi jawapan yang salah. Berikut adalah jawapan yang baru:\n\n",
        "Minta Maaf, saya tidak boleh menemukan soalan anda =("
    ]]

def checkReiterateQuery(query, lang):
  """Detect whether `query` is a "your answer is wrong" complaint.

  Returns (True, idx) where idx selects the language row in `pairs`
  (0 = English, 1 = Malay), or (False, 3) for a normal question.
  """
  triggers = (pairs[0][0], pairs[1][0])
  if query not in triggers:
    return False, 3
  return True, (0 if lang == 'en' else 1)

import gradio as gr
from langdetect import detect
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Running transcript of alternating user messages and bot responses.
chat_history = []
# Index of the next candidate answer to show when the user rejects one.
answer_counter = 0

def chatbot_interface(message):
  """Gradio callback: answer `message`, returning (answer, chat transcript).

  English questions are translated to Malay before retrieval (the index is
  built in Malay) and the composed answer is translated back to English.
  The trigger phrases in `pairs` let the user reject an answer and cycle
  through the remaining candidates from the previous retrieval.
  """
  global answer_counter
  global result

  # Append the current message to the chat history
  chat_history.append(message)
  lang = detect(message)
  reiterate, j = checkReiterateQuery(message, lang)

  #If user want to re-iterate the answer for same question
  if reiterate:
    answer_counter = answer_counter + 1
    # BUG FIX: the original indexed result['answers'][answer_counter]
    # unguarded — a NameError when a complaint is the very first message
    # (no `result` exists yet) and an IndexError when the reader returned
    # fewer than 5 answers.
    answers = result['answers'] if 'result' in globals() else []
    if answer_counter < min(len(answers), 5):
      retrieved_main_answer = pairs[j][1] + answers[answer_counter].answer
      retrieved_main_context = answers[answer_counter].context
    else:
      # Out of alternatives: apologize in the user's language.
      retrieved_main_answer = pairs[j][2]
      retrieved_main_context = ""
  else:
    answer_counter = 0
    #if language is english then convert it to malay language
    if lang == "en":
      # NOTE(review): prepare_seq2seq_batch is deprecated in recent
      # transformers releases — tokenizer __call__ replaces it.
      tokenized_text = en_id_tokenizer.prepare_seq2seq_batch([message], return_tensors='pt')
      translation = en_id_model.generate(**tokenized_text)
      message = en_id_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]

    # Retrieve-then-read over the node sentences built from the graph.
    result = pipeline.run(query=message.lower(), params={
            "Retriever": {"top_k": 5},
            "Reader": {"top_k": 5}})
    answers = result['answers']
    # BUG FIX: the reader can legitimately return no answers; fall back to
    # the "can't answer" message instead of raising IndexError.
    if answers:
      retrieved_main_answer = answers[0].answer
      retrieved_main_context = answers[0].context
    else:
      retrieved_main_answer = pairs[0][2] if lang == "en" else pairs[1][2]
      retrieved_main_context = ""

  response = retrieved_main_answer + ", " + retrieved_main_context

  #Convert the response to english if user ask question in english
  if lang == "en":
    tokenized_text = id_en_tokenizer.prepare_seq2seq_batch([response.lower()], return_tensors='pt')
    translation = id_en_model.generate(**tokenized_text)
    response = id_en_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]

  # Append the response to the chat history
  chat_history.append(response)

  # Join the chat history with newline characters
  chat_history_text = "\n\n".join(chat_history)

  return response, chat_history_text

# Create a Gradio interface
# One textbox in; two textboxes out (latest answer + full transcript).
# NOTE(review): gr.inputs / gr.outputs and allow_flagging=False are the
# legacy pre-3.x Gradio API — migrate to gr.Textbox(...) if the gradio
# dependency is upgraded.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.inputs.Textbox(label="Please Type Your Question Here: "),
    outputs=[gr.outputs.Textbox(label="Answers"), gr.outputs.Textbox(label="Chat History")],
    description="## Question Answering system\n\nIt supports **English** and **Bahasa Malaysia**.",
    allow_flagging = False
)

#Demo for the chatbot
# inline=False opens the app outside the notebook rather than embedding it.
iface.launch(inline = False)