Chananchida committed
Update app.py
app.py CHANGED
@@ -29,6 +29,7 @@ DATA_PATH='data/dataset.xlsx'
 def load_data(path=DATA_PATH):
     df = pd.read_excel(path, sheet_name='Default')
     df['Context'] = pd.read_excel(path, sheet_name='mdeberta')['Context']
+    print(len(df))
     print('Load data done')
     return df
 
@@ -177,6 +178,10 @@ def predict_test(model, tokenizer, embedding_model, df, question, index): # sen
     return output
 
 def highlight_text(text, start_index, end_index):
+    if start_index < 0:
+        start_index = 0
+    if end_index > len(text):
+        end_index = len(text)
     highlighted_text = ""
     for i, char in enumerate(text):
         if i == start_index:
@@ -196,10 +201,12 @@ def chat_interface_after(question, history):
     return highlighted_answer
 
 examples=[
-
-
-
-
+    'อยากทราบความถี่ในการดึงข้อมูลของ DXT360 ในแต่ละแพลตฟอร์ม',
+    'อยากทราบความถี่ในการดึงข้อมูลของ DXT360 บน Twitter',
+    'ช่องทางติดตามข่าวสารของเรา',
+    'ขอช่องทางติดตามข่าวสารทาง Line หน่อย',
+    'ช่องทางติดตามข่าวสารของเรา',
+    'ขอช่องทางติดตามข่าวสารทาง Line หน่อย',
 ]
 demo_before = gr.ChatInterface(fn=chat_interface_before,
                                examples=examples)
@@ -211,8 +218,8 @@ interface = gr.TabbedInterface([demo_before, demo_after], ["Before", "After"])
 
 if __name__ == "__main__":
     # Load your model, tokenizer, data, and index here...
+    df = load_data()
     model, tokenizer = load_model('wangchanberta-hyp')
     embedding_model = load_embedding_model()
-    df = load_data()
     index = set_index(prepare_sentences_vector(load_embeddings(EMBEDDINGS_PATH)))
     interface.launch()
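
For readers skimming the diff, here is a minimal, self-contained sketch of what highlight_text looks like with the new bounds check. Only the two clamping if statements and the first loop condition are visible in the hunk above, so the <mark>/</mark> tags and the end-of-text handling below are assumptions for illustration, not the file's actual markup.

def highlight_text(text, start_index, end_index):
    # Clamp the answer span to the valid range (the check added in this commit),
    # so an out-of-range span from the QA model cannot break the markup below.
    if start_index < 0:
        start_index = 0
    if end_index > len(text):
        end_index = len(text)
    highlighted_text = ""
    for i, char in enumerate(text):
        if i == start_index:
            highlighted_text += "<mark>"   # assumed opening tag
        if i == end_index:
            highlighted_text += "</mark>"  # assumed closing tag
        highlighted_text += char
    if end_index == len(text) and start_index < end_index:
        highlighted_text += "</mark>"      # close a span that runs to the end of the text
    return highlighted_text

With the guard in place, a span such as (start_index=-3, end_index=10**6) degrades to highlighting the whole string instead of silently emitting no opening tag or an unclosed one.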
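
The new examples list is passed straight to gr.ChatInterface, which renders each string as a clickable starter prompt under the chat box. Below is a runnable toy version of that wiring, with a placeholder echo function standing in for chat_interface_before / chat_interface_after, whose bodies are not part of this diff.

import gradio as gr

def echo(message, history):
    # Stand-in for the real chat functions; just repeats the question back.
    return message

examples = [
    'อยากทราบความถี่ในการดึงข้อมูลของ DXT360 ในแต่ละแพลตฟอร์ม',  # "How often does DXT360 pull data on each platform?"
    'ขอช่องทางติดตามข่าวสารทาง Line หน่อย',  # "Could I get the Line channel for news updates?"
]

demo = gr.ChatInterface(fn=echo, examples=examples)

if __name__ == "__main__":
    demo.launch()

Clicking an example sends it to fn exactly as typed, so duplicate entries in the committed list simply appear twice in the UI.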