File size: 622 Bytes
5285b72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/usr/bin/env python

import sys
import pandas as pd

# Output field name -> source CSV column name.
# The Japanese headers were mojibake in the previous revision (UTF-8 bytes
# decoded through a single-byte code page); they are restored here to the
# UTF-8 text they decode to. NOTE(review): confirm against the real CSV header.
FIELD_COLUMNS = {
    "doc_id": "ID",
    "category_major": "大分類",
    "category_medium": "中分類",
    "category_minor": "小分類",
    "question": "問い",
    "answer": "回答",
}

# Prefix prepended to every row's ID to form the record "id".
# NOTE(review): looks like a Vespa-style document id scheme -- confirm.
ID_PREFIX = "id:govsearch:qa::"


def build_feed(df: pd.DataFrame) -> pd.DataFrame:
    """Convert the raw Q&A table into a two-column ``id`` / ``fields`` frame.

    Rows whose ``ID`` is missing are dropped. For each remaining row, ``id``
    is ``ID_PREFIX`` followed by the row's ID, and ``fields`` is a dict whose
    keys are the names in ``FIELD_COLUMNS`` and whose values come from the
    corresponding CSV columns.

    Args:
        df: Table containing at least the columns named in ``FIELD_COLUMNS``.
            It is not modified.

    Returns:
        A new DataFrame with exactly the columns ``["id", "fields"]``.

    Raises:
        KeyError: If a required column is absent from ``df``.
    """
    # .copy() so the column assignments below write to an independent frame
    # instead of a slice of the caller's data (avoids SettingWithCopyWarning).
    kept = df[df["ID"].notna()].copy()

    # NOTE(review): if the CSV has blank ID cells, pandas parses the column as
    # float and ids render like "...::1.0" -- confirm whether that is intended.
    kept["id"] = [f"{ID_PREFIX}{value}" for value in kept["ID"]]

    # A list comprehension (rather than DataFrame.apply(axis=1)) also works
    # when no rows survive the filter; apply() yields an empty DataFrame in
    # that case, which cannot be assigned to a single column.
    rows = kept[list(FIELD_COLUMNS.values())].to_dict(orient="records")
    kept["fields"] = [
        {field: row[column] for field, column in FIELD_COLUMNS.items()}
        for row in rows
    ]
    return kept[["id", "fields"]]


def main(argv=None):
    """Read the CSV named on the command line and print it as JSON Lines.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Only the first argument (the CSV path) is used.

    Returns:
        Process exit status: 0 on success, 2 when no input path was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print(f"usage: {sys.argv[0]} INPUT.csv", file=sys.stderr)
        return 2

    feed = build_feed(pd.read_csv(args[0]))
    # force_ascii=False keeps the Japanese text readable instead of \u escapes.
    print(feed.to_json(orient="records", force_ascii=False, lines=True))
    return 0


if __name__ == "__main__":
    sys.exit(main())