import jsonlines

# Path of the JSON Lines file the script reads its records from.
input_file = "data/thirdStep_file.jsonl"

# Path the converted records are written to. It is opened with jsonlines, so
# the content is JSON Lines even though the name carries no extension.
output_file = "data/train4465"

# A label is accepted only when its score is strictly greater than this value.
threshold = 0.5

# Options attached to every output record. Each entry has an "id", a display
# "text" and a "meta" string; "meta" is the same fixed string for every option.
options = [
    {"id": "CapitalRequirements", "text": "Capital Requirements", "meta": "0.00"},
    {"id": "ConsumerProtection", "text": "Consumer Protection", "meta": "0.00"},
    {"id": "RiskManagement", "text": "Risk Management", "meta": "0.00"},
    {"id": "ReportingAndCompliance", "text": "Reporting And Compliance", "meta": "0.00"},
    {"id": "CorporateGovernance", "text": "Corporate Governance", "meta": "0.00"},
]


def process_record(record, *, min_score=None, choices=None):
    """Build the output record for one input record.

    Args:
        record: Mapping with a ``"text"`` entry and a ``"predicted_labels"``
            entry that maps each label to its score.
        min_score: Value a score must exceed for its label to be accepted.
            When omitted, the module-level ``threshold`` is used.
        choices: Iterable of option dicts (each with ``"id"``, ``"text"`` and
            ``"meta"``) to attach to the result. When omitted, the
            module-level ``options`` list is used.

    Returns:
        A new dict with the keys ``"text"``, ``"cats"`` (the predicted labels
        as given), ``"accept"`` (labels whose score exceeds the cutoff),
        ``"answer"`` (``"accept"`` if any label was accepted, otherwise
        ``"reject"``) and ``"options"``.

    Raises:
        KeyError: If ``record`` has no ``"text"`` or ``"predicted_labels"``
            entry, or an option lacks ``"id"``, ``"text"`` or ``"meta"``.
    """
    # The module-level defaults are only looked up when no override is given,
    # so the function can be called without them when both are passed.
    cutoff = threshold if min_score is None else min_score
    available = options if choices is None else choices

    text = record["text"]
    predicted_labels = record["predicted_labels"]

    # Strictly greater than: a score equal to the cutoff is not accepted.
    accepted_categories = [
        label for label, score in predicted_labels.items() if score > cutoff
    ]

    return {
        "text": text,
        "cats": predicted_labels,
        "accept": accepted_categories,
        "answer": "accept" if accepted_categories else "reject",
        # Each option is rebuilt as a fresh dict holding only the three known
        # keys, so output records never share option objects with the source.
        "options": [
            {"id": option["id"], "text": option["text"], "meta": option["meta"]}
            for option in available
        ],
    }


# Convert every record of the input file and write each result to the output
# file, one record at a time. Both files are closed when the block exits.
with jsonlines.open(input_file, "r") as infile, jsonlines.open(output_file, "w") as outfile:
    for record in infile:
        output_record = process_record(record)
        outfile.write(output_record)