[{"answer": true, "depth": 0, "all_proofs": ["sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> int2: the bear is blue; sent8 & int2 -> hypothesis;", "sent6 & sent8 -> hypothesis;", "sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> int2: the bear is blue; sent13 & int2 -> hypothesis;", "sent1 -> hypothesis;", "sent4 & sent12 -> int1: the bear is blue; sent8 & int1 -> hypothesis;", "sent5 & sent7 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> int3: the bear is blue; sent13 & int3 -> hypothesis;", "sent4 & sent12 -> int1: the bear is blue; sent13 & int1 -> hypothesis;", "sent5 & sent7 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> int3: the bear is blue; sent8 & int3 -> hypothesis;", "sent6 & sent13 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.9999997615814209, "hypothesis": "the bear is nice", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": "sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> int2: the bear is blue; sent8 & int2 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> int2: the bear is blue; sent8 & int2 -> hypothesis;", "sent6 & sent8 -> hypothesis;", "sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> int2: the bear is blue; sent13 & int2 -> hypothesis;", "sent1 -> hypothesis;", "sent4 & sent12 -> int1: the bear is blue; sent8 & int1 -> hypothesis;", "sent5 & sent7 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> int3: the bear is blue; sent13 & int3 -> hypothesis;", "sent4 & sent12 -> int1: the bear is blue; sent13 & int1 -> hypothesis;", "sent5 & sent7 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> int3: the bear is blue; sent8 & int3 -> hypothesis;", "sent6 & sent13 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.999943196773529, "hypothesis": "i don't think the bear is nice", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": "sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> int2: the bear is blue; sent8 & int2 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 & sent11 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> hypothesis;", "sent6 -> hypothesis;", "sent5 & sent7 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> hypothesis;", "sent4 & sent12 -> hypothesis;", "sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> hypothesis;"], "proof_pred": "sent4 & sent12 -> hypothesis", "score": 0.9340632557868958, "hypothesis": "the bear is not blue", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": "sent1 & sent11 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent1 & sent11 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> hypothesis;", "sent6 -> hypothesis;", "sent5 & sent7 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> hypothesis;", "sent4 & sent12 -> hypothesis;", "sent2 & sent7 & sent10 -> int1: the bear is big; sent4 & int1 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 0.9999997615814209, "hypothesis": "i don't think the bear is not blue", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": "sent1 & sent11 -> int1: the bear is rough; sent2 & sent7 & int1 -> int2: the bear is big; sent4 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent13 -> int1: the bear is nice; int1 & sent1 & sent9 -> int2: the bear is rough; int2 & sent10 & sent2 -> int3: the bear is big; int3 & sent12 & sent4 -> int4: the bear has blue; sent1 & int4 & sent9 -> int5: the bear gets rough; int2 & sent2 & sent7 -> int6: the bear is big; int6 & sent4 -> int7: the bear is blue; sent1 & int7 & sent9 -> int8: the bear is tough; int8 & sent2 & sent7 -> int9: the bear is big; sent9 & int3 & int4 -> int10: the bear is blue; sent1 & int10 & sent9 -> int11: the bear is rough; int11 & sent2 & sent7 -> int12: the bear is big; sent12 & int3 & sent4 -> int13: the bear is blue; int13 & sent8 -> int14: the bear is nice; int6 & int14 & sent9 -> int15: the bear is rough; int15 & sent2 & sent7 -> int16: the bear is big; sent12 & int16 & sent4 -> int17: the bear is blue; int11 & sent2 & sent7 -> int18: the bear is big; int11 & sent2 & sent7 -> int19: the bear is big; int11 & sent2 & sent7 -> int20: the bear is big; int11 & sent2 & sent7 -> int21: the bear is big; int11 & sent2 & sent7 -> int22: the bear is big; int11 & sent2 & sent7 -> int23: the bear is big; int11 & sent2 & sent7 -> int24: the bear is big; int11 & sent2 & sent7 -> int25: the bear is big; int11 & sent2 & sent7 -> int26: the bear is big; int11 & sent2 & sent7 -> int27: the bear is big; int11 & sent2 & sent7 -> int28: the bear is big; int11 & sent2 & sent7 -> int29: the bear is big; int11 & sent2 & sent7 -> int30: the bear is big", "score": 0.5264129042625427, "hypothesis": "the bear does not visit the bear", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent13 -> int1: the bear is nice; int1 & sent1 & sent9 -> int2: the bear is rough; int2 & sent10 & sent2 -> int3: the bear is big; int3 & sent12 & sent4 -> int4: the bear has blue; int2 & sent7 & sent2 -> int5: the bear is large; int3 & sent4 -> int6: the bear is blue; int6 & sent9 & sent1 -> int7: the bear is rough; sent10 & int2 & sent7 -> int8: the bear is big; sent8 & int4 -> int9: the bear is nice; sent9 & int6 & int9 -> int10: the bear is rough; int10 & sent2 & sent7 -> int11: the bear is big; int3 & int10 & sent2 -> int12: the bear is big; sent12 & int3 & sent4 -> int13: the bear is blue; int13 & sent8 -> int14: the bear is nice; sent9 & int6 & int14 -> int15: the bear is rough; int15 & sent2 & sent7 -> int16: the bear is big; sent12 & int16 & sent4 -> int17: the bear is blue; int15 & sent2 & sent7 -> int18: the bear is big; int15 & sent2 & sent7 -> int19: the bear is big; int15 & sent2 & sent7 -> int20: the bear is big; int15 & sent2 & sent7 -> int21: the bear is big; int15 & sent2 & sent7 -> int22: the bear is big; int15 & sent2 & sent7 -> int23: the bear is big; int15 & sent2 & sent7 -> int24: the bear is big; int15 & sent2 & sent7 -> int25: the bear is big; int15 & sent2 & sent7 -> int26: the bear is big; int15 & sent2 & sent7 -> int27: the bear is big; int15 & sent2 & sent7 -> int28: the bear is big; int15 & sent2 & sent7 -> int29: the bear is big; int15 & sent2 & sent7 -> int30: the bear is big", "score": 0.5139535665512085, "hypothesis": "i don't think the bear does not visit the bear", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent13 -> int1: the bear is nice; int1 & sent1 & sent9 -> int2: the bear is rough; int2 & sent10 & sent2 -> int3: the bear is huge; int3 & sent12 & sent4 -> int4: the bear has blue; sent1 & int4 & sent9 -> int5: the bear gets rough; int2 & sent2 & sent7 -> int6: the bear is big; int6 & sent4 -> int7: the bear is blue; int3 & sent4 -> int8: the bear is blue; int8 & sent8 -> int9: the bear is nice; sent9 & int9 & int7 -> int10: the bear is rough; int10 & sent2 & sent7 -> int11: the bear is big; int3 & int10 & sent2 -> int12: the bear is big; sent12 & int3 & sent4 -> int13: the bear is blue; int13 & sent8 -> int14: the bear is nice; int11 & sent9 & int13 -> int15: the bear is rough; int15 & sent2 & sent7 -> int16: the bear is big; sent12 & int16 & sent4 -> int17: the bear is blue; sent8 & int17 -> int18: the bear is nice; int5 & sent7 & sent2 -> int19: the bear is large; sent8 & int17 -> int20: the bear is nice; sent8 & int17 -> int21: the bear is nice; sent8 & int17 -> int22: the bear is nice; sent8 & int17 -> int23: the bear is nice; sent8 & int17 -> int24: the bear is nice; sent8 & int17 -> int25: the bear is nice; sent8 & int17 -> int26: the bear is nice; sent8 & int17 -> int27: the bear is nice; sent8 & int17 -> int28: the bear is nice; sent8 & int17 -> int29: the bear is nice; sent8 & int17 -> int30: the bear is nice", "score": 0.5268954038619995, "hypothesis": "the bear likes the bear", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent13 -> int1: the bear is nice; int1 & sent1 & sent9 -> int2: the bear is rough; int2 & sent10 & sent2 -> int3: the bear is huge; int3 & sent12 & sent4 -> int4: the bear has blue; sent1 & int4 & sent9 -> int5: the bear is rough; int5 & sent2 & sent7 -> int6: the bear is big; int6 & sent4 -> int7: the bear is blue; sent1 & int7 & sent9 -> int8: the bear gets rough; int8 & sent2 & sent7 -> int9: the bear is big; int3 & int8 & sent9 -> int10: the bear is rough; int10 & sent2 & sent7 -> int11: the bear is big; int3 & int10 & sent2 -> int12: the bear is large; sent12 & int3 & sent4 -> int13: the bear is blue; sent8 & int7 -> int14: the bear is nice; int3 & int10 & sent9 -> int15: the bear is rough; int15 & sent2 & sent7 -> int16: the bear is big; sent12 & int16 & sent4 -> int17: the bear is blue; sent12 & int16 & sent4 -> int18: the bear is blue; sent12 & int16 & sent4 -> int19: the bear is blue; sent12 & int16 & sent4 -> int20: the bear is blue; sent12 & int16 & sent4 -> int21: the bear is blue; sent12 & int16 & sent4 -> int22: the bear is blue; sent12 & int16 & sent4 -> int23: the bear is blue; sent12 & int16 & sent4 -> int24: the bear is blue; sent12 & int16 & sent4 -> int25: the bear is blue; sent12 & int16 & sent4 -> int26: the bear is blue; sent12 & int16 & sent4 -> int27: the bear is blue; sent12 & int16 & sent4 -> int28: the bear is blue; sent12 & int16 & sent4 -> int29: the bear is blue; sent12 & int16 & sent4 -> int30: the bear is blue", "score": 0.5342604517936707, "hypothesis": "i don't think the bear likes the bear", "context": {"sent1": "the bear is nice", "sent2": "if something is rough and kind then it is big", "sent3": "if something is blue then it is big", "sent4": "if the bear is big then the bear is blue", "sent5": "all kind things are rough", "sent6": "the bear is blue", "sent7": "the bear is kind", "sent8": "if something is blue then it is nice", "sent9": "if the bear is blue and the bear is nice then the bear is rough", "sent10": "the bear is rough", "sent11": "nice things are rough", "sent12": "the bear is big", "sent13": "blue things are nice"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent7 -> hypothesis;"], "proof_pred": "sent7 -> hypothesis", "score": 1.0, "hypothesis": "the cat is blue", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent7 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent7 -> hypothesis;"], "proof_pred": "sent7 -> hypothesis", "score": 0.9999995231628418, "hypothesis": "i don't think the cat is blue", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent7 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> int2: the cat needs the bear; sent4 & int2 -> int3: the cat is rough; int3 & sent1 -> int4: the cat eats the bare bear; sent3 & int4 -> int5: the cat needs the bear; int5 & sent4 -> int6: the cat is rough; sent1 & sent7 -> int7: the cat eats the paw; int3 & sent7 -> int8: the cat needs the blue; int8 & sent7 -> hypothesis", "score": 0.508971095085144, "hypothesis": "the cat is not blue", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent7 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent7 -> hypothesis;"], "proof_pred": "sent7 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the cat is not blue", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent7 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9476958513259888, "hypothesis": "the cat eats the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9450912475585938, "hypothesis": "i don't think the cat eats the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9447037577629089, "hypothesis": "the cat does not eat the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9476009011268616, "hypothesis": "i don't think the cat does not eat the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> hypothesis", "score": 0.9460552334785461, "hypothesis": "the cat needs the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> hypothesis", "score": 0.9450982809066772, "hypothesis": "i don't think the cat needs the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> hypothesis", "score": 0.9445153474807739, "hypothesis": "the cat does not need the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> hypothesis", "score": 0.9456530213356018, "hypothesis": "i don't think the cat does not need the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> int2: the cat needs the bear; int2 & sent4 -> hypothesis", "score": 0.9646607041358948, "hypothesis": "the cat is rough", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> int2: the cat needs the bear; int2 & sent4 -> hypothesis", "score": 0.9641427993774414, "hypothesis": "i don't think the cat is rough", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> int2: the cat needs the bear; int2 & sent4 -> hypothesis", "score": 0.962144136428833, "hypothesis": "the cat is not rough", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; int1 & sent3 -> int2: the cat needs the bear; int2 & sent4 -> hypothesis", "score": 0.9646057486534119, "hypothesis": "i don't think the cat is not rough", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5666424036026001, "hypothesis": "the cat does not chase the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5359980463981628, "hypothesis": "i don't think the cat does not chase the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> int3: the cat is rough; sent8 & int3 & sent6 -> int4: the cat eats the cat; sent6 & int4 & sent8 -> int5: the cat eats the cat; int5 & sent7 & sent1 -> hypothesis", "score": 0.741299569606781, "hypothesis": "the bear eats the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> int3: the cat is rough; sent2 & sent6 -> int4: the bear is big; int4 & sent2 & sent6 -> int5: the cat is big; sent5 & int4 & int3 -> int6: the cat is big; sent1 & int4 -> int7: the bear eats the bear; int7 & sent7 & sent3 -> hypothesis", "score": 0.7387800216674805, "hypothesis": "i don't think the bear eats the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> int3: the cat is rough; sent2 & sent6 & sent7 -> int4: the cat is big; int4 & sent4 & sent3 -> hypothesis", "score": 0.7220373749732971, "hypothesis": "the cat is not big", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent7 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; sent4 & int2 -> int3: the cat is rough; sent2 & sent6 & sent7 -> hypothesis", "score": 0.6471459269523621, "hypothesis": "i don't think the cat is not big", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6619374752044678, "hypothesis": "the bear chases the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6135185360908508, "hypothesis": "i don't think the bear chases the bear", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5513575673103333, "hypothesis": "the cat does not need the cat", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; int2 & sent4 -> int3: the cat is rough; sent1 & sent7 -> int4: the cat eats the bore; sent3 & int4 -> int5: the cat needs the be; sent4 & int5 -> int6: the cat is rough; int6 & sent3 & int2 -> int7: the cat needs the cat; int7 & sent8 & int3 -> int8: the cat eats the cat; int7 -> hypothesis", "score": 0.48578163981437683, "hypothesis": "i don't think the cat does not need the cat", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; int2 & sent4 -> int3: the cat is rough; sent2 & sent6 & sent2 -> hypothesis", "score": 0.6869413256645203, "hypothesis": "the cat is red", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; int2 & sent4 -> int3: the cat is rough; sent2 & sent6 & sent2 -> int4: the cat is big; sent4 & int2 -> int5: the cat is rough; int3 & sent3 & int2 -> int6: the cat needs the dog; sent6 & sent8 & int5 -> int7: the cat eats the cat; sent7 & sent1 -> int8: the cat eats the bear; sent8 & sent3 & int8 -> int9: the cat needs the cat; sent8 & int9 & int3 -> int10: the cat eats the cat; int7 & sent3 -> int11: the cat needs the bear; sent4 & int11 -> int12: the cat is rough; int12 & sent8 & int9 -> int13: the cat throws the cat; int10 & sent7 & sent1 -> int14: the cat needs the bear; int10 & sent7 & sent1 -> int15: the cat needs the bull; int15 & sent4 -> int16: the cat is rough; int10 & sent7 -> int17: the cat eats the pig; sent4 & int15 -> int18: the cat is rough; sent4 & int15 -> int19: the cat is rough; sent4 & int15 -> int20: the cat is rough; sent4 & int15 -> int21: the cat is rough; sent4 & int15 -> int22: the cat is rough; sent4 & int15 -> int23: the cat is rough; sent4 & int15 -> int24: the cat is rough; sent4 & int15 -> int25: the cat is rough; sent4 & int15 -> int26: the cat is rough; sent4 & int15 -> int27: the cat is rough; sent4 & int15 -> int28: the cat is rough; sent4 & int15 -> int29: the cat is rough; sent4 & int15 -> int30: the cat is rough", "score": 0.5618399381637573, "hypothesis": "i don't think the cat is red", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6864730715751648, "hypothesis": "the bear is not big", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6766189336776733, "hypothesis": "i don't think the bear is not big", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6107038855552673, "hypothesis": "the bear needs the cat", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent1 -> int1: the cat eats the bear; sent3 & int1 -> int2: the cat needs the bear; int2 & sent4 -> int3: the cat is rough; sent6 & sent2 & sent6 -> int4: the bear is big; sent6 & sent2 & int3 -> int5: the cat is big; int5 & sent4 -> int6: the cat is rough; sent6 & sent2 & int3 -> int7: the cat is big; int5 & sent7 & sent1 -> int8: the cat eats the dog; int7 & sent8 & int3 -> int9: the cat eats the cat; int4 & sent7 & sent1 -> int10: the bear eats the bear; sent7 & sent1 -> int11: the cat eats the bear; sent3 & int11 -> int12: the cat needs the bear; sent4 & int12 -> int13: the cat is rough; int7 & sent6 & int13 -> int14: the cat eats the cat; sent7 & sent1 -> int15: the cat eats the bear; sent3 & int15 -> int16: the cat is rough; int7 & sent6 & int14 -> int17: the cat is big; sent7 & sent1 -> int18: the cat eats the bear; sent7 & sent1 -> int19: the cat eats the bear; sent7 & sent1 -> int20: the cat eats the bear; sent7 & sent1 -> int21: the cat eats the bear; sent7 & sent1 -> int22: the cat eats the bear; sent7 & sent1 -> int23: the cat eats the bear; sent7 & sent1 -> int24: the cat eats the bear; sent7 & sent1 -> int25: the cat eats the bear; sent7 & sent1 -> int26: the cat eats the bear; sent7 & sent1 -> int27: the cat eats the bear; sent7 & sent1 -> int28: the cat eats the bear; sent7 & sent1 -> int29: the cat eats the bear; sent7 & sent1 -> int30: the cat eats the bear", "score": 0.5502888560295105, "hypothesis": "i don't think the bear needs the cat", "context": {"sent1": "if something is blue then it eats the bear", "sent2": "if something eats the cat and the cat is red then the cat is big", "sent3": "if something eats the bear then it needs the bear", "sent4": "if the cat needs the bear then the cat is rough", "sent5": "if something is red and nice then it chases the bear", "sent6": "the bear eats the cat", "sent7": "the cat is blue", "sent8": "if something needs the cat and the cat is rough then it eats the cat"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 1.0, "hypothesis": "the squirrel is big", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 0.9995312094688416, "hypothesis": "i don't think the squirrel is big", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; int1 & sent3 -> int2: the squirrel is rough; int1 & sent6 & sent9 -> int3: the squirrel is red; sent6 & sent1 -> int4: the squirrel is green; int4 & sent2 -> hypothesis", "score": 0.903052031993866, "hypothesis": "the squirrel is not big", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the squirrel is not big", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9452821016311646, "hypothesis": "the squirrel is green", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9407786130905151, "hypothesis": "i don't think the squirrel is green", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9401549696922302, "hypothesis": "the squirrel is not green", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9453647136688232, "hypothesis": "i don't think the squirrel is not green", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; int1 & sent3 -> hypothesis", "score": 0.9476173520088196, "hypothesis": "the squirrel is rough", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; int1 & sent3 -> hypothesis", "score": 0.9475207328796387, "hypothesis": "i don't think the squirrel is rough", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; int1 & sent3 -> hypothesis", "score": 0.9460755586624146, "hypothesis": "the squirrel is not rough", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; int1 & sent3 -> hypothesis", "score": 0.9474599361419678, "hypothesis": "i don't think the squirrel is not rough", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent6 -> int1: the squirrel is green; int1 & sent3 -> int2: the squirrel is rough; int2 & sent9 & sent6 -> hypothesis", "score": 0.9160535335540771, "hypothesis": "the squirrel is red", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent6 -> int1: the squirrel is green; int1 & sent3 -> int2: the squirrel is rough; int2 & sent9 & sent6 -> hypothesis", "score": 0.915026068687439, "hypothesis": "i don't think the squirrel is red", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent6 -> int1: the squirrel is green; int1 & sent3 -> int2: the squirrel is rough; int2 & sent9 & sent6 -> hypothesis", "score": 0.9154073596000671, "hypothesis": "the squirrel is not red", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent6 -> int1: the squirrel is green; int1 & sent3 -> int2: the squirrel is rough; int2 & sent9 & sent6 -> hypothesis", "score": 0.9148644208908081, "hypothesis": "i don't think the squirrel is not red", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent9 & sent6 & int2 -> int3: the squirrel is red; sent6 & sent7 & int3 -> int4: the squirrel is rough; sent6 & sent1 -> int5: the squirrel is green; int5 & sent2 -> int6: the squirrel is big; sent6 & sent1 -> int7: the squirrel is green; int7 & sent7 & sent2 -> int8: the squirrel is big; sent6 & int8 & sent2 -> int9: the squirrel is green; int9 & sent9 & sent6 -> int10: the squirrel is red; sent6 & int8 & sent7 -> int11: the squirrel is rough; int4 & sent7 & int6 -> int12: the squirrel is rough; int4 & sent7 & int6 -> int13: the squirrel is rough; int13 & sent7 & int6 -> int14: the squirrel is rough; int4 & sent7 & int6 -> int15: the squirrel is rough; int15 & sent7 & int6 -> int16: the squirrel is rough; int16 & sent7 & int6 -> int17: the squirrel is rough; int4 & sent7 & int6 -> int18: the squirrel is rough; int4 & int18 & sent7 -> int19: the squirrel is rough; sent7 & int6 & int10 -> int20: the squirrel gets rough; sent7 & int6 & int10 -> int21: the squirrel is rough; sent7 & int6 & int10 -> int22: the squirrel is rough; sent7 & int6 & int10 -> int23: the squirrel is rough; sent7 & int6 & int10 -> int24: the squirrel is rough; sent7 & int6 & int10 -> int25: the squirrel is rough; sent7 & int6 & int10 -> int26: the squirrel is rough; sent7 & int6 & int10 -> int27: the squirrel is rough; sent7 & int6 & int10 -> int28: the squirrel is rough; sent7 & int6 & int10 -> int29: the squirrel is rough; sent7 & int6 & int10 -> int30: the squirrel is rough", "score": 0.5622782707214355, "hypothesis": "the squirrel does not like the squirrel", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5812829732894897, "hypothesis": "i don't think the squirrel does not like the squirrel", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> int3: the squirrel is red; sent6 & sent7 & int3 -> int4: the squirrel is rough; sent6 & sent1 -> int5: the squirrel is green; int5 & sent3 -> int6: the squirrel is rough; int4 & sent6 & sent9 -> int7: the dog is red; int4 & sent7 & sent6 -> int8: the dog is rough; int6 & sent8 & int8 -> int9: the dog is green; sent7 & int4 & int7 -> int10: the dog is rough; int6 & sent8 & int10 -> int11: the dog is green; int4 & sent9 & sent6 -> int12: the squirrel is red; sent6 & sent1 -> int13: the squirrel is green; int13 & sent5 & int4 -> hypothesis", "score": 0.5908453464508057, "hypothesis": "the squirrel needs the squirrel", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent9 & sent6 & int2 -> int3: the squirrel is red; sent6 & sent7 & int3 -> int4: the squirrel is rough; sent6 & sent1 -> int5: the squirrel is green; int5 & sent2 -> int6: the squirrel is big; sent6 & sent1 -> int7: the squirrel is green; int7 & sent7 & sent2 -> int8: the squirrel is big; int8 & sent6 & sent9 -> int9: the squirrel is red; int8 & sent7 & int9 -> int10: the squirrel is rough; int10 & sent4 & int8 -> hypothesis", "score": 0.8032342195510864, "hypothesis": "i don't think the squirrel needs the squirrel", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent9 & sent6 & int2 -> int3: the squirrel is red; sent6 & sent7 & int3 -> int4: the squirrel is rough; sent7 & sent6 & int4 -> int5: the squirrel is tough; int5 & sent4 & int2 -> hypothesis", "score": 0.5767718553543091, "hypothesis": "the squirrel is not young", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.868960440158844, "hypothesis": "i don't think the squirrel is not young", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent1 -> int1: the squirrel is green; sent3 & int1 -> int2: the squirrel is rough; sent6 & sent9 & int2 -> int3: the squirrel is red; sent6 & sent7 & int3 -> int4: the squirrel is rough; sent6 & sent1 -> int5: the squirrel is green; int5 & sent3 -> int6: the squirrel is rough; int4 & sent6 & sent9 -> int7: the dog is red; int4 & sent7 & sent6 -> int8: the dog is rough; int6 & sent8 & int8 -> int9: the dog is green; sent7 & int4 & int7 -> int10: the dog is rough; int6 & sent8 & int10 -> int11: the dog is green; int4 & sent9 & sent6 -> int12: the squirrel is red; sent6 & sent1 -> int13: the squirrel is green; int13 & sent5 & int4 -> hypothesis", "score": 0.5864291787147522, "hypothesis": "the squirrel sees the squirrel", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8992800116539001, "hypothesis": "i don't think the squirrel sees the squirrel", "context": {"sent1": "if someone is big then they are green", "sent2": "if the squirrel is green then the squirrel is big", "sent3": "all green people are rough", "sent4": "all rough, young people are not green", "sent5": "all rough, young people are big", "sent6": "the squirrel is big", "sent7": "red, big people are rough", "sent8": "young, rough people are green", "sent9": "big, rough people are red"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 1.0, "hypothesis": "bob is big", "context": {"sent1": "bob is smart", "sent2": "bob is furry", "sent3": "charlie is furry", "sent4": "bob is rough", "sent5": "bob is cold", "sent6": "charlie is nice", "sent7": "charlie is smart", "sent8": "bob is big", "sent9": "charlie is blue", "sent10": "charlie is cold", "sent11": "if charlie is smart then charlie is blue", "sent12": "charlie is big", "sent13": "bob is blue", "sent14": "bob is nice", "sent15": "charlie is rough"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is big", "context": {"sent1": "bob is smart", "sent2": "bob is furry", "sent3": "charlie is furry", "sent4": "bob is rough", "sent5": "bob is cold", "sent6": "charlie is nice", "sent7": "charlie is smart", "sent8": "bob is big", "sent9": "charlie is blue", "sent10": "charlie is cold", "sent11": "if charlie is smart then charlie is blue", "sent12": "charlie is big", "sent13": "bob is blue", "sent14": "bob is nice", "sent15": "charlie is rough"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 0.9999971389770508, "hypothesis": "bob is not furry", "context": {"sent1": "bob is smart", "sent2": "bob is furry", "sent3": "charlie is furry", "sent4": "bob is rough", "sent5": "bob is cold", "sent6": "charlie is nice", "sent7": "charlie is smart", "sent8": "bob is big", "sent9": "charlie is blue", "sent10": "charlie is cold", "sent11": "if charlie is smart then charlie is blue", "sent12": "charlie is big", "sent13": "bob is blue", "sent14": "bob is nice", "sent15": "charlie is rough"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is not furry", "context": {"sent1": "bob is smart", "sent2": "bob is furry", "sent3": "charlie is furry", "sent4": "bob is rough", "sent5": "bob is cold", "sent6": "charlie is nice", "sent7": "charlie is smart", "sent8": "bob is big", "sent9": "charlie is blue", "sent10": "charlie is cold", "sent11": "if charlie is smart then charlie is blue", "sent12": "charlie is big", "sent13": "bob is blue", "sent14": "bob is nice", "sent15": "charlie is rough"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "bob is cold", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is cold", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.9999815821647644, "hypothesis": "bob is not cold", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is not cold", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent3 & sent7 -> hypothesis;"], "proof_pred": "sent3 & sent7 -> hypothesis", "score": 0.9427414536476135, "hypothesis": "bob is white", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent3 & sent7 -> hypothesis;"], "proof_pred": "sent3 & sent7 -> hypothesis", "score": 0.9431099891662598, "hypothesis": "i don't think bob is white", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent3 & sent7 -> hypothesis;"], "proof_pred": "sent3 & sent7 -> hypothesis", "score": 0.9442393183708191, "hypothesis": "bob is not white", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent3 & sent7 -> hypothesis;"], "proof_pred": "sent3 & sent7 -> hypothesis", "score": 0.9425300359725952, "hypothesis": "i don't think bob is not white", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent3 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis", "score": 0.9008024334907532, "hypothesis": "bob is rough", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent3 -> int1: bob is white; int1 & sent3 & sent6 -> hypothesis", "score": 0.9009766578674316, "hypothesis": "i don't think bob is rough", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent3 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis", "score": 0.9018036127090454, "hypothesis": "bob is not rough", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent3 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis", "score": 0.9000042080879211, "hypothesis": "i don't think bob is not rough", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent1 & int2 -> hypothesis;"], "proof_pred": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; int2 & sent1 -> hypothesis", "score": 0.9247043132781982, "hypothesis": "bob is blue", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent1 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent1 & int2 -> hypothesis;"], "proof_pred": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; int2 & sent1 -> hypothesis", "score": 0.9247042536735535, "hypothesis": "i don't think bob is blue", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent1 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent5 & int2 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent3 -> int1: bob is white; sent6 & sent3 & int1 -> int2: bob is rough; sent1 & int2 -> int3: bob is blue; sent4 & sent3 -> int4: bob is quiet; sent7 & sent3 -> int5: bob is white; sent5 & int5 & sent5 -> int6: bob is smart; sent5 & int6 & int5 -> hypothesis", "score": 0.6449743509292603, "hypothesis": "bob is not smart", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent5 & int2 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent5 & int2 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent3 -> int1: bob is white; sent6 & sent3 & int1 -> int2: bob is rough; sent4 & sent3 -> int3: bob is quiet; sent1 & int2 -> int4: bob is blue; sent5 & int4 & sent1 -> int5: bob is smart; int5 -> hypothesis", "score": 0.6386809945106506, "hypothesis": "i don't think bob is not smart", "context": {"sent1": "rough things are blue", "sent2": "if bob is not furry then bob is not rough", "sent3": "bob is cold", "sent4": "smart, cold things are quiet", "sent5": "if bob is white and bob is rough then bob is smart", "sent6": "white, cold things are rough", "sent7": "all cold things are white"}, "proof_gt": "sent3 & sent7 -> int1: bob is white; sent3 & sent6 & int1 -> int2: bob is rough; sent5 & int2 & int1 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "the cat does not visit the tiger", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.9912168383598328, "hypothesis": "i don't think the cat does not visit the tiger", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent4 -> hypothesis;"], "proof_pred": "sent4 -> hypothesis", "score": 0.9998822808265686, "hypothesis": "the tiger does not like the cat", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent4 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent4 -> hypothesis;"], "proof_pred": "sent4 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the tiger does not like the cat", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent4 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent4 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent4 -> hypothesis", "score": 0.9383725523948669, "hypothesis": "the tiger likes the tiger", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent4 & sent6 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent4 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent4 -> hypothesis", "score": 0.9394544363021851, "hypothesis": "i don't think the tiger likes the tiger", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent4 & sent6 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent4 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent4 -> hypothesis", "score": 0.9382796883583069, "hypothesis": "the tiger does not like the tiger", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent4 & sent6 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent4 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent4 -> hypothesis", "score": 0.9386314749717712, "hypothesis": "i don't think the tiger does not like the tiger", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": "sent4 & sent6 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5361893773078918, "hypothesis": "the cat is not nice", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5414397120475769, "hypothesis": "i don't think the cat is not nice", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent6 -> int1: the tiger likes the tiger; sent4 & sent6 -> int2: the tiger likes the tiger; int1 & int2 & sent1 -> hypothesis", "score": 0.7925961017608643, "hypothesis": "the tiger is nice", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent6 -> int1: the tiger likes the tiger; int1 & sent1 -> int2: the tiger is nice; int2 & sent1 -> hypothesis", "score": 0.8243119120597839, "hypothesis": "i don't think the tiger is nice", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent6 -> int1: the tiger likes the tiger; int1 & sent1 -> int2: the tiger likes the bald eagle; sent1 & int2 -> int3: the tiger is nice; int3 & sent3 -> hypothesis", "score": 0.6815277934074402, "hypothesis": "the tiger does not visit the cat", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8146606087684631, "hypothesis": "i don't think the tiger does not visit the cat", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.9402433037757874, "hypothesis": "the cat is red", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.9416542053222656, "hypothesis": "i don't think the cat is red", "context": {"sent1": "if someone likes the bald eagle then they are nice", "sent2": "the bald eagle is young", "sent3": "the cat does not visit the tiger", "sent4": "the tiger likes the cat", "sent5": "if the tiger is not red and the tiger is not young then the tiger likes the bald eagle", "sent6": "if someone likes the cat then they like the tiger"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent2 & sent24 -> hypothesis;", "sent1 -> hypothesis;", "sent2 & sent15 -> int1: erin is big; sent5 & int1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.9999967813491821, "hypothesis": "erin is quiet", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": "sent2 & sent24 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 & sent24 -> hypothesis;", "sent1 -> hypothesis;", "sent2 & sent15 -> int1: erin is big; sent5 & int1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.9999808669090271, "hypothesis": "i don't think erin is quiet", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": "sent2 & sent24 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent6 & sent15 -> int1: fiona is big; sent5 & int1 -> int2: fiona is quiet; sent20 & int2 -> hypothesis;", "sent6 & sent18 -> hypothesis;", "sent13 & sent20 -> hypothesis;", "sent14 -> hypothesis;", "sent5 & sent8 -> int1: fiona is quiet; sent20 & int1 -> hypothesis;", "sent6 & sent24 -> int1: fiona is quiet; sent20 & int1 -> hypothesis;"], "proof_pred": "sent14 -> hypothesis", "score": 0.9992217421531677, "hypothesis": "fiona is not cold", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": "sent6 & sent15 -> int1: fiona is big; sent5 & int1 -> int2: fiona is quiet; sent20 & int2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent6 & sent15 -> int1: fiona is big; sent5 & int1 -> int2: fiona is quiet; sent20 & int2 -> hypothesis;", "sent6 & sent18 -> hypothesis;", "sent13 & sent20 -> hypothesis;", "sent14 -> hypothesis;", "sent5 & sent8 -> int1: fiona is quiet; sent20 & int1 -> hypothesis;", "sent6 & sent24 -> int1: fiona is quiet; sent20 & int1 -> hypothesis;"], "proof_pred": "sent14 -> hypothesis", "score": 0.9999998807907104, "hypothesis": "i don't think fiona is not cold", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": "sent6 & sent15 -> int1: fiona is big; sent5 & int1 -> int2: fiona is quiet; sent20 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent15 -> int1: bob is big; sent5 & int1 -> int2: bob is quiet; int2 & sent10 & sent10 -> hypothesis", "score": 0.756096363067627, "hypothesis": "bob is not big", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent15 -> int1: bob is big; int1 -> hypothesis", "score": 0.7594859004020691, "hypothesis": "i don't think bob is not big", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent24 -> int1: fiona is quiet; int1 & sent13 & sent22 -> int2: fiona is green; int2 & sent23 & sent14 -> hypothesis", "score": 0.9012075066566467, "hypothesis": "fiona is young", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent24 -> int1: fiona is quiet; int1 & sent13 & sent22 -> int2: fiona is green; int2 & sent23 & sent14 -> hypothesis", "score": 0.9006150960922241, "hypothesis": "i don't think fiona is young", "context": {"sent1": "erin is quiet", "sent2": "erin is round", "sent3": "erin is cold", "sent4": "all young things are nice", "sent5": "big things are quiet", "sent6": "fiona is round", "sent7": "fiona is nice", "sent8": "fiona is big", "sent9": "erin is young", "sent10": "bob is quiet", "sent11": "if bob is young and bob is nice then bob is quiet", "sent12": "bob is nice", "sent13": "fiona is quiet", "sent14": "fiona is cold", "sent15": "round things are big", "sent16": "bob is cold", "sent17": "bob is young", "sent18": "round things are cold", "sent19": "erin is green", "sent20": "if something is quiet then it is cold", "sent21": "erin is nice", "sent22": "quiet things are green", "sent23": "fiona is green", "sent24": "round things are quiet"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent4 -> hypothesis;"], "proof_pred": "sent4 -> hypothesis", "score": 1.0, "hypothesis": "the dog is not blue", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": "sent4 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent4 -> hypothesis;"], "proof_pred": "sent4 -> hypothesis", "score": 0.9998305439949036, "hypothesis": "i don't think the dog is not blue", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": "sent4 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 & sent3 & sent4 -> hypothesis;", "sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 0.92892986536026, "hypothesis": "the dog is not green", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": "sent2 & sent3 & sent4 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 & sent3 & sent4 -> hypothesis;", "sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 0.9999998807907104, "hypothesis": "i don't think the dog is not green", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": "sent2 & sent3 & sent4 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent12 & sent1 -> int1: the dog is nice; sent11 & int1 & sent10 -> int2: the dog is kind; sent11 & int2 & sent7 -> int3: the dog is not green; sent5 & int3 -> hypothesis", "score": 0.9198198914527893, "hypothesis": "the dog does not chase the dog", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent10 & sent4 -> int1: the dog is kind; int1 & sent11 & sent7 -> int2: the dog is not green; int1 & sent7 & sent11 -> int3: the dog is not green; int3 & int2 & int1 -> hypothesis", "score": 0.6929490566253662, "hypothesis": "i don't think the dog does not chase the dog", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent10 & sent4 -> int1: the dog is kind; int1 & sent11 & sent7 -> int2: the dog is not green; int1 & sent7 & sent11 -> int3: the dog is not green; int3 & int2 & int1 -> int4: the dog is not green; int4 & sent7 & sent11 -> int5: the dog is not green; int4 & int5 & int4 -> hypothesis", "score": 0.6817564368247986, "hypothesis": "the dog likes the dog", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent4 -> int1: the dog is not nice; int1 & sent9 & sent9 -> int2: the dog is kind; sent7 & sent11 & int2 -> int3: the dog is not green; sent5 & int3 -> hypothesis", "score": 0.7451279759407043, "hypothesis": "i don't think the dog likes the dog", "context": {"sent1": "cold people are nice", "sent2": "if the dog is not kind and the dog is not blue then the dog is green", "sent3": "the dog is not kind", "sent4": "the dog is not blue", "sent5": "the dog is green", "sent6": "blue people are not nice", "sent7": "if someone is kind and nice then they are not green", "sent8": "if someone is kind and not nice then they are green", "sent9": "if someone is blue and not nice then they are kind", "sent10": "if the dog is blue then the dog is kind", "sent11": "the dog is nice", "sent12": "the dog is cold", "sent13": "if someone is blue then they are kind"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "the dog is rough", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the dog is rough", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 0.9999991059303284, "hypothesis": "the dog is not rough", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the dog is not rough", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent4 & sent15 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> hypothesis", "score": 0.9392455220222473, "hypothesis": "the squirrel is kind", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent4 & sent15 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> hypothesis", "score": 0.9385755658149719, "hypothesis": "i don't think the squirrel is kind", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent4 & sent17 -> hypothesis;"], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent4 & sent15 -> int3: the bald eagle is kind; sent11 & int3 & sent14 -> int4: the bald eagle chases the squirrel; int4 & sent15 -> int5: the squirrel is kind; int2 & sent14 & sent11 -> int6: the squirrel chases the squirrel; sent15 & int2 -> int7: the squirrel is kind; int5 & sent14 & sent11 -> int8: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int9: the squirrel chases the squirrel; int4 & sent14 & sent11 -> int10: the dog chases the squirrel; int8 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.6470393538475037, "hypothesis": "the bald eagle chases the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent17 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent4 & sent17 -> hypothesis;"], "proof_pred": "sent17 & sent4 -> hypothesis", "score": 0.9558870792388916, "hypothesis": "i don't think the bald eagle chases the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent17 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> hypothesis", "score": 0.8996520042419434, "hypothesis": "the squirrel chases the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> hypothesis", "score": 0.8997341990470886, "hypothesis": "i don't think the squirrel chases the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> hypothesis", "score": 0.9004222750663757, "hypothesis": "the squirrel does not chase the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> hypothesis", "score": 0.8994693756103516, "hypothesis": "i don't think the squirrel does not chase the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis;"], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis", "score": 0.9164260029792786, "hypothesis": "the squirrel does not chase the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; int2 & sent17 -> hypothesis", "score": 0.8903469443321228, "hypothesis": "i don't think the squirrel does not chase the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & sent11 & sent16 -> int3: the rabbit chases the squirrel; sent15 & int3 -> int4: the squirrel is sort; int4 & sent11 & sent14 -> int5: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int6: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int7: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int8: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int9: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int10: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int11: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int12: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int13: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int14: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int15: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int16: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int17: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int18: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int19: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int20: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int21: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int22: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int23: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int24: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int25: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int26: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int27: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int28: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int29: the squirrel chases the squirrel; sent14 & sent11 & int4 -> int30: the squirrel chases the squirrel", "score": 0.45478254556655884, "hypothesis": "the squirrel chases the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis;"], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> int2: the squirrel chases the squirrel; int2 & sent17 -> hypothesis", "score": 0.9172479510307312, "hypothesis": "i don't think the squirrel chases the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & sent14 & int1 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> int2: the squirrel chases the squirrel; sent17 & int2 -> hypothesis", "score": 0.9175530076026917, "hypothesis": "the dog does not chase the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> hypothesis", "score": 0.8992259502410889, "hypothesis": "i don't think the dog does not chase the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> int2: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int3: the squirrel chases the squirrel; int2 & int3 & sent14 -> int4: the squirrel chases the dog; sent14 & sent11 & int1 -> int5: the rabbit chases the squirrel; sent15 & int5 -> int6: the squirrel is kind; int6 & sent14 & sent11 -> int7: the squirrel chases the squirrel; int2 & int1 & sent14 -> int8: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int9: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int10: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int11: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int12: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int13: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int14: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int15: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int16: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int17: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int18: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int19: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int20: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int21: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int22: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int23: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int24: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int25: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int26: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int27: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int28: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int29: the squirrel chases the squirrel; sent14 & sent11 & int6 -> int30: the squirrel chases the squirrel", "score": 0.6242622137069702, "hypothesis": "the squirrel is young", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & int2 & sent11 -> int3: the squirrel chases the dog; sent12 & sent10 & sent13 -> int4: the bald eagle needs the dog; int4 & sent5 & sent1 -> int5: the bald eagle needs the squirrel; int3 & sent10 & sent13 -> int6: the dog needs the dog; int6 & sent10 & sent13 -> int7: the bald eagle needs the dog; int4 & sent7 & sent10 -> int8: the dog needs the dog; int4 & sent7 & sent8 -> int9: the dog needs the dog; int4 & sent7 & sent8 -> int10: the dog needs the dog; int4 & sent7 & sent8 -> int11: the dog needs the dog; int4 & sent7 & sent8 -> int12: the dog needs the dog; int4 & sent7 & sent8 -> int13: the dog needs the dog; int4 & sent7 & sent8 -> int14: the dog needs the dog; int4 & sent7 & sent8 -> int15: the dog needs the dog; int4 & sent7 & sent8 -> int16: the dog needs the dog; int4 & sent7 & sent8 -> int17: the dog needs the dog; int4 & sent7 & sent8 -> int18: the dog needs the dog; int4 & sent7 & sent8 -> int19: the dog needs the dog; int4 & sent7 & sent8 -> int20: the dog needs the dog; int4 & sent7 & sent8 -> int21: the dog needs the dog; int4 & sent7 & sent8 -> int22: the dog needs the dog; int4 & sent7 & sent8 -> int23: the dog needs the dog; int4 & sent7 & sent8 -> int24: the dog needs the dog; int4 & sent7 & sent8 -> int25: the dog needs the dog; int4 & sent7 & sent8 -> int26: the dog needs the dog; int4 & sent7 & sent8 -> int27: the dog needs the dog; int4 & sent7 & sent8 -> int28: the dog needs the dog; int4 & sent7 & sent8 -> int29: the dog needs the dog; int4 & sent7 & sent8 -> int30: the dog needs the dog", "score": 0.7592358589172363, "hypothesis": "i don't think the squirrel is young", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & sent11 & sent11 -> int3: the squirrel chases the dog; int2 & sent15 -> int4: the squirrel is kind; int4 & sent14 & sent11 -> int5: the squirrel chases the squirrel; int5 & sent15 -> int6: the squirrel is kind; int4 & sent14 & sent11 -> int7: the squirrel chases the squirrel; int7 & sent7 & sent14 -> int8: the squirrel chases the squirrel; int8 & sent7 & sent14 -> int9: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int10: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int11: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int12: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int13: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int14: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int15: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int16: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int17: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int18: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int19: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int20: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int21: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int22: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int23: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int24: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int25: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int26: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int27: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int28: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int29: the squirrel chases the squirrel; int8 & sent7 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.5695101022720337, "hypothesis": "the dog does not eat the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & sent11 & sent11 -> int3: the squirrel chases the dog; int2 & sent15 -> int4: the squirrel is kind; int2 & sent14 & sent11 -> int5: the squirrel chases the squirrel; int5 & sent15 -> int6: the squirrel is kind; int4 & sent14 & sent11 -> int7: the rabbit chases the squirrel; int7 & sent15 -> int8: the squirrel is kind; int6 & sent8 & sent14 -> int9: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int10: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.5518507957458496, "hypothesis": "i don't think the dog does not eat the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8404573202133179, "hypothesis": "the bald eagle eats the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5681049823760986, "hypothesis": "i don't think the bald eagle eats the squirrel", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & sent11 & sent11 -> int3: the squirrel chases the dog; int2 & sent15 -> int4: the squirrel is kind; int4 & sent14 & sent11 -> int5: the squirrel chases the squirrel; int5 & sent15 -> int6: the squirrel is kind; int4 & sent14 & sent11 -> int7: the squirrel chases the squirrel; int7 & sent10 & sent11 -> int8: the squirrel needs the dog; int5 & sent14 & sent11 -> int9: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int10: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.5625211000442505, "hypothesis": "the dog is not green", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & sent11 & sent11 -> int3: the squirrel chases the dog; sent15 & int2 -> int4: the squirrel is kind; int4 & sent13 & sent10 -> int5: the dog needs the dog; int5 & sent14 & sent11 -> int6: the dog chases the squirrel; int6 & sent15 -> int7: the squirrel is kind; int7 & sent14 & sent11 -> int8: the squirrel chases the squirrel; int8 & sent14 & sent11 -> int9: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int10: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.5420441627502441, "hypothesis": "i don't think the dog is not green", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent15 & int2 -> int3: the squirrel is kind; sent11 & int3 & sent14 -> int4: the squirrel chases the squirrel; sent13 & sent10 & sent12 -> int5: the dog needs the dog; sent15 & int4 -> int6: the squirrel is kind; int4 & sent14 & sent11 -> int7: the squirrel chases the squirrel; int4 & sent15 -> int8: the squirrel is kind; int7 & sent14 & sent11 -> int9: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int10: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.5897936224937439, "hypothesis": "the bald eagle needs the bald eagle", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent5 & sent13 & sent10 -> int3: the dog needs the dog; sent14 & int2 -> int4: the squirrel chases the dog; int4 & sent15 -> int5: the squirrel is kind; int5 & sent11 & sent14 -> int6: the squirrel chases the squirrel; int6 & sent15 -> int7: the squirrel is kind; int7 & sent14 & sent11 -> int8: the squirrel chases the squirrel; int6 & sent15 -> int9: the squirrel is kind; sent15 & int6 -> int10: the squirrel is kind; sent15 & int6 -> int11: the squirrel is kind; sent15 & int6 -> int12: the squirrel is kind; sent15 & int6 -> int13: the squirrel is kind; sent15 & int6 -> int14: the squirrel is kind; sent15 & int6 -> int15: the squirrel is kind; sent15 & int6 -> int16: the squirrel is kind; sent15 & int6 -> int17: the squirrel is kind; sent15 & int6 -> int18: the squirrel is kind; sent15 & int6 -> int19: the squirrel is kind; sent15 & int6 -> int20: the squirrel is kind; sent15 & int6 -> int21: the squirrel is kind; sent15 & int6 -> int22: the squirrel is kind; sent15 & int6 -> int23: the squirrel is kind; sent15 & int6 -> int24: the squirrel is kind; sent15 & int6 -> int25: the squirrel is kind; sent15 & int6 -> int26: the squirrel is kind; sent15 & int6 -> int27: the squirrel is kind; sent15 & int6 -> int28: the squirrel is kind; sent15 & int6 -> int29: the squirrel is kind; sent15 & int6 -> int30: the squirrel is kind", "score": 0.5934209227561951, "hypothesis": "i don't think the bald eagle needs the bald eagle", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent11 & sent14 -> int2: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int3: the rabbit chases the squirrel; int2 & sent15 & int3 -> int4: the rabbit is kind; int1 & sent14 & sent11 -> int5: the squirrel chases the squirrel; int2 & sent15 -> int6: the squirrel is kind; int2 & sent15 -> int7: the squirrel is kind; int7 & int2 & sent15 -> int8: the squirrel is kind; int7 & int2 & sent15 -> int9: the squirrel is kind; int1 & sent14 & sent11 -> int10: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.5817369222640991, "hypothesis": "the dog does not eat the bald eagle", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; int1 & sent11 & sent14 -> int2: the squirrel chases the squirrel; sent15 & int2 -> int3: the squirrel is kind; int3 & sent14 & sent11 -> int4: the squirrel chases the squirrel; sent15 & int4 -> int5: the squirrel is kind; int3 & sent14 & sent11 -> int6: the squirrel chases the squirrel; sent15 & int4 -> int7: the squirrel is kind; int5 & sent14 & sent11 -> int8: the squirrel chases the squirrel; sent15 & int8 -> int9: the squirrel is kind; int1 & sent14 & sent11 -> int10: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int11: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int12: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int13: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int14: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int15: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int16: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int17: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int18: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int19: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int20: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int21: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int22: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int23: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int24: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int25: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int26: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int27: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int28: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int29: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int30: the squirrel chases the squirrel", "score": 0.9020063877105713, "hypothesis": "i don't think the dog does not eat the bald eagle", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent15 & sent4 -> int1: the squirrel is kind; sent11 & int1 & sent14 -> int2: the squirrel chases the squirrel; sent14 & sent11 & sent11 -> int3: the squirrel chases the dog; sent15 & int2 -> int4: the squirrel is kind; int4 & sent13 & sent14 -> int5: the dog chases the squirrel; int5 & sent14 & sent11 -> int6: the dog chases the squirrel; sent6 & int5 & sent15 -> int7: the dog is kind; int6 & sent14 & sent11 -> int8: the dog chases the squirrel; int6 & sent8 & sent14 -> int9: the dog chases the squirrel; int6 & sent8 & sent14 -> int10: the dog chases the squirrel; int6 & sent8 & sent14 -> int11: the dog chases the squirrel; int6 & sent8 & sent14 -> int12: the dog chases the squirrel; int6 & sent8 & sent14 -> int13: the dog chases the squirrel; int6 & sent8 & sent14 -> int14: the dog chases the squirrel; int6 & sent8 & sent14 -> int15: the dog chases the squirrel; int6 & sent8 & sent14 -> int16: the dog chases the squirrel; int6 & sent8 & sent14 -> int17: the dog chases the squirrel; int6 & sent8 & sent14 -> int18: the dog chases the squirrel; int6 & sent8 & sent14 -> int19: the dog chases the squirrel; int6 & sent8 & sent14 -> int20: the dog chases the squirrel; int6 & sent8 & sent14 -> int21: the dog chases the squirrel; int6 & sent8 & sent14 -> int22: the dog chases the squirrel; int6 & sent8 & sent14 -> int23: the dog chases the squirrel; int6 & sent8 & sent14 -> int24: the dog chases the squirrel; int6 & sent8 & sent14 -> int25: the dog chases the squirrel; int6 & sent8 & sent14 -> int26: the dog chases the squirrel; int6 & sent8 & sent14 -> int27: the dog chases the squirrel; int6 & sent8 & sent14 -> int28: the dog chases the squirrel; int6 & sent8 & sent14 -> int29: the dog chases the squirrel; int6 & sent8 & sent14 -> int30: the dog chases the squirrel", "score": 0.5719525218009949, "hypothesis": "the dog eats the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent15 -> int1: the squirrel is kind; int1 & sent14 & sent11 -> int2: the squirrel chases the squirrel; int1 & sent14 & sent11 -> int3: the squirrel chases the squirrel; int2 & sent14 & sent11 -> int4: the rabbit chases the squirrel; int3 & sent15 -> int5: the squirrel is kind; int2 & sent14 & sent11 -> int6: the squirrel chases the squirrel; int6 & sent14 & sent11 -> int7: the squirrel chases the squirrel; int7 & sent14 & sent11 -> int8: the squirrel chases the squirrel; int8 & sent7 & int2 -> int9: the squirrel chases the dog; int7 & int8 & sent14 -> int10: the squirrel chases the squirrel; int7 & int8 & sent14 -> int11: the squirrel chases the squirrel; int7 & int8 & sent14 -> int12: the squirrel chases the squirrel; int7 & int8 & sent14 -> int13: the squirrel chases the squirrel; int7 & int8 & sent14 -> int14: the squirrel chases the squirrel; int7 & int8 & sent14 -> int15: the squirrel chases the squirrel; int7 & int8 & sent14 -> int16: the squirrel chases the squirrel; int7 & int8 & sent14 -> int17: the squirrel chases the squirrel; int7 & int8 & sent14 -> int18: the squirrel chases the squirrel; int7 & int8 & sent14 -> int19: the squirrel chases the squirrel; int7 & int8 & sent14 -> int20: the squirrel chases the squirrel; int7 & int8 & sent14 -> int21: the squirrel chases the squirrel; int7 & int8 & sent14 -> int22: the squirrel chases the squirrel; int7 & int8 & sent14 -> int23: the squirrel chases the squirrel; int7 & int8 & sent14 -> int24: the squirrel chases the squirrel; int7 & int8 & sent14 -> int25: the squirrel chases the squirrel; int7 & int8 & sent14 -> int26: the squirrel chases the squirrel; int7 & int8 & sent14 -> int27: the squirrel chases the squirrel; int7 & int8 & sent14 -> int28: the squirrel chases the squirrel; int7 & int8 & sent14 -> int29: the squirrel chases the squirrel; int7 & int8 & sent14 -> int30: the squirrel chases the squirrel", "score": 0.5393838882446289, "hypothesis": "i don't think the dog eats the dog", "context": {"sent1": "if someone is green and they need the dog then they eat the squirrel", "sent2": "the squirrel needs the bald eagle", "sent3": "if someone needs the bald eagle and the bald eagle eats the squirrel then the squirrel is young", "sent4": "the bald eagle chases the squirrel", "sent5": "the bald eagle needs the dog", "sent6": "the dog is not red", "sent7": "the squirrel eats the bald eagle", "sent8": "the bald eagle is not green", "sent9": "the bald eagle does not need the squirrel", "sent10": "if the dog is not green and the dog does not need the bald eagle then the bald eagle needs the dog", "sent11": "the squirrel chases the bald eagle", "sent12": "the dog is rough", "sent13": "the dog does not need the bald eagle", "sent14": "if someone chases the bald eagle and they are kind then they chase the squirrel", "sent15": "if someone chases the squirrel then the squirrel is kind", "sent16": "the bald eagle is young", "sent17": "if someone chases the squirrel then they do not chase the dog", "sent18": "the bald eagle eats the dog"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent7 -> hypothesis;"], "proof_pred": "sent7 -> hypothesis", "score": 1.0, "hypothesis": "the bear is cold", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent7 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent7 -> hypothesis;"], "proof_pred": "sent7 -> hypothesis", "score": 0.9999995231628418, "hypothesis": "i don't think the bear is cold", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent7 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.999998927116394, "hypothesis": "the rabbit is not red", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the rabbit is not red", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent2 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> hypothesis", "score": 0.9391331076622009, "hypothesis": "the bear does not chase the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent2 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; sent7 & int1 & sent9 -> int2: the bear chases the bear; sent6 & int2 & sent4 -> int3: the bear does not eat the rabbit; sent2 & sent7 -> hypothesis", "score": 0.8898873925209045, "hypothesis": "i don't think the bear does not chase the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent2 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; sent7 & int1 & sent9 -> int2: the bear chases the bear; sent6 & int2 & sent4 -> int3: the bear does not eat the rabbit; sent2 & sent7 -> hypothesis", "score": 0.5679503083229065, "hypothesis": "the bear chases the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent2 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> hypothesis", "score": 0.9408095479011536, "hypothesis": "i don't think the bear chases the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> hypothesis", "score": 0.9016754031181335, "hypothesis": "the bear chases the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> hypothesis", "score": 0.9029557108879089, "hypothesis": "i don't think the bear chases the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> hypothesis", "score": 0.9023497104644775, "hypothesis": "the bear does not chase the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> hypothesis", "score": 0.9014875888824463, "hypothesis": "i don't think the bear does not chase the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis;"], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; int2 & sent4 & sent6 -> hypothesis", "score": 0.9117393493652344, "hypothesis": "the bear does not eat the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent4 & sent8 -> hypothesis", "score": 0.6301320195198059, "hypothesis": "i don't think the bear does not eat the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis;"], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; int2 & sent4 & sent6 -> hypothesis", "score": 0.9030993580818176, "hypothesis": "the bear eats the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis;"], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; int2 & sent4 & sent6 -> hypothesis", "score": 0.9099417328834534, "hypothesis": "i don't think the bear eats the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": "sent2 & sent7 -> int1: the bear does not chase the rabbit; sent7 & sent9 & int1 -> int2: the bear chases the bear; sent4 & sent6 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6001765727996826, "hypothesis": "the bear does not need the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6577219367027283, "hypothesis": "i don't think the bear does not need the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.728867769241333, "hypothesis": "the bear is rough", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent7 & sent9 -> int2: the bear chases the bear; int2 & sent6 & sent4 -> int3: the bear does not eat the rabbit; int3 & sent4 & int2 -> int4: the bear does not eat the rabbit; int3 & sent9 & sent7 -> int5: the bear chases the bear; sent4 & int5 & sent6 -> int6: the bear does not eat the rabbit; int4 & int6 & sent4 -> int7: the bear does not eat the rabbit; int7 & sent4 & int2 -> int8: the bear eats the rabbit; int8 & sent7 & sent2 -> hypothesis", "score": 0.6668267250061035, "hypothesis": "i don't think the bear is rough", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; sent6 & int2 & sent4 -> int3: the bear does not eat the rabbit; int3 & sent7 & sent9 -> int4: the bear chases the bear; int4 & sent4 & sent6 -> int5: the bear does not eat the rabbit; int3 & sent4 & int4 -> int6: the bear does not eat the rabbit; int3 & sent4 & int4 -> int7: the bear does not eat the rabbit; int3 & sent4 & int4 -> int8: the bear does not eat the rabbit; int8 & int3 & int4 -> int9: the bear does not eat it; int6 & sent4 & int4 -> int10: the bear do not eat the rabbit; int7 & sent9 & int1 -> int11: the bear chases the bear; int4 & sent4 & sent6 -> int12: the bear does not eat the rabbit; int12 & sent4 & int4 -> int13: the bear does not eat the rabbit; int13 & int3 & int4 -> int14: the bear does not eat the rabbit; int14 & sent4 & int11 -> int15: the bear does not eat the rabbit; int14 & sent4 & int4 -> int16: the bear does not eat the rabbit; int14 & sent4 & int4 -> int17: the bear does not eat the rabbit; int14 & sent4 & int4 -> int18: the bear does not eat the rabbit; int14 & sent4 & int4 -> int19: the bear does not eat the rabbit; int14 & sent4 & int4 -> int20: the bear does not eat the rabbit; int14 & sent4 & int4 -> int21: the bear does not eat the rabbit; int14 & sent4 & int4 -> int22: the bear does not eat the rabbit; int14 & sent4 & int4 -> int23: the bear does not eat the rabbit; int14 & sent4 & int4 -> int24: the bear does not eat the rabbit; int14 & sent4 & int4 -> int25: the bear does not eat the rabbit; int14 & sent4 & int4 -> int26: the bear does not eat the rabbit; int14 & sent4 & int4 -> int27: the bear does not eat the rabbit; int14 & sent4 & int4 -> int28: the bear does not eat the rabbit; int14 & sent4 & int4 -> int29: the bear does not eat the rabbit; int14 & sent4 & int4 -> int30: the bear does not eat the rabbit", "score": 0.5998649597167969, "hypothesis": "the rabbit is not blue", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent7 & sent9 -> int2: the bear chases the bear; sent6 & int2 & sent4 -> int3: the bear does not eat the rabbit; int3 & sent7 & sent9 -> int4: the bear chases the bear; int4 & sent4 & sent6 -> int5: the bear does not eat the rabbit; int3 & sent4 & int4 -> int6: the bear does not eat the rabbit; int3 & sent4 & int4 -> int7: the bear does not eat the rabbit; int3 & sent4 & int4 -> int8: the bear does not eat the rabbit; int8 & int3 & int4 -> int9: the bear does not eat it; int7 & sent9 & int1 -> int10: the bear chases the bear; int4 & sent4 & sent6 -> int11: the bear does not eat the rabbit; int3 & int4 & sent4 -> int12: the bear does not eat the rabbit; int12 & sent4 & int4 -> int13: the bear does not eat the rabbit; int13 & int3 & int4 -> int14: the bear does not eat the rabbit; int14 & sent4 & int10 -> int15: the bear does not eat the rabbit; int11 & sent4 & int4 -> int16: the bear does not eat the rabbit; int11 & sent4 & int4 -> int17: the bear does not eat the rabbit; int11 & sent4 & int4 -> int18: the bear does not eat the rabbit; int11 & sent4 & int4 -> int19: the bear does not eat the rabbit; int11 & sent4 & int4 -> int20: the bear does not eat the rabbit; int11 & sent4 & int4 -> int21: the bear does not eat the rabbit; int11 & sent4 & int4 -> int22: the bear does not eat the rabbit; int11 & sent4 & int4 -> int23: the bear does not eat the rabbit; int11 & sent4 & int4 -> int24: the bear does not eat the rabbit; int11 & sent4 & int4 -> int25: the bear does not eat the rabbit; int11 & sent4 & int4 -> int26: the bear does not eat the rabbit; int11 & sent4 & int4 -> int27: the bear does not eat the rabbit; int11 & sent4 & int4 -> int28: the bear does not eat the rabbit; int11 & sent4 & int4 -> int29: the bear does not eat the rabbit; int11 & sent4 & int4 -> int30: the bear does not eat the rabbit", "score": 0.5835314393043518, "hypothesis": "i don't think the rabbit is not blue", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; int2 & sent4 & sent6 -> int3: the bear does not eat the rabbit; int3 & sent4 & int2 -> int4: the bear does not eat the rabbit; int3 & int4 & sent4 -> int5: the bear does not eat it; int4 & sent9 & sent7 -> int6: the bear chases the bear; sent4 & int6 & sent6 -> int7: the bear does not eat the rabbit; int5 & sent4 & int2 -> int8: the bear does not eat the rabbit; int8 & sent4 & int6 -> int9: the bear does not eat the rabbit; int9 & sent4 & int6 -> int10: the bear does not eat the rabbit; int10 & sent9 & sent7 -> int11: the bear chases the bear; int6 & sent4 & sent6 -> int12: the bear does not eat the rabbit; int12 & sent4 & int6 -> int13: the bear does not eat the rabbit; int13 & sent4 & int6 -> int14: the bear does not eat the rabbit; int14 & sent4 & int6 -> int15: the bear does not eat the rabbit; int10 & sent4 & int6 -> int16: the bear does not eat the rabbit; int10 & sent4 & int6 -> int17: the bear does not eat the rabbit; int10 & sent4 & int6 -> int18: the bear does not eat the rabbit; int10 & sent4 & int6 -> int19: the bear does not eat the rabbit; int10 & sent4 & int6 -> int20: the bear does not eat the rabbit; int10 & sent4 & int6 -> int21: the bear does not eat the rabbit; int10 & sent4 & int6 -> int22: the bear does not eat the rabbit; int10 & sent4 & int6 -> int23: the bear does not eat the rabbit; int10 & sent4 & int6 -> int24: the bear does not eat the rabbit; int10 & sent4 & int6 -> int25: the bear does not eat the rabbit; int10 & sent4 & int6 -> int26: the bear does not eat the rabbit; int10 & sent4 & int6 -> int27: the bear does not eat the rabbit; int10 & sent4 & int6 -> int28: the bear does not eat the rabbit; int10 & sent4 & int6 -> int29: the bear does not eat the rabbit; int10 & sent4 & int6 -> int30: the bear does not eat the rabbit", "score": 0.538917064666748, "hypothesis": "the rabbit eats the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent7 & sent9 -> int2: the bear chases the bear; int2 & sent4 & sent6 -> hypothesis", "score": 0.9156160950660706, "hypothesis": "i don't think the rabbit eats the rabbit", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent7 & sent9 -> int2: the bear chases the bear; sent6 & int2 & sent4 -> int3: the bear does not eat the rabbit; int3 & sent7 & sent9 -> int4: the bear chases the bear; int4 & sent4 & sent6 -> int5: the bear does not eat the rabbit; int3 & sent4 & int4 -> int6: the bear does not eat the rabbit; int3 & sent4 & int4 -> int7: the bear does not eat the rabbit; int3 & sent4 & int4 -> int8: the bear does not eat the rabbit; int8 & int3 & int4 -> int9: the bear does not eat it; int7 & sent9 & int1 -> int10: the bear chases the bear; int4 & sent4 & sent6 -> int11: the bear does not eat the rabbit; int3 & int4 & sent4 -> int12: the bear does not eat the rabbit; int12 & sent4 & int4 -> int13: the bear does not eat the rabbit; int13 & int3 & int4 -> int14: the bear does not eat the rabbit; int14 & int3 & int4 -> int15: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int16: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int17: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int18: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int19: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int20: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int21: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int22: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int23: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int24: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int25: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int26: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int27: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int28: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int29: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int30: the bear does not eat the rabbit", "score": 0.5894984006881714, "hypothesis": "the rabbit is not cold", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent2 -> int1: the bear does not chase the rabbit; int1 & sent7 & sent9 -> int2: the bear chases the bear; sent6 & int2 & sent4 -> int3: the bear does not eat the rabbit; int3 & sent9 & sent7 -> int4: the bear chases the bear; int4 & sent4 & sent6 -> int5: the bear does not eat the rabbit; int3 & sent4 & int4 -> int6: the bear does not eat the rabbit; int3 & sent4 & int4 -> int7: the bear does not eat the rabbit; int3 & sent4 & int4 -> int8: the bear does not eat the rabbit; int8 & int3 & int4 -> int9: the bear does not eat it; int7 & sent9 & int1 -> int10: the bear chases the bear; int4 & sent4 & sent6 -> int11: the bear does not eat the rabbit; int3 & int4 & sent4 -> int12: the bear does not eat the rabbit; int11 & int3 & int4 -> int13: the bear does not eat the rabbit; int13 & int3 & int4 -> int14: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int15: the bear does not eat the rabbit; int13 & sent4 & int4 -> int16: the bear do not eat the rabbit; sent4 & int10 & sent6 -> int17: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int18: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int19: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int20: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int21: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int22: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int23: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int24: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int25: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int26: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int27: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int28: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int29: the bear does not eat the rabbit; sent4 & int10 & sent6 -> int30: the bear does not eat the rabbit", "score": 0.5811153650283813, "hypothesis": "i don't think the rabbit is not cold", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5805160999298096, "hypothesis": "the bear is blue", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5719766020774841, "hypothesis": "i don't think the bear is blue", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; int2 & sent6 & sent4 -> int3: the bear does not eat the rabbit; int2 & int3 & sent4 -> hypothesis", "score": 0.8483623266220093, "hypothesis": "the bear does not eat the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent9 & sent7 -> int2: the bear chases the bear; int2 & sent6 & sent4 -> int3: the bear does not eat the rabbit; sent4 & int3 & int2 -> int4: the bear does not eat the rabbit; int1 & sent9 & sent7 -> int5: the bear chases the bear; sent4 & int5 & sent6 -> int6: the bear does not eat the rabbit; int4 & sent9 & sent7 -> int7: the bear chases the bear; sent6 & sent4 & int7 -> int8: the bear does not eat the rabbit; int8 & sent6 & sent4 -> int9: the bear does not eat the rabbit; int8 & sent9 & sent7 -> int10: the bear chases the bear; sent6 & sent4 & int10 -> int11: the bear does not eat the rabbit; int11 & int8 & sent9 -> int12: the bear chases the bear; sent6 & sent4 & int12 -> int13: the bear does not eat the rabbit; sent9 & sent7 & int1 -> int14: the bear chases the rabbit; sent9 & int14 & sent7 -> int15: the bear chases the bear; int8 & sent9 & int1 -> int16: the bear chases the bear; int8 & sent9 & int1 -> int17: the bear chases the bear; int8 & sent9 & int1 -> int18: the bear chases the bear; int8 & sent9 & int1 -> int19: the bear chases the bear; int8 & sent9 & int1 -> int20: the bear chases the bear; int8 & sent9 & int1 -> int21: the bear chases the bear; int8 & sent9 & int1 -> int22: the bear chases the bear; int8 & sent9 & int1 -> int23: the bear chases the bear; int8 & sent9 & int1 -> int24: the bear chases the bear; int8 & sent9 & int1 -> int25: the bear chases the bear; int8 & sent9 & int1 -> int26: the bear chases the bear; int8 & sent9 & int1 -> int27: the bear chases the bear; int8 & sent9 & int1 -> int28: the bear chases the bear; int8 & sent9 & int1 -> int29: the bear chases the bear; int8 & sent9 & int1 -> int30: the bear chases the bear", "score": 0.8360975980758667, "hypothesis": "i don't think the bear does not eat the bear", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5805337429046631, "hypothesis": "the bear is red", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent7 -> int1: the bear does not chase the rabbit; int1 & sent7 & sent9 -> int2: the bear chases the bear; int2 & sent6 & sent4 -> int3: the bear does not eat the rabbit; sent4 & sent6 & int2 -> int4: the bear does not eat the bun; int3 & sent9 & sent7 -> int5: the bear chases the bear; sent6 & int5 & sent4 -> int6: the bear does not eat the rabbit; int4 & int6 & sent4 -> int7: the bear does not eat the rabbit; int4 & int7 & sent9 -> int8: the bear chases the bull; sent6 & int8 & sent4 -> int9: the bear does not eat the rabbit; int4 & sent9 & sent7 -> int10: the bear chases the bear; sent6 & sent4 & int10 -> int11: the bear does not eat the rabbit; int4 & int11 & sent9 -> int12: the bear chases the bear; sent6 & sent4 & int12 -> int13: the bear does not eat the rabbit; sent9 & sent7 & int1 -> int14: the bear chases the rabbit; sent9 & int14 & sent7 -> int15: the bear chases the bear; int6 & sent4 & int15 -> hypothesis", "score": 0.568773090839386, "hypothesis": "i don't think the bear is red", "context": {"sent1": "the rabbit needs the bear", "sent2": "if someone is cold then they do not chase the rabbit", "sent3": "the rabbit is red", "sent4": "if someone needs the rabbit and they chase the bear then they do not eat the rabbit", "sent5": "the bear is big", "sent6": "the bear needs the rabbit", "sent7": "the bear is cold", "sent8": "the rabbit chases the bear", "sent9": "if someone is cold and they do not chase the rabbit then they chase the bear", "sent10": "the rabbit eats the bear"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent1 -> hypothesis;", "sent6 & sent9 & sent13 -> int1: charlie is nice; sent4 & int1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 1.0, "hypothesis": "charlie is cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 -> hypothesis;", "sent6 & sent9 & sent13 -> int1: charlie is nice; sent4 & int1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.9999985694885254, "hypothesis": "i don't think charlie is cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent9 -> hypothesis;"], "proof_pred": "sent9 -> hypothesis", "score": 0.9999946355819702, "hypothesis": "charlie is not round", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent9 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent9 -> hypothesis;"], "proof_pred": "sent9 -> hypothesis", "score": 1.0, "hypothesis": "i don't think charlie is not round", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent9 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent6 & sent9 & sent13 -> hypothesis;"], "proof_pred": "sent9 & sent6 & sent13 -> hypothesis", "score": 0.9058244824409485, "hypothesis": "charlie is nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent6 & sent9 & sent13 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent6 & sent9 & sent13 -> hypothesis;"], "proof_pred": "sent9 & sent6 & sent13 -> hypothesis", "score": 0.9062650203704834, "hypothesis": "i don't think charlie is nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent6 & sent9 & sent13 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent3 & sent10 -> hypothesis;"], "proof_pred": "sent3 & sent10 -> hypothesis", "score": 0.9479039311408997, "hypothesis": "gary is not green", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent3 & sent10 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent3 & sent10 -> hypothesis;"], "proof_pred": "sent3 & sent10 -> hypothesis", "score": 0.9469671845436096, "hypothesis": "i don't think gary is not green", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent3 & sent10 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis", "score": 0.8975938558578491, "hypothesis": "bob is nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis", "score": 0.8977322578430176, "hypothesis": "i don't think bob is nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis", "score": 0.8986194729804993, "hypothesis": "bob is not nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent13 & sent12 & int1 -> hypothesis", "score": 0.8977279663085938, "hypothesis": "i don't think bob is not nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis", "score": 0.9255618453025818, "hypothesis": "bob is cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis", "score": 0.9252711534500122, "hypothesis": "i don't think bob is cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis", "score": 0.9256805777549744, "hypothesis": "bob is not cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis;"], "proof_pred": "sent16 & sent5 & sent7 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis", "score": 0.9253805875778198, "hypothesis": "i don't think bob is not cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": "sent5 & sent7 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6737308502197266, "hypothesis": "gary is not nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent10 -> int1: gary is green; int1 & sent6 & sent13 -> int2: gary is nice; int2 & sent7 & sent5 -> hypothesis", "score": 0.6735207438468933, "hypothesis": "i don't think gary is not nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent9 -> int1: gary is red; int1 & sent4 -> int2: harry is cold; sent4 & sent9 -> int3: harry is nice; sent4 & int3 -> int4: harry is cold; sent4 & int2 & sent4 -> int5: harry is warm; int5 & sent9 & sent14 -> int6: harry is red; sent4 & sent5 & int6 -> hypothesis", "score": 0.6668091416358948, "hypothesis": "harry is round", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent9 -> int1: gary is red; int1 & sent4 -> int2: harry is cold; sent4 & sent9 -> int3: harry is nice; sent4 & int3 -> int4: harry is cold; int2 & sent10 -> int5: harry is green; sent8 & int2 & sent8 -> hypothesis", "score": 0.6820662617683411, "hypothesis": "i don't think harry is round", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent16 & sent5 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; sent4 & int2 -> hypothesis", "score": 0.7205018997192383, "hypothesis": "harry is not cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent16 & sent5 & sent16 -> int1: bob is round; sent12 & sent13 & int1 -> int2: bob is nice; int2 & sent4 -> hypothesis", "score": 0.6149008870124817, "hypothesis": "i don't think harry is not cold", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent16 & sent14 -> int1: gary is red; int1 & sent5 & sent2 -> int2: gary is round; sent2 & sent5 & int1 -> int3: gary is around; sent2 & sent5 & int3 -> int4: gary is nice; sent4 & int4 -> int5: gary is cold; int5 & sent3 & sent10 -> int6: gary is green; sent10 & int5 -> int7: gary is green; sent6 & int2 & sent13 -> int8: gary is good; int6 & sent7 & sent5 -> int9: gary is smart; int9 & sent5 & int1 -> int10: gary is round; sent14 & int10 -> int11: gary is red; sent5 & int11 & int9 -> int12: gary is round; sent14 & int12 -> int13: gary is red; sent5 & int13 & int8 -> int14: gary is round; sent14 & int12 -> int15: gary is red; sent5 & int15 & int6 -> int16: gary is round; sent14 & int12 -> int17: gary is red; sent14 & int16 -> int18: gary is red; sent14 & int16 -> int19: gary is red; sent14 & int16 -> int20: gary is red; sent14 & int16 -> int21: gary is red; sent14 & int16 -> int22: gary is red; sent14 & int16 -> int23: gary is red; sent14 & int16 -> int24: gary is red; sent14 & int16 -> int25: gary is red; sent14 & int16 -> int26: gary is red; sent14 & int16 -> int27: gary is red; sent14 & int16 -> int28: gary is red; sent14 & int16 -> int29: gary is red; sent14 & int16 -> int30: gary is red", "score": 0.5314642190933228, "hypothesis": "harry is nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent16 & sent14 -> int1: gary is red; int1 & sent5 & sent2 -> int2: gary is round; sent2 & sent5 & int1 -> int3: gary is around; sent2 & sent5 & int3 -> int4: gary is green; int4 & sent8 & sent7 -> int5: harry is round; sent14 & int2 -> int6: gary is red; int4 & sent8 & sent4 -> int7: harry is cold; sent10 & int7 -> int8: harry is green; int8 & sent8 & sent8 -> hypothesis", "score": 0.5187568068504333, "hypothesis": "i don't think harry is nice", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent9 & sent13 & sent6 -> int1: charlie is nice; int1 & sent11 & sent1 -> int2: charlie is smart; sent2 & int2 & sent2 -> hypothesis", "score": 0.876865804195404, "hypothesis": "charlie is not red", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent9 & sent13 & sent6 -> int1: charlie is nice; int1 & sent11 & sent1 -> int2: charlie is smart; sent2 & int2 & sent2 -> hypothesis", "score": 0.7573626637458801, "hypothesis": "i don't think charlie is not red", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent16 & sent14 -> int1: gary is red; int1 & sent5 & sent7 -> int2: gary is round; sent4 & int2 -> int3: harry is cold; sent10 & int3 -> hypothesis", "score": 0.6704134345054626, "hypothesis": "harry is smart", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent16 & sent14 -> int1: gary is red; int1 & sent5 & sent7 -> int2: gary is round; sent4 & int2 -> int3: harry is cold; sent14 & sent9 -> int4: harry is red; sent5 & int4 & int3 -> int5: harry is round; int5 & sent13 & sent6 -> int6: harry is nice; int6 & sent4 -> int7: harry is cold; sent4 & int6 -> int8: harry is cold; int8 & sent7 & sent10 -> int9: harry is green; sent8 & int9 -> int10: harry is smart; int10 -> hypothesis", "score": 0.6807069182395935, "hypothesis": "i don't think harry is smart", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent9 -> int1: gary is red; int1 & sent5 & sent7 -> int2: gary is round; sent4 & int2 -> int3: harry is cold; sent9 & sent14 -> int4: harry is red; sent7 & sent5 & int4 -> int5: harry is round; sent5 & int3 & sent13 -> int6: harry is nice; int6 & sent4 -> int7: harry is cold; sent7 & int3 & sent10 -> int8: harry is growing; sent8 & int8 & sent8 -> hypothesis", "score": 0.5173701643943787, "hypothesis": "harry is not big", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent13 & sent12 -> int1: harry is nice; int1 & sent4 -> int2: harry is cold; int1 & sent13 & sent12 -> int3: harry is sent; int3 & sent6 & sent14 -> int4: harry is round; int3 & sent14 -> int5: harry is red; sent6 & int2 & sent12 -> int6: harry is smart; sent5 & int6 & int5 -> int7: harry is round; int4 & sent7 & int7 -> int8: harry is big; sent8 -> hypothesis", "score": 0.7657518982887268, "hypothesis": "i don't think harry is not big", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent10 -> int1: gary is green; int1 & sent3 & sent10 -> hypothesis", "score": 0.6997319459915161, "hypothesis": "gary is smart", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent10 -> int1: gary is green; sent10 & sent3 -> int2: gary is green; int2 & int1 & sent7 -> int3: gary is smart; int3 & sent6 & sent5 -> int4: gary is round; sent14 & int4 -> int5: gary is red; sent5 & int5 & int3 -> int6: gary is round; sent6 & sent13 & int6 -> int7: gary is nice; int3 & sent3 & sent5 -> int8: gary is around; int3 & sent3 & sent3 -> hypothesis", "score": 0.5563596487045288, "hypothesis": "i don't think gary is smart", "context": {"sent1": "charlie is cold", "sent2": "charlie is smart", "sent3": "gary is cold", "sent4": "nice things are cold", "sent5": "smart, red things are round", "sent6": "charlie is big", "sent7": "bob is smart", "sent8": "harry is green", "sent9": "charlie is round", "sent10": "cold things are green", "sent11": "if charlie is nice and charlie is cold then charlie is smart", "sent12": "bob is big", "sent13": "if something is big and round then it is nice", "sent14": "if gary is round then gary is red", "sent15": "bob is green", "sent16": "bob is red"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 1.0, "hypothesis": "the rabbit is green", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the rabbit is green", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 0.9998556971549988, "hypothesis": "the rabbit does not see the bear", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent5 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the rabbit does not see the bear", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent5 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9428118467330933, "hypothesis": "the rabbit chases the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9391526579856873, "hypothesis": "i don't think the rabbit chases the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9403407573699951, "hypothesis": "the rabbit does not chase the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9416741728782654, "hypothesis": "i don't think the rabbit does not chase the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent5 & sent14 & int1 -> int2: the bear likes the rabbit; sent7 & sent11 -> int3: the bear is blue; sent10 & int3 & int2 -> hypothesis;", "sent7 & sent11 -> int1: the bear is blue; sent4 & sent10 & int1 -> hypothesis;", "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent13 & int2 -> int3: the bear likes the rabbit; sent7 & sent11 -> int4: the bear is blue; sent10 & int4 & int3 -> hypothesis;"], "proof_pred": "sent11 & sent7 -> int1: the bear is blue; sent10 & sent4 & int1 -> hypothesis", "score": 0.8975661396980286, "hypothesis": "the bear likes the bear", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent5 & sent14 & int1 -> int2: the bear likes the rabbit; sent7 & sent11 -> int3: the bear is blue; sent10 & int3 & int2 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent5 & sent14 & int1 -> int2: the bear likes the rabbit; sent7 & sent11 -> int3: the bear is blue; sent10 & int3 & int2 -> hypothesis;", "sent7 & sent11 -> int1: the bear is blue; sent4 & sent10 & int1 -> hypothesis;", "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent13 & int2 -> int3: the bear likes the rabbit; sent7 & sent11 -> int4: the bear is blue; sent10 & int4 & int3 -> hypothesis;"], "proof_pred": "sent11 & sent7 -> int1: the bear is blue; sent10 & sent4 & int1 -> hypothesis", "score": 0.8979068398475647, "hypothesis": "i don't think the bear likes the bear", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent5 & sent14 & int1 -> int2: the bear likes the rabbit; sent7 & sent11 -> int3: the bear is blue; sent10 & int3 & int2 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> hypothesis;"], "proof_pred": "sent5 & sent3 & sent2 -> hypothesis", "score": 0.7694411277770996, "hypothesis": "the bear does not see the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> hypothesis;"], "proof_pred": "sent5 & sent3 & sent2 -> hypothesis", "score": 0.83063143491745, "hypothesis": "i don't think the bear does not see the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int2 & sent9 & sent3 -> hypothesis", "score": 0.905055046081543, "hypothesis": "the bear is red", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int2 & sent9 & sent3 -> hypothesis", "score": 0.9019622802734375, "hypothesis": "i don't think the bear is red", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int2 & sent9 & sent3 -> hypothesis", "score": 0.8999578356742859, "hypothesis": "the bear is not red", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int2 & sent9 & sent3 -> hypothesis", "score": 0.9031358361244202, "hypothesis": "i don't think the bear is not red", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": "sent1 & sent11 -> int1: the rabbit chases the rabbit; sent2 & sent5 & int1 -> int2: the bear sees the rabbit; sent3 & sent9 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6961780786514282, "hypothesis": "the rabbit is not round", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.682450532913208, "hypothesis": "i don't think the rabbit is not round", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.9035091996192932, "hypothesis": "the rabbit is kind", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8999488949775696, "hypothesis": "i don't think the rabbit is kind", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int2 & sent5 & int1 -> int4: the rabbit sees the rabbit; int3 & sent9 & int4 -> int5: the rabbit is red; sent9 & int3 & int1 -> int6: the rabbit is red; sent7 & sent11 -> int7: the bear is blue; int3 & sent9 & int2 -> int8: the bear is red; int8 & sent8 & sent10 -> hypothesis", "score": 0.8073842525482178, "hypothesis": "the bear is not green", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int2 & sent5 & int1 -> int4: the rabbit sees the rabbit; int3 & sent9 & int4 -> int5: the rabbit is red; sent9 & int3 & int1 -> int6: the rabbit is red; sent7 & sent11 -> int7: the bear is blue; int3 & sent9 & int2 -> int8: the bear is red; int8 & sent8 & sent10 -> hypothesis", "score": 0.8095382452011108, "hypothesis": "i don't think the bear is not green", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int2 & sent5 & int1 -> int4: the rabbit sees the rabbit; int3 & sent9 & int4 -> int5: the rabbit is red; sent9 & int3 & int1 -> int6: the rabbit is red; sent7 & sent11 -> int7: the bear is blue; int3 & sent9 & int2 -> int8: the bear is red; int8 & sent8 & sent10 -> hypothesis", "score": 0.8071657419204712, "hypothesis": "the bear is round", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int2 & sent5 & int1 -> int4: the rabbit sees the rabbit; int3 & sent9 & int4 -> int5: the rabbit is red; sent9 & int3 & int1 -> int6: the rabbit is red; sent7 & sent11 -> int7: the bear is blue; int3 & sent9 & int2 -> int8: the bear is red; int8 & sent8 & sent10 -> int9: the bear likes the bear; int9 & sent10 & int7 -> hypothesis", "score": 0.8091686367988586, "hypothesis": "i don't think the bear is round", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8941779136657715, "hypothesis": "the rabbit does not see the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.9098824858665466, "hypothesis": "i don't think the rabbit does not see the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent1 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int3 & sent2 & int2 -> int4: the bear sees the rabbit; int4 & sent5 & sent2 -> int5: the bear sees the rabbit; int4 & int5 & sent9 -> int6: the bear is red; sent3 & int4 & sent9 -> int7: the bear is red; int6 & sent6 & int7 -> hypothesis", "score": 0.8026940226554871, "hypothesis": "the bear chases the bear", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int3 & sent2 & int2 -> int4: the bear sees the rabbit; int4 & sent5 & sent2 -> int5: the bear sees the bun; sent1 & sent11 -> int6: the rabbit chases the rabbit; sent5 & int1 & sent2 -> int7: the bear sees the rabbit; int7 & sent7 & sent11 -> int8: the bear is blue; int4 & sent7 & sent11 -> int9: the bear is white; sent9 & int4 & sent9 -> int10: the bear is red; sent9 & int5 & int4 -> int11: the bear is red; int10 & sent10 & int8 -> hypothesis", "score": 0.587185800075531, "hypothesis": "i don't think the bear chases the bear", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.895018994808197, "hypothesis": "the rabbit does not like the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent1 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; sent13 & int2 & int1 -> int3: the bear likes the rabbit; int3 & sent4 & sent4 -> hypothesis", "score": 0.8950514793395996, "hypothesis": "i don't think the rabbit does not like the rabbit", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int3 & sent2 & int1 -> int4: the bear sees the rabbit; int4 & sent2 & int1 -> int5: the bear sees the rabbit; sent7 & sent11 -> int6: the bear is blue; sent4 & sent10 & int6 -> hypothesis", "score": 0.839181125164032, "hypothesis": "the bear is kind", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent11 -> int1: the rabbit chases the rabbit; int1 & sent5 & sent2 -> int2: the bear sees the rabbit; int1 & sent5 & sent2 -> int3: the bear sees the rabbit; int2 & sent5 & int1 -> int4: the rabbit sees the rabbit; int4 & sent9 & int2 -> int5: the rabbit is red; sent9 & int4 & int1 -> int6: the rabbit is red; sent7 & sent11 -> int7: the bear is blue; int3 & sent9 & int4 -> int8: the bear is red; int8 & sent8 & sent10 -> int9: the bear likes the bear; int9 & sent10 & int7 -> hypothesis", "score": 0.8186954259872437, "hypothesis": "i don't think the bear is kind", "context": {"sent1": "if something chases the bear then it chases the rabbit", "sent2": "if something chases the rabbit and it sees the bear then the bear sees the rabbit", "sent3": "the bear chases the rabbit", "sent4": "the bear likes the rabbit", "sent5": "the rabbit sees the bear", "sent6": "the rabbit is blue", "sent7": "if something chases the bear then the bear is blue", "sent8": "the rabbit is green", "sent9": "if something sees the rabbit and it chases the rabbit then it is red", "sent10": "if something likes the rabbit and it is blue then it likes the bear", "sent11": "the rabbit chases the bear", "sent12": "if something chases the bear then it is red", "sent13": "if the bear chases the rabbit and the bear sees the rabbit then the bear likes the rabbit", "sent14": "if something sees the bear and it chases the rabbit then the bear likes the rabbit", "sent15": "the rabbit likes the bear"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 1.0, "hypothesis": "erin is red", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 0.9999998807907104, "hypothesis": "i don't think erin is red", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 & sent4 & sent13 -> hypothesis;", "sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 0.9999271035194397, "hypothesis": "fiona is not red", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": "sent2 & sent4 & sent13 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 & sent4 & sent13 -> hypothesis;", "sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is not red", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": "sent2 & sent4 & sent13 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent11 -> int1: dave is green; int1 & sent5 & sent6 -> int2: dave is cold; int2 & sent7 & sent13 -> int3: dave is red; int3 & sent4 & sent12 -> hypothesis", "score": 0.754145622253418, "hypothesis": "dave is not quiet", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent13 & sent2 -> int1: dave is red; int1 & sent2 -> int2: dave is quiet; int2 & sent7 & sent13 -> hypothesis", "score": 0.78944331407547, "hypothesis": "i don't think dave is not quiet", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.866218090057373, "hypothesis": "dave is cold", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent11 -> int1: dave is green; int1 & sent5 -> int2: dave is cold; int2 & sent9 & sent5 -> hypothesis", "score": 0.8140355348587036, "hypothesis": "i don't think dave is cold", "context": {"sent1": "fiona is young", "sent2": "fiona is quiet", "sent3": "erin is kind", "sent4": "fiona is kind", "sent5": "fiona is cold", "sent6": "fiona is green", "sent7": "dave is white", "sent8": "erin is red", "sent9": "fiona is white", "sent10": "fiona is red", "sent11": "erin is green", "sent12": "dave is young", "sent13": "if fiona is kind and fiona is quiet then fiona is red"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 1.0, "hypothesis": "dave is kind", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent10 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 1.0, "hypothesis": "i don't think dave is kind", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent10 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 0.9999971389770508, "hypothesis": "fiona is not kind", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is not kind", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent6 & sent12 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> hypothesis", "score": 0.9430968165397644, "hypothesis": "dave is nice", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent6 & sent12 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> hypothesis", "score": 0.9446878433227539, "hypothesis": "i don't think dave is nice", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent12 -> hypothesis;"], "proof_pred": "sent12 & sent1 -> hypothesis", "score": 0.9398099780082703, "hypothesis": "fiona is not nice", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent1 & sent12 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent12 -> hypothesis;"], "proof_pred": "sent12 & sent1 -> hypothesis", "score": 0.9395712614059448, "hypothesis": "i don't think fiona is not nice", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent1 & sent12 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; sent5 & int1 & sent6 -> hypothesis", "score": 0.8993441462516785, "hypothesis": "dave is young", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; sent5 & int1 & sent6 -> hypothesis", "score": 0.8990099430084229, "hypothesis": "i don't think dave is young", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; sent5 & int1 & sent6 -> hypothesis", "score": 0.8994207978248596, "hypothesis": "dave is not young", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; sent5 & int1 & sent6 -> hypothesis", "score": 0.8997155427932739, "hypothesis": "i don't think dave is not young", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; int1 & sent3 -> int2: dave is white; int2 & int1 & sent5 -> int3: dave is young; int3 & sent15 & sent6 -> int4: dave is nice; int4 & sent3 -> int5: dave is white; int5 & sent14 -> hypothesis", "score": 0.9177259802818298, "hypothesis": "dave is furry", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; int1 & sent3 -> int2: dave is white; int2 & int1 & sent5 -> int3: dave is young; int3 & sent15 & sent6 -> int4: dave is nice; int4 & sent3 -> int5: dave is white; int5 & sent14 -> hypothesis", "score": 0.9155791401863098, "hypothesis": "i don't think dave is furry", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; int1 & sent3 -> int2: dave is white; int2 & int1 & sent5 -> int3: dave is young; int3 & sent15 & sent6 -> int4: dave is nice; int4 & sent3 -> int5: dave is white; int5 & sent14 -> hypothesis", "score": 0.9163431525230408, "hypothesis": "dave is not furry", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent12 -> int1: dave is nice; int1 & sent3 -> int2: dave is white; int2 & int1 & sent5 -> int3: dave is young; int3 & sent15 & sent6 -> int4: dave is nice; int4 & sent3 -> int5: dave is white; int5 & sent14 -> hypothesis", "score": 0.9157324433326721, "hypothesis": "i don't think dave is not furry", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": "sent6 & sent12 -> int1: dave is nice; sent3 & int1 -> int2: dave is white; sent14 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent5 & sent6 -> int1: erin is young; int1 & sent9 -> int2: erin is nice; sent8 & sent13 -> int3: erin is nice; int3 & sent4 & sent7 -> int4: erin is kind; int2 & sent11 & sent7 -> int5: erin is kind; int5 & int2 & sent7 -> int6: erin is kind; int4 & int5 & int6 -> int7: erin is young; int7 & sent9 -> int8: erin is nice; int3 & sent4 & sent7 -> int9: erin is kind; int3 & sent4 & sent7 -> int10: erin is kind; sent9 & int7 -> int11: erin is nice; int11 & sent4 & sent7 -> int12: erin is kind; int11 & sent4 & sent7 -> int13: erin is kind; int13 & int3 & sent8 -> int14: erin is nice; sent4 & int11 & sent7 -> int15: erin is kind; sent9 & int7 -> int16: erin is nice; sent4 & int16 & sent4 -> int17: erin is nice; sent4 & int17 & sent4 -> int18: erin is nice; int11 & sent4 & int2 -> int19: erin is nice; int11 & sent4 & int2 -> int20: erin is nice; int11 & sent4 & int2 -> int21: erin is nice; int11 & sent4 & int2 -> int22: erin is nice; int11 & sent4 & int2 -> int23: erin is nice; int11 & sent4 & int2 -> int24: erin is nice; int11 & sent4 & int2 -> int25: erin is nice; int11 & sent4 & int2 -> int26: erin is nice; int11 & sent4 & int2 -> int27: erin is nice; int11 & sent4 & int2 -> int28: erin is nice; int11 & sent4 & int2 -> int29: erin is nice; int11 & sent4 & int2 -> int30: erin is nice", "score": 0.7456169128417969, "hypothesis": "erin is not young", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent5 & sent6 -> hypothesis", "score": 0.696372926235199, "hypothesis": "i don't think erin is not young", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent12 -> int1: fiona is nice; sent5 & int1 & sent1 -> int2: fiona is young; int2 & sent9 -> int3: fiona is nice; sent3 & int3 -> int4: fiona is white; int4 & sent13 -> int5: fiona is nice; int5 & sent5 & sent1 -> int6: fiona is young; int6 & sent15 & sent1 -> int7: fiona is nice; int7 & sent5 & sent1 -> int8: fiona is young; int6 & sent5 & sent1 -> int9: fiona is good; int8 & sent9 -> int10: fiona is nice; int10 & sent5 & sent1 -> int11: fiona is nice; int11 & sent5 & sent1 -> int12: fiona is young; int12 & sent9 -> int13: fiona is nice; int13 & sent7 & int2 -> hypothesis", "score": 0.5689671635627747, "hypothesis": "fiona is smart", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent12 -> int1: fiona is nice; sent5 & int1 & sent1 -> int2: fiona is young; sent1 & int2 & sent15 -> int3: fiona is nice; int3 & sent3 -> int4: fiona is white; int4 & sent13 -> int5: fiona is nice; int3 & sent5 & sent1 -> int6: fiona is young; int6 & sent5 & sent1 -> int7: fiona is nice; int7 & sent5 & sent1 -> int8: fiona is young; int6 & sent5 & sent1 -> int9: fiona is good; int9 & sent5 & sent1 -> int10: fiona is young; int10 & sent9 -> int11: fiona is nice; int11 & sent7 & int3 -> int12: fiona is nice; sent1 & int12 & sent5 -> int13: fiona is nice; int13 & sent5 & sent1 -> int14: fiona is nice; sent1 & int14 & sent5 -> int15: fiona is nice; sent1 & int15 & sent5 -> int16: fiona is nice; int16 & sent7 & int3 -> int17: fiona is nice; int16 & sent7 & int3 -> int18: fiona is nice; int16 & sent7 & int3 -> int19: fiona is nice; int16 & sent7 & int3 -> int20: fiona is nice; int16 & sent7 & int3 -> int21: fiona is nice; int16 & sent7 & int3 -> int22: fiona is nice; int16 & sent7 & int3 -> int23: fiona is nice; int16 & sent7 & int3 -> int24: fiona is nice; int16 & sent7 & int3 -> int25: fiona is nice; int16 & sent7 & int3 -> int26: fiona is nice; int16 & sent7 & int3 -> int27: fiona is nice; int16 & sent7 & int3 -> int28: fiona is nice; int16 & sent7 & int3 -> int29: fiona is nice; int16 & sent7 & int3 -> int30: fiona is nice", "score": 0.5894277691841125, "hypothesis": "i don't think fiona is smart", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8464087247848511, "hypothesis": "erin is not big", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8602322340011597, "hypothesis": "i don't think erin is not big", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> int2: dave is young; sent6 & int2 & sent15 -> int3: dave is nice; sent3 & int3 -> int4: dave is white; sent14 & int4 -> int5: dave is furry; int5 & sent14 -> int6: dave is nice; int6 & sent5 & sent6 -> int7: dave is younger; sent6 & int7 & sent15 -> int8: dave is nice; int8 & sent5 & sent6 -> int9: dave is young; int9 & sent5 & sent6 -> int10: dave is nice; int10 & sent5 & sent6 -> int11: dave is young; int11 & sent5 & sent6 -> int12: dave is nice; int12 & sent5 & sent6 -> int13: dave is young; int13 & sent5 & sent6 -> int14: dave is nice; sent6 & int14 & sent5 -> int15: dave is nice; int15 & sent6 & sent5 -> int16: dave is nice; sent6 & int16 & sent5 -> int17: dave is nice; sent6 & sent5 & int17 -> int18: dave is nice; int17 & sent6 & int15 -> int19: dave is nice; int17 & sent6 & int15 -> int20: dave is nice; int17 & sent6 & int15 -> int21: dave is nice; int17 & sent6 & int15 -> int22: dave is nice; int17 & sent6 & int15 -> int23: dave is nice; int17 & sent6 & int15 -> int24: dave is nice; int17 & sent6 & int15 -> int25: dave is nice; int17 & sent6 & int15 -> int26: dave is nice; int17 & sent6 & int15 -> int27: dave is nice; int17 & sent6 & int15 -> int28: dave is nice; int17 & sent6 & int15 -> int29: dave is nice; int17 & sent6 & int15 -> int30: dave is nice", "score": 0.4852190911769867, "hypothesis": "dave is smart", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent6 & sent12 -> int1: dave is nice; sent5 & sent6 & int1 -> int2: dave is young; sent6 & int2 & sent15 -> int3: dave is nice; int3 & sent3 -> int4: dave is white; int4 & sent13 -> int5: dave is nice; int3 & sent5 & sent6 -> int6: dave is young; int6 & sent5 & sent6 -> int7: dave is nice; int7 & sent5 & sent6 -> int8: dave is young; int8 & sent5 & sent6 -> int9: dave is nice; sent6 & sent5 & int9 -> int10: dave is nice; sent6 & sent5 & int10 -> int11: dave is nice; int11 & sent5 & sent6 -> int12: dave is young; int12 & sent5 & sent6 -> int13: dave is nice; sent6 & int13 & sent5 -> int14: dave is nice; sent6 & sent5 & int14 -> int15: dave is nice; int15 & sent6 & sent5 -> int16: dave is nice; int16 & sent5 & sent6 -> int17: dave is nice; int6 & sent5 & sent6 -> int18: dave is nice; int6 & sent5 & sent6 -> int19: dave is nice; int6 & sent5 & sent6 -> int20: dave is nice; int6 & sent5 & sent6 -> int21: dave is nice; int6 & sent5 & sent6 -> int22: dave is nice; int6 & sent5 & sent6 -> int23: dave is nice; int6 & sent5 & sent6 -> int24: dave is nice; int6 & sent5 & sent6 -> int25: dave is nice; int6 & sent5 & sent6 -> int26: dave is nice; int6 & sent5 & sent6 -> int27: dave is nice; int6 & sent5 & sent6 -> int28: dave is nice; int6 & sent5 & sent6 -> int29: dave is nice; int6 & sent5 & sent6 -> int30: dave is nice", "score": 0.7903725504875183, "hypothesis": "i don't think dave is smart", "context": {"sent1": "fiona is big", "sent2": "fiona is kind", "sent3": "all nice people are white", "sent4": "erin is nice", "sent5": "big, nice people are young", "sent6": "dave is big", "sent7": "all nice, smart people are kind", "sent8": "erin is white", "sent9": "all young people are nice", "sent10": "dave is kind", "sent11": "erin is smart", "sent12": "all big people are nice", "sent13": "if someone is white then they are nice", "sent14": "white people are furry", "sent15": "big, young people are nice"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 1.0, "hypothesis": "bob is big", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent5 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 0.999998927116394, "hypothesis": "i don't think bob is big", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent5 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.9393535256385803, "hypothesis": "fiona is not kind", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is not kind", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent4 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.9398422837257385, "hypothesis": "fiona is not nice", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent1 & sent4 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent4 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.9355901479721069, "hypothesis": "i don't think fiona is not nice", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent1 & sent4 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent4 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> int1: fiona is not nice; int1 & sent4 -> hypothesis", "score": 0.900916337966919, "hypothesis": "fiona is nice", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent1 & sent4 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent4 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.9411197304725647, "hypothesis": "i don't think fiona is nice", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": "sent1 & sent4 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.9387743473052979, "hypothesis": "fiona is not white", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent4 -> int1: fiona is not nice; int1 & sent3 & sent2 -> hypothesis", "score": 0.5536880493164062, "hypothesis": "i don't think fiona is not white", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6215431690216064, "hypothesis": "harry is big", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent4 -> int1: harry is not nice; int1 & sent1 -> hypothesis", "score": 0.7837682962417603, "hypothesis": "i don't think harry is big", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent4 -> int1: fiona is not nice; int1 & sent1 -> int2: fiona is not nice; int2 & int1 & sent1 -> int3: fiona is kind; int3 & sent4 -> hypothesis", "score": 0.8675867319107056, "hypothesis": "anne is not nice", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent4 -> int1: anne is not nice; sent4 & sent1 -> int2: anne is not nice; int2 & int1 -> hypothesis", "score": 0.8913989067077637, "hypothesis": "i don't think anne is not nice", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent1 -> int1: bob is nice; sent1 & sent4 -> int2: bob is not nice; sent1 & sent4 -> int3: bob is not nice; int3 & int2 & int2 -> hypothesis", "score": 0.7582592368125916, "hypothesis": "bob is white", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent1 -> int1: bob is kind; sent4 & int1 -> hypothesis", "score": 0.9280185699462891, "hypothesis": "i don't think bob is white", "context": {"sent1": "fiona is kind", "sent2": "anne is rough", "sent3": "harry is green", "sent4": "all kind people are not nice", "sent5": "bob is big"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent9 -> hypothesis;"], "proof_pred": "sent9 -> hypothesis", "score": 1.0, "hypothesis": "the bear likes the mouse", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent9 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent9 -> hypothesis;"], "proof_pred": "sent9 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the bear likes the mouse", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent9 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 0.9999973177909851, "hypothesis": "the cat does not need the rabbit", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the cat does not need the rabbit", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent2 & sent13 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> hypothesis", "score": 0.9476361870765686, "hypothesis": "the rabbit sees the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent2 & sent13 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> hypothesis", "score": 0.9457536935806274, "hypothesis": "i don't think the rabbit sees the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent2 & sent13 -> hypothesis;"], "proof_pred": "sent2 & sent13 -> hypothesis", "score": 0.928850531578064, "hypothesis": "the rabbit does not see the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent2 & sent13 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> hypothesis", "score": 0.94823157787323, "hypothesis": "i don't think the rabbit does not see the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> hypothesis", "score": 0.9465233087539673, "hypothesis": "the cat is big", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> hypothesis", "score": 0.9464763402938843, "hypothesis": "i don't think the cat is big", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; int3 & sent6 -> hypothesis", "score": 0.9540587663650513, "hypothesis": "the cat is not big", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> hypothesis", "score": 0.9462382197380066, "hypothesis": "i don't think the cat is not big", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; int2 & sent11 -> hypothesis", "score": 0.958834171295166, "hypothesis": "the cat sees the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; int2 & sent11 -> hypothesis", "score": 0.9580861926078796, "hypothesis": "i don't think the cat sees the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; int2 & sent11 -> hypothesis", "score": 0.9576253890991211, "hypothesis": "the cat does not see the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis;"], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; int2 & sent11 -> hypothesis", "score": 0.9586988091468811, "hypothesis": "i don't think the cat does not see the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": "sent2 & sent13 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> hypothesis", "score": 0.8499660491943359, "hypothesis": "the bear does not see the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; int2 & sent11 -> hypothesis", "score": 0.9455155730247498, "hypothesis": "i don't think the bear does not see the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; int3 & sent6 -> hypothesis", "score": 0.6150182485580444, "hypothesis": "the rabbit is big", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent5 & sent6 -> int4: the mouse is big; sent11 & int4 -> int5: the mouse sees the cat; int5 & sent6 -> int6: the cat is big; int3 & sent6 -> int7: the cat is big; int7 & sent11 -> int8: the cat sees the cats; sent6 & int3 -> int9: the cat is big; int9 & sent11 -> int10: the cat sees the cat; int10 & sent6 -> int11: the cat is big; sent6 & int3 -> int12: the cat is big; int12 & sent9 & sent11 -> int13: the cat sees the cat; int13 & sent6 -> int14: the cat is big; int14 & sent9 -> int15: the cat sees the cat; sent13 & int15 & sent6 -> int16: the cat is large; int16 & sent9 -> int17: the cat sees the cat; int17 & sent6 -> int18: the cat is big; int17 & sent6 -> int19: the cat is big; int17 & sent6 -> int20: the cat is big; int17 & sent6 -> int21: the cat is big; int17 & sent6 -> int22: the cat is big; int17 & sent6 -> int23: the cat is big; int17 & sent6 -> int24: the cat is big; int17 & sent6 -> int25: the cat is big; int17 & sent6 -> int26: the cat is big; int17 & sent6 -> int27: the cat is big; int17 & sent6 -> int28: the cat is big; int17 & sent6 -> int29: the cat is big; int17 & sent6 -> int30: the cat is big", "score": 0.5084233283996582, "hypothesis": "i don't think the rabbit is big", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent2 & sent13 -> int4: the mouse sees the cat; int4 & sent6 -> int5: the cat is big; sent11 & int5 -> int6: the cat sees the cat; sent6 & int3 -> int7: the cat is big; sent11 & int7 -> int8: the cat sees the cat; sent8 & sent4 -> int9: the mouse likes the cat; sent8 & int9 -> hypothesis", "score": 0.6328465938568115, "hypothesis": "the bear does not like the bear", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent2 & sent13 -> int4: the mouse sees the cat; int4 & sent6 -> int5: the cat is big; sent11 & int5 -> int6: the cat sees the cat; sent6 & int3 -> int7: the cat is big; sent11 & int7 -> int8: the cat sees the cat; sent6 & int8 -> int9: the cat is big; sent11 & int9 -> int10: the cat sees the cat; sent6 & int10 -> int11: the cat is big; int7 & sent11 -> int12: the cat sees the cat; int12 & sent6 -> int13: the cat is big; int13 & sent11 -> int14: the cat sees the cat; int14 & sent6 -> int15: the cat is big; int15 & sent11 -> int16: the cat sees the cat; int16 & sent6 -> int17: the cat is big; int17 & sent11 -> int18: the cat sees the cat; sent11 & int17 -> int19: the cat sees the cat; sent11 & int17 -> int20: the cat sees the cat; sent11 & int17 -> int21: the cat sees the cat; sent11 & int17 -> int22: the cat sees the cat; sent11 & int17 -> int23: the cat sees the cat; sent11 & int17 -> int24: the cat sees the cat; sent11 & int17 -> int25: the cat sees the cat; sent11 & int17 -> int26: the cat sees the cat; sent11 & int17 -> int27: the cat sees the cat; sent11 & int17 -> int28: the cat sees the cat; sent11 & int17 -> int29: the cat sees the cat; sent11 & int17 -> int30: the cat sees the cat", "score": 0.6478797793388367, "hypothesis": "i don't think the bear does not like the bear", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent2 & sent13 -> int4: the mouse sees the cat; int4 & sent6 -> int5: the cat is big; sent11 & int5 -> int6: the cat sees the cat; sent6 & int3 -> int7: the cat is big; sent11 & int7 -> int8: the cat sees the cat; int8 & sent6 -> int9: the cat is big; int3 & sent6 -> int10: the cat is big; int10 & sent11 -> int11: the cat sees the cat; int11 & sent6 -> int12: the cat is big; int12 & sent9 -> int13: the cat sees the cat; int13 & sent6 -> int14: the cat is big; int14 & sent9 -> int15: the cat sees the cat; int15 & sent6 -> int16: the cat is big; int15 & sent6 -> int17: the cat is big; int16 & int17 & sent9 -> int18: the cat sees the cat; int15 & sent6 -> int19: the cat is big; int15 & sent6 -> int20: the cat is big; int15 & sent6 -> int21: the cat is big; int15 & sent6 -> int22: the cat is big; int15 & sent6 -> int23: the cat is big; int15 & sent6 -> int24: the cat is big; int15 & sent6 -> int25: the cat is big; int15 & sent6 -> int26: the cat is big; int15 & sent6 -> int27: the cat is big; int15 & sent6 -> int28: the cat is big; int15 & sent6 -> int29: the cat is big; int15 & sent6 -> int30: the cat is big", "score": 0.6618348956108093, "hypothesis": "the bear likes the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent2 & sent13 -> int4: the mouse sees the cat; sent4 & sent8 -> int5: the mouse likes the bear; int5 & sent4 -> hypothesis", "score": 0.6818667054176331, "hypothesis": "i don't think the bear likes the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent8 & sent4 -> int4: the mouse likes the cat; int4 & sent1 -> int5: the mouse is big; sent11 & int5 -> int6: the mouse sees the cat; int3 & sent6 -> int7: the cat is big; sent11 & int7 -> int8: the cat sees the cat; int8 & sent6 -> int9: the cat is big; int3 & sent6 -> int10: the cat is big; int10 & sent9 -> int11: the cat sees the cats; int11 & sent6 -> int12: the cat is big; sent9 & int12 & sent10 -> int13: the cat is not big; int11 & sent6 -> int14: the cat is big; int11 & sent6 -> int15: the cat is big; int15 & int10 & sent9 -> int16: the cat sees the cat; int16 & sent6 -> int17: the cat is big; int16 & sent6 -> int18: the cat is big; int16 & sent6 -> int19: the cat is big; int16 & sent6 -> int20: the cat is big; int16 & sent6 -> int21: the cat is big; int16 & sent6 -> int22: the cat is big; int16 & sent6 -> int23: the cat is big; int16 & sent6 -> int24: the cat is big; int16 & sent6 -> int25: the cat is big; int16 & sent6 -> int26: the cat is big; int16 & sent6 -> int27: the cat is big; int16 & sent6 -> int28: the cat is big; int16 & sent6 -> int29: the cat is big; int16 & sent6 -> int30: the cat is big", "score": 0.5607343316078186, "hypothesis": "the mouse does not like the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent4 & sent8 -> int4: the mouse likes the cat; int4 & sent4 -> hypothesis", "score": 0.5907560586929321, "hypothesis": "i don't think the mouse does not like the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.47225263714790344, "hypothesis": "the rabbit is blue", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent4 & sent8 -> int4: the mouse likes the cat; int4 & sent4 -> int5: the bear likes the cat; int3 & sent6 -> int6: the cat is big; int5 & sent11 -> int7: the bear sees the cat; int5 & sent6 -> int8: the bear is big; int3 & sent6 -> int9: the cat is big; sent9 & int6 -> int10: the cat sees the cats; int3 & sent6 -> int11: the cat is big; int10 & sent6 -> int12: the cat is big; int12 & sent11 -> int13: the cat sees the cat; int13 & sent6 -> int14: the cat is big; int13 & sent6 -> int15: the cat is big; int15 & sent11 -> int16: the cat sees the cat; int16 & sent6 -> int17: the cat is big; sent12 & int17 & int15 -> int18: the cat views the cat; int12 & sent8 -> int19: the bear likes the bear; int11 & sent8 -> int20: the cat likes the bear; int13 & sent6 -> int21: the cat is big; int13 & sent6 -> int22: the cat is big; int13 & sent6 -> int23: the cat is big; int13 & sent6 -> int24: the cat is big; int13 & sent6 -> int25: the cat is big; int13 & sent6 -> int26: the cat is big; int13 & sent6 -> int27: the cat is big; int13 & sent6 -> int28: the cat is big; int13 & sent6 -> int29: the cat is big; int13 & sent6 -> int30: the cat is big", "score": 0.5209568738937378, "hypothesis": "i don't think the rabbit is blue", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent4 & sent8 -> int4: the bear likes the cat; int3 & sent6 -> int5: the cat is big; int4 & sent11 -> int6: the bear sees the cat; int3 & sent6 -> int7: the cat is big; int7 & sent11 -> int8: the cat sees the cats; sent8 & int3 & sent6 -> int9: the cat is large; int9 & sent7 -> int10: the cat sees the cat; int9 & sent7 -> int11: the cat sees the tiger; sent11 & int5 -> int12: the cat sees the cat; int12 & sent6 -> int13: the cat is big; int13 & sent7 -> int14: the cat sees the cat; int14 & sent6 -> int15: the cat is big; int15 & sent7 -> int16: the cat sees the cat; int16 & sent6 -> int17: the cat is big; int17 & int15 & sent9 -> int18: the cat sees the cat; int14 & sent6 -> int19: the cat is big; int14 & sent6 -> int20: the cat is big; int14 & sent6 -> int21: the cat is big; int14 & sent6 -> int22: the cat is big; int14 & sent6 -> int23: the cat is big; int14 & sent6 -> int24: the cat is big; int14 & sent6 -> int25: the cat is big; int14 & sent6 -> int26: the cat is big; int14 & sent6 -> int27: the cat is big; int14 & sent6 -> int28: the cat is big; int14 & sent6 -> int29: the cat is big; int14 & sent6 -> int30: the cat is big", "score": 0.5058514475822449, "hypothesis": "the bear is not young", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; sent6 & int1 -> int2: the cat is big; sent11 & int2 -> int3: the cat sees the cat; sent2 & sent13 -> int4: the mouse sees the cat; int4 & sent6 -> int5: the cat is big; sent11 & int5 -> int6: the cat sees the cat; sent6 & int3 -> int7: the cat is big; sent11 & int7 -> int8: the cat sees the cat; int8 & sent6 -> int9: the cat is big; sent11 & int9 -> int10: the cat sees the cat; sent10 & int8 & sent6 -> int11: the cat is large; int7 & sent11 -> int12: the cat sees the cat; sent12 & int8 & sent6 -> int13: the cat is huge; int11 & sent13 & int10 -> int14: the cat looks big; sent11 & int14 -> int15: the cat sees the cat; int15 & sent6 -> int16: the cat is big; int16 & sent11 -> int17: the cat sees the cat; int17 & sent6 -> int18: the cat is big; int17 & sent6 -> int19: the cat is big; int17 & sent6 -> int20: the cat is big; int17 & sent6 -> int21: the cat is big; int17 & sent6 -> int22: the cat is big; int17 & sent6 -> int23: the cat is big; int17 & sent6 -> int24: the cat is big; int17 & sent6 -> int25: the cat is big; int17 & sent6 -> int26: the cat is big; int17 & sent6 -> int27: the cat is big; int17 & sent6 -> int28: the cat is big; int17 & sent6 -> int29: the cat is big; int17 & sent6 -> int30: the cat is big", "score": 0.4729185998439789, "hypothesis": "i don't think the bear is not young", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; int2 & sent11 -> int3: the cat sees the cat; int3 & sent6 -> int4: the cat is big; sent11 & int4 -> int5: the cat sees the kitty; int3 & int5 & sent6 -> int6: the cat is large; sent11 & int2 -> int7: the cat sees the cat; sent7 & int3 & sent6 -> int8: the cat is huge; int6 & int8 & sent8 -> int9: the cat looks big; int9 & sent11 -> int10: the cat sees the cat; int10 & sent6 -> int11: the cat is big; int11 & sent11 -> int12: the cat sees the cat; int12 & sent6 -> int13: the cat is big; int13 & sent11 -> int14: the cat sees the cat; int14 & sent6 -> int15: the cat is big; int15 & sent11 -> int16: the cat sees the cat; int16 & sent6 -> int17: the cat is big; sent11 & int17 -> int18: the cat sees the cat; sent11 & int17 -> int19: the cat sees the cat; sent11 & int17 -> int20: the cat sees the cat; sent11 & int17 -> int21: the cat sees the cat; sent11 & int17 -> int22: the cat sees the cat; sent11 & int17 -> int23: the cat sees the cat; sent11 & int17 -> int24: the cat sees the cat; sent11 & int17 -> int25: the cat sees the cat; sent11 & int17 -> int26: the cat sees the cat; sent11 & int17 -> int27: the cat sees the cat; sent11 & int17 -> int28: the cat sees the cat; sent11 & int17 -> int29: the cat sees the cat; sent11 & int17 -> int30: the cat sees the cat", "score": 0.5068615078926086, "hypothesis": "the cat likes the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent13 & sent2 -> int1: the rabbit sees the cat; int1 & sent6 -> int2: the cat is big; int2 & sent11 -> int3: the cat sees the cat; int3 & sent6 -> int4: the cat is big; sent11 & int4 -> int5: the cat sees the kitty; int3 & sent6 -> int6: the cat is big; sent11 & int2 -> int7: the cat sees the cat; int7 & sent6 -> int8: the cat is big; int8 & sent11 -> int9: the cat sees the cat; int9 & sent6 -> int10: the cat is big; int10 & sent8 & sent8 -> int11: the cat likes the cat; int11 & sent11 -> hypothesis", "score": 0.5455144047737122, "hypothesis": "i don't think the cat likes the cat", "context": {"sent1": "all green people are big", "sent2": "if someone sees the rabbit then the rabbit sees the cat", "sent3": "the rabbit sees the mouse", "sent4": "if someone likes the bear then the bear likes the cat", "sent5": "the mouse is not young", "sent6": "if someone sees the cat then the cat is big", "sent7": "if someone is young and green then they like the bear", "sent8": "the mouse does not like the bear", "sent9": "the bear likes the mouse", "sent10": "the mouse is not kind", "sent11": "if someone is big then they see the cat", "sent12": "the cat needs the rabbit", "sent13": "the mouse sees the rabbit"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 1.0, "hypothesis": "the cow sees the rabbit", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the cow sees the rabbit", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 0.9999991059303284, "hypothesis": "the cow does not see the rabbit", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the cow does not see the rabbit", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent6 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent4 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent4 -> hypothesis", "score": 0.9396843910217285, "hypothesis": "the dog eats the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent4 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent4 & sent7 -> hypothesis;"], "proof_pred": "sent4 & sent7 -> hypothesis", "score": 0.9383149743080139, "hypothesis": "i don't think the dog eats the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent4 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent4 & sent7 -> hypothesis;"], "proof_pred": "sent4 & sent7 -> hypothesis", "score": 0.9391281604766846, "hypothesis": "the dog does not eat the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent4 & sent7 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent4 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent4 -> hypothesis", "score": 0.9404823780059814, "hypothesis": "i don't think the dog does not eat the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent4 & sent7 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis;"], "proof_pred": "sent2 & sent3 -> int1: the cow sees the bear; int1 & sent7 -> hypothesis", "score": 0.9462584257125854, "hypothesis": "the cow eats the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis;"], "proof_pred": "sent3 & sent2 -> int1: the cow sees the bear; int1 & sent7 -> hypothesis", "score": 0.9444525837898254, "hypothesis": "i don't think the cow eats the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis;"], "proof_pred": "sent3 & sent2 -> int1: the cow sees the bear; int1 & sent7 -> hypothesis", "score": 0.944553792476654, "hypothesis": "the cow does not eat the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis;"], "proof_pred": "sent2 & sent3 -> int1: the cow sees the bear; int1 & sent7 -> hypothesis", "score": 0.9457300901412964, "hypothesis": "i don't think the cow does not eat the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": "sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> int2: the cow eats the dog; sent4 & sent7 -> int3: the dog eats the dog; int3 & sent2 & sent3 -> int4: the cow sees the bear; sent7 & int4 -> int5: the cow eats the dog; sent2 & sent3 -> int6: the cow sees the bear; int2 & int6 & sent7 -> int7: the cow ate the dog; int7 & sent9 & sent2 -> int8: the bear sees the rabbit; int2 & sent6 & sent8 -> int9: the bear sees the dog; int9 & sent9 & sent2 -> int10: the bear sees the bear; int10 & sent7 -> hypothesis", "score": 0.5340457558631897, "hypothesis": "the bear does not eat the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent3 -> int1: the cow sees the bear; sent7 & int1 -> int2: the cow eats the dog; sent7 & sent4 -> int3: the dog eats the dog; int3 & sent2 & sent3 -> int4: the cow sees the bull; sent7 & int4 -> hypothesis", "score": 0.5283825397491455, "hypothesis": "i don't think the bear does not eat the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.7991081476211548, "hypothesis": "the rabbit eats the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.7955721616744995, "hypothesis": "i don't think the rabbit eats the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5420578718185425, "hypothesis": "the cow does not see the cow", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent3 -> int1: the cow sees the bear; int1 & sent7 -> int2: the cow eats the dog; int1 & sent7 -> int3: the cow eats the dog; int2 & int3 & sent2 -> int4: the cow sees the cow; int4 & sent9 & sent2 -> hypothesis", "score": 0.7318610548973083, "hypothesis": "i don't think the cow does not see the cow", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5022929310798645, "hypothesis": "the bear is young", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.49731209874153137, "hypothesis": "i don't think the bear is young", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.594144344329834, "hypothesis": "the bear does not see the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5868877172470093, "hypothesis": "i don't think the bear does not see the dog", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5379260182380676, "hypothesis": "the bear eats the cow", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5490536093711853, "hypothesis": "i don't think the bear eats the cow", "context": {"sent1": "the dog is kind", "sent2": "the rabbit sees the cow", "sent3": "if someone sees the cow then the cow sees the bear", "sent4": "the dog sees the bear", "sent5": "the bear is kind", "sent6": "the cow sees the rabbit", "sent7": "if someone sees the bear then they eat the dog", "sent8": "the bear is big", "sent9": "the bear eats the rabbit"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent5 & sent7 & sent11 -> hypothesis;", "sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "fiona is round", "context": {"sent1": "furry, kind people are smart", "sent2": "if fiona is quiet and fiona is blue then fiona is furry", "sent3": "fiona is round", "sent4": "all kind, furry people are blue", "sent5": "fiona is furry", "sent6": "fiona is blue", "sent7": "fiona is big", "sent8": "if fiona is blue then fiona is not quiet", "sent9": "fiona is smart", "sent10": "big, quiet people are not round", "sent11": "if fiona is furry and fiona is big then fiona is round", "sent12": "fiona is not kind", "sent13": "fiona is not quiet"}, "proof_gt": "sent5 & sent7 & sent11 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent5 & sent7 & sent11 -> hypothesis;", "sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.999998927116394, "hypothesis": "i don't think fiona is round", "context": {"sent1": "furry, kind people are smart", "sent2": "if fiona is quiet and fiona is blue then fiona is furry", "sent3": "fiona is round", "sent4": "all kind, furry people are blue", "sent5": "fiona is furry", "sent6": "fiona is blue", "sent7": "fiona is big", "sent8": "if fiona is blue then fiona is not quiet", "sent9": "fiona is smart", "sent10": "big, quiet people are not round", "sent11": "if fiona is furry and fiona is big then fiona is round", "sent12": "fiona is not kind", "sent13": "fiona is not quiet"}, "proof_gt": "sent5 & sent7 & sent11 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 0.8324995636940002, "hypothesis": "fiona is kind", "context": {"sent1": "furry, kind people are smart", "sent2": "if fiona is quiet and fiona is blue then fiona is furry", "sent3": "fiona is round", "sent4": "all kind, furry people are blue", "sent5": "fiona is furry", "sent6": "fiona is blue", "sent7": "fiona is big", "sent8": "if fiona is blue then fiona is not quiet", "sent9": "fiona is smart", "sent10": "big, quiet people are not round", "sent11": "if fiona is furry and fiona is big then fiona is round", "sent12": "fiona is not kind", "sent13": "fiona is not quiet"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is kind", "context": {"sent1": "furry, kind people are smart", "sent2": "if fiona is quiet and fiona is blue then fiona is furry", "sent3": "fiona is round", "sent4": "all kind, furry people are blue", "sent5": "fiona is furry", "sent6": "fiona is blue", "sent7": "fiona is big", "sent8": "if fiona is blue then fiona is not quiet", "sent9": "fiona is smart", "sent10": "big, quiet people are not round", "sent11": "if fiona is furry and fiona is big then fiona is round", "sent12": "fiona is not kind", "sent13": "fiona is not quiet"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 1.0, "hypothesis": "harry is nice", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent5 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent5 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 0.9999993443489075, "hypothesis": "i don't think harry is nice", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent5 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 0.9999966621398926, "hypothesis": "fiona is not kind", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent8 -> hypothesis;"], "proof_pred": "sent8 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is not kind", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent8 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent9 & sent10 -> hypothesis;"], "proof_pred": "sent10 & sent9 -> hypothesis", "score": 0.9399893879890442, "hypothesis": "erin is big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent9 & sent10 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent9 & sent10 -> hypothesis;"], "proof_pred": "sent9 & sent10 -> hypothesis", "score": 0.9392414689064026, "hypothesis": "i don't think erin is big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent9 & sent10 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent5 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent5 -> hypothesis", "score": 0.9474877715110779, "hypothesis": "harry is not kind", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent5 & sent11 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent5 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent5 -> hypothesis", "score": 0.9477768540382385, "hypothesis": "i don't think harry is not kind", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent5 & sent11 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; int1 & sent3 & sent6 -> hypothesis", "score": 0.9014414548873901, "hypothesis": "charlie is round", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; int1 & sent3 & sent6 -> hypothesis", "score": 0.9018929600715637, "hypothesis": "i don't think charlie is round", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; int1 & sent3 & sent6 -> hypothesis", "score": 0.9015762209892273, "hypothesis": "charlie is not round", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; int1 & sent3 & sent6 -> hypothesis", "score": 0.901355504989624, "hypothesis": "i don't think charlie is not round", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; sent3 & int1 & sent6 -> int2: charlie is round; sent9 & int2 -> hypothesis", "score": 0.9220117926597595, "hypothesis": "charlie is big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; sent3 & int1 & sent6 -> int2: charlie is round; sent9 & int2 -> hypothesis", "score": 0.9220836162567139, "hypothesis": "i don't think charlie is big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; int1 & sent3 & sent6 -> int2: charlie is round; sent9 & int2 -> hypothesis", "score": 0.9267897605895996, "hypothesis": "charlie is not big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent3 -> int1: charlie is kind; sent3 & int1 & sent6 -> int2: charlie is round; sent9 & int2 -> hypothesis", "score": 0.9223570227622986, "hypothesis": "i don't think charlie is not big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": "sent3 & sent11 -> int1: charlie is kind; sent3 & sent6 & int1 -> int2: charlie is round; sent9 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent9 & sent10 -> int1: erin is big; int1 & sent11 -> int2: erin is nice; sent6 & sent8 & int2 -> int3: fiona is round; sent9 & int3 -> hypothesis", "score": 0.693516731262207, "hypothesis": "fiona is not big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent9 & sent10 -> int1: erin is big; int1 & sent8 & sent6 -> int2: fiona is round; int2 & sent9 -> hypothesis", "score": 0.7885817289352417, "hypothesis": "i don't think fiona is not big", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent10 & sent9 -> int1: erin is big; int1 & sent5 -> int2: erin is nice; int2 & sent11 -> hypothesis", "score": 0.7977067828178406, "hypothesis": "erin is kind", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent10 & sent9 -> int1: erin is big; int1 & sent5 -> int2: erin is nice; int2 & sent11 -> hypothesis", "score": 0.7894099950790405, "hypothesis": "i don't think erin is kind", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent3 -> int1: harry is kind; sent5 & int1 & sent6 -> int2: fiona is round; int2 & sent9 -> int3: fiona is big; sent6 & sent10 & sent5 -> int4: fiona is nice; int4 & sent8 & sent6 -> hypothesis", "score": 0.7125884890556335, "hypothesis": "fiona is not round", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent3 -> int1: harry is kind; sent5 & int1 & sent6 -> int2: fiona is round; int2 & sent7 & sent10 -> hypothesis", "score": 0.8416229486465454, "hypothesis": "i don't think fiona is not round", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6760079264640808, "hypothesis": "charlie is cold", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6767690777778625, "hypothesis": "i don't think charlie is cold", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8581297397613525, "hypothesis": "harry is not green", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8730000257492065, "hypothesis": "i don't think harry is not green", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5249911546707153, "hypothesis": "charlie is blue", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5187500715255737, "hypothesis": "i don't think charlie is blue", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.7089958786964417, "hypothesis": "erin is not green", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.4950316846370697, "hypothesis": "i don't think erin is not green", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent8 & sent6 & sent10 -> int1: fiona is round; sent9 & int1 -> int2: fiona is big; sent6 & sent10 & sent5 -> int3: fiona is nice; int3 & sent8 & sent6 -> int4: fiona is round; int4 & sent9 -> int5: fiona is big; sent2 & int2 & sent8 -> hypothesis", "score": 0.8075041770935059, "hypothesis": "fiona is green", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent8 & sent6 & sent10 -> int1: fiona is round; int1 & sent9 -> int2: fiona is big; sent8 & sent10 & sent6 -> int3: fiona is nice; int3 & sent8 & sent6 -> int4: fiona is round; int4 & sent9 -> int5: fiona is big; sent2 & sent8 & int3 -> int6: fiona is not nice; int2 & sent9 -> int7: fiona is big; int2 & sent7 -> int8: fiona is big; sent8 & sent6 & int3 -> int9: fiona is round; sent9 & int4 -> int10: fiona is big; sent7 & int2 & int10 -> int11: fiona is big; int11 & sent7 & int4 -> int12: fiona is big; int12 & sent7 & int4 -> int13: fiona is big; int13 & sent7 & int4 -> int14: fiona is big; int14 & sent7 & int4 -> int15: fiona is big; int15 & sent8 & sent6 -> int16: fiona is round; int15 & sent7 & int4 -> int17: fiona is big; int15 & sent7 & int4 -> int18: fiona is big; int15 & sent7 & int4 -> int19: fiona is big; int15 & sent7 & int4 -> int20: fiona is big; int15 & sent7 & int4 -> int21: fiona is big; int15 & sent7 & int4 -> int22: fiona is big; int15 & sent7 & int4 -> int23: fiona is big; int15 & sent7 & int4 -> int24: fiona is big; int15 & sent7 & int4 -> int25: fiona is big; int15 & sent7 & int4 -> int26: fiona is big; int15 & sent7 & int4 -> int27: fiona is big; int15 & sent7 & int4 -> int28: fiona is big; int15 & sent7 & int4 -> int29: fiona is big; int15 & sent7 & int4 -> int30: fiona is big", "score": 0.6220118999481201, "hypothesis": "i don't think fiona is green", "context": {"sent1": "if someone is cold and not green then they are not nice", "sent2": "green, kind people are big", "sent3": "charlie is nice", "sent4": "cold, nice people are not big", "sent5": "harry is nice", "sent6": "all nice, kind people are round", "sent7": "if erin is blue and erin is round then erin is not nice", "sent8": "fiona is kind", "sent9": "if someone is round then they are big", "sent10": "erin is round", "sent11": "all nice people are kind", "sent12": "if erin is not nice then erin is not big"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis;", "sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 1.0, "hypothesis": "dave is nice", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis;", "sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 0.9999993443489075, "hypothesis": "i don't think dave is nice", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis;", "sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 0.9999047517776489, "hypothesis": "dave is not nice", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis;", "sent10 -> hypothesis;"], "proof_pred": "sent10 -> hypothesis", "score": 1.0, "hypothesis": "i don't think dave is not nice", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9457672238349915, "hypothesis": "dave is smart", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9430767297744751, "hypothesis": "i don't think dave is smart", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9429446458816528, "hypothesis": "dave is not smart", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent11 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> hypothesis", "score": 0.9467626810073853, "hypothesis": "i don't think dave is not smart", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> int1: dave is smart; int1 & sent7 -> hypothesis", "score": 0.947445273399353, "hypothesis": "dave is kind", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> int1: dave is smart; int1 & sent7 -> hypothesis", "score": 0.9465928673744202, "hypothesis": "i don't think dave is kind", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> int1: dave is smart; int1 & sent7 -> hypothesis", "score": 0.9458799958229065, "hypothesis": "dave is not kind", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent1 -> int1: dave is smart; int1 & sent7 -> hypothesis", "score": 0.9475148916244507, "hypothesis": "i don't think dave is not kind", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> int3: dave is nice; sent9 & int3 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent5 & sent11 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: dave is smart; int1 & sent7 -> int2: dave is kind; int2 & sent10 & sent9 -> hypothesis", "score": 0.8757250905036926, "hypothesis": "dave is round", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> int3: dave is nice; sent9 & int3 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent5 & sent11 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: dave is smart; int1 & sent7 -> int2: dave is kind; int2 & sent10 & sent9 -> hypothesis", "score": 0.8741132020950317, "hypothesis": "i don't think dave is round", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> int3: dave is nice; sent9 & int3 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent5 & sent11 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: dave is smart; int1 & sent7 -> int2: dave is kind; int2 & sent10 & sent9 -> hypothesis", "score": 0.8753983974456787, "hypothesis": "dave is not round", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent4 & sent8 & int2 -> int3: dave is nice; sent9 & int3 & int2 -> hypothesis;", "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent5 & sent11 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent11 -> int1: dave is smart; int1 & sent7 -> int2: dave is kind; int2 & sent10 & sent9 -> hypothesis", "score": 0.873520016670227, "hypothesis": "i don't think dave is not round", "context": {"sent1": "all blue people are smart", "sent2": "if someone is round and nice then they are blue", "sent3": "kind, round people are white", "sent4": "if someone is cold and kind then they are nice", "sent5": "all kind, blue people are round", "sent6": "round people are kind", "sent7": "smart people are kind", "sent8": "dave is cold", "sent9": "if dave is kind and dave is nice then dave is round", "sent10": "dave is nice", "sent11": "dave is blue"}, "proof_gt": "sent1 & sent11 -> int1: dave is smart; sent7 & int1 -> int2: dave is kind; sent9 & sent10 & int2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 1.0, "hypothesis": "bob is red", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is red", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent4 & sent11 -> hypothesis;", "sent5 -> hypothesis;", "sent8 & sent12 -> int1: fiona is kind; sent11 & int1 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 0.9977712035179138, "hypothesis": "fiona is not nice", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent4 & sent11 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent4 & sent11 -> hypothesis;", "sent5 -> hypothesis;", "sent8 & sent12 -> int1: fiona is kind; sent11 & int1 -> hypothesis;"], "proof_pred": "sent5 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is not nice", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent4 & sent11 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent8 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> hypothesis", "score": 0.9452242255210876, "hypothesis": "fiona is quiet", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent8 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> hypothesis", "score": 0.9438828825950623, "hypothesis": "i don't think fiona is quiet", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent12 & sent14 -> int1: bob is kind; sent11 & int1 -> int2: bob is nice; sent2 & sent3 & int2 -> int3: bob is quiet; sent10 & int3 -> int4: bob is cold; sent7 & int4 -> hypothesis;", "sent1 & sent14 -> int1: bob is quiet; sent10 & int1 -> int2: bob is cold; sent7 & int2 -> hypothesis;", "sent7 & sent15 -> hypothesis;", "sent10 & sent13 -> int1: bob is cold; sent7 & int1 -> hypothesis;", "sent2 & sent6 -> int1: bob is nice; sent2 & sent3 & int1 -> int2: bob is quiet; sent10 & int2 -> int3: bob is cold; sent7 & int3 -> hypothesis;"], "proof_pred": "sent15 & sent7 -> hypothesis", "score": 0.9446475505828857, "hypothesis": "bob is not furry", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent12 & sent14 -> int1: bob is kind; sent11 & int1 -> int2: bob is nice; sent2 & sent3 & int2 -> int3: bob is quiet; sent10 & int3 -> int4: bob is cold; sent7 & int4 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent12 & sent14 -> int1: bob is kind; sent11 & int1 -> int2: bob is nice; sent2 & sent3 & int2 -> int3: bob is quiet; sent10 & int3 -> int4: bob is cold; sent7 & int4 -> hypothesis;", "sent1 & sent14 -> int1: bob is quiet; sent10 & int1 -> int2: bob is cold; sent7 & int2 -> hypothesis;", "sent7 & sent15 -> hypothesis;", "sent10 & sent13 -> int1: bob is cold; sent7 & int1 -> hypothesis;", "sent2 & sent6 -> int1: bob is nice; sent2 & sent3 & int1 -> int2: bob is quiet; sent10 & int2 -> int3: bob is cold; sent7 & int3 -> hypothesis;"], "proof_pred": "sent15 & sent7 -> hypothesis", "score": 0.9442528486251831, "hypothesis": "i don't think bob is not furry", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent12 & sent14 -> int1: bob is kind; sent11 & int1 -> int2: bob is nice; sent2 & sent3 & int2 -> int3: bob is quiet; sent10 & int3 -> int4: bob is cold; sent7 & int4 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> hypothesis", "score": 0.9456287026405334, "hypothesis": "fiona is cold", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> hypothesis", "score": 0.9447856545448303, "hypothesis": "i don't think fiona is cold", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> hypothesis", "score": 0.944327712059021, "hypothesis": "fiona is not cold", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> hypothesis", "score": 0.9454768300056458, "hypothesis": "i don't think fiona is not cold", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> int2: fiona is cold; int2 & sent7 -> hypothesis", "score": 0.9617834091186523, "hypothesis": "fiona is furry", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> int2: fiona is cold; int2 & sent7 -> hypothesis", "score": 0.9601241946220398, "hypothesis": "i don't think fiona is furry", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> int2: fiona is cold; int2 & sent7 -> hypothesis", "score": 0.9586688876152039, "hypothesis": "fiona is not furry", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis;"], "proof_pred": "sent8 & sent1 -> int1: fiona is quiet; int1 & sent10 -> int2: fiona is cold; int2 & sent7 -> hypothesis", "score": 0.961694061756134, "hypothesis": "i don't think fiona is not furry", "context": {"sent1": "all white things are quiet", "sent2": "bob is red", "sent3": "nice, red things are quiet", "sent4": "fiona is kind", "sent5": "fiona is nice", "sent6": "all red things are nice", "sent7": "all cold things are furry", "sent8": "fiona is white", "sent9": "if bob is kind and bob is furry then bob is white", "sent10": "if something is quiet then it is cold", "sent11": "kind things are nice", "sent12": "white things are kind", "sent13": "bob is quiet", "sent14": "bob is white", "sent15": "bob is cold"}, "proof_gt": "sent1 & sent8 -> int1: fiona is quiet; sent10 & int1 -> int2: fiona is cold; sent7 & int2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent1 & sent5 -> int1: the dog chases the squirrel; sent16 & int1 -> hypothesis;", "sent3 & sent9 & sent20 -> int1: the dog chases the squirrel; sent16 & int1 -> hypothesis;", "sent16 & sent21 -> hypothesis;", "sent17 -> hypothesis;"], "proof_pred": "sent17 -> hypothesis", "score": 1.0, "hypothesis": "the dog eats the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent5 -> int1: the dog chases the squirrel; sent16 & int1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 & sent5 -> int1: the dog chases the squirrel; sent16 & int1 -> hypothesis;", "sent3 & sent9 & sent20 -> int1: the dog chases the squirrel; sent16 & int1 -> hypothesis;", "sent16 & sent21 -> hypothesis;", "sent17 -> hypothesis;"], "proof_pred": "sent17 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the dog eats the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent5 -> int1: the dog chases the squirrel; sent16 & int1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 0.9998982548713684, "hypothesis": "the squirrel does not like the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the squirrel does not like the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9382966756820679, "hypothesis": "the squirrel chases the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9400522708892822, "hypothesis": "i don't think the squirrel chases the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.9419965744018555, "hypothesis": "the squirrel does not chase the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent6 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> hypothesis", "score": 0.93894362449646, "hypothesis": "i don't think the squirrel does not chase the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; int1 & sent16 -> hypothesis", "score": 0.9468640685081482, "hypothesis": "the squirrel eats the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; int1 & sent16 -> hypothesis", "score": 0.9455126523971558, "hypothesis": "i don't think the squirrel eats the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; int1 & sent16 -> hypothesis", "score": 0.9450659155845642, "hypothesis": "the squirrel does not eat the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; int1 & sent16 -> hypothesis", "score": 0.946889340877533, "hypothesis": "i don't think the squirrel does not eat the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; int1 & sent16 -> int2: the squirrel eats the squirrel; int2 & sent12 -> hypothesis", "score": 0.9604238271713257, "hypothesis": "the squirrel likes the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis", "score": 0.9464610815048218, "hypothesis": "i don't think the squirrel likes the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis", "score": 0.9470747113227844, "hypothesis": "the squirrel does not like the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis;"], "proof_pred": "sent6 & sent1 -> int1: the squirrel chases the squirrel; int1 & sent16 -> int2: the squirrel eats the squirrel; int2 & sent12 -> hypothesis", "score": 0.9618091583251953, "hypothesis": "i don't think the squirrel does not like the squirrel", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": "sent1 & sent6 -> int1: the squirrel chases the squirrel; sent16 & int1 -> int2: the squirrel eats the squirrel; sent12 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent18 & sent7 & sent17 -> int1: the squirrel eats the dog; int1 & sent15 -> int2: the squirrel ate the dog; int2 & int1 & sent15 -> int3: the squirrel eats the dog; sent15 & int3 & sent9 -> int4: the dog eats the dog; int4 & sent7 & sent17 -> hypothesis", "score": 0.7688086628913879, "hypothesis": "the dog does not eat the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent21 & sent16 -> int1: the dog eats the squirrel; int1 & sent18 & sent7 -> hypothesis", "score": 0.849490761756897, "hypothesis": "i don't think the dog does not eat the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5380927324295044, "hypothesis": "the dog is nice", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5329468846321106, "hypothesis": "i don't think the dog is nice", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent9 & sent20 -> int1: the dog chases the squirrel; sent20 & int1 & sent3 -> int2: the dog tries the squirrel; int2 & sent21 & sent16 -> int3: the dog eats the squirrel; int3 & sent18 & sent7 -> int4: the dog eats the dog; int4 & sent7 & sent18 -> int5: the squirrel eats the dog; int4 & sent15 & sent9 -> hypothesis", "score": 0.5812232494354248, "hypothesis": "the dog does not chase the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent9 & sent20 -> int1: the dog chases the squirrel; int1 & sent21 & sent16 -> int2: the dog ate the squirrel; int1 & sent16 -> int3: the dog eats the squirrel; int2 & int3 & sent12 -> int4: the dog loves the squirrel; sent3 & int4 & sent20 -> int5: the dog chases the squirrel; int5 & sent9 & sent3 -> int6: the dog chases the dog; int6 & sent7 & sent17 -> hypothesis", "score": 0.5310946702957153, "hypothesis": "i don't think the dog does not chase the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8247895240783691, "hypothesis": "the dog likes the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8296594619750977, "hypothesis": "i don't think the dog likes the dog", "context": {"sent1": "if something is blue then it chases the squirrel", "sent2": "the squirrel likes the dog", "sent3": "the squirrel is nice", "sent4": "the squirrel is young", "sent5": "the dog is blue", "sent6": "the squirrel is blue", "sent7": "if the dog eats the squirrel and the squirrel is red then the squirrel eats the dog", "sent8": "the squirrel chases the dog", "sent9": "the dog likes the squirrel", "sent10": "the squirrel is cold", "sent11": "if the squirrel chases the dog and the dog is young then the squirrel is red", "sent12": "if something eats the squirrel then it likes the squirrel", "sent13": "the dog is young", "sent14": "the dog is cold", "sent15": "the squirrel eats the dog", "sent16": "if something chases the squirrel then it eats the squirrel", "sent17": "the dog eats the squirrel", "sent18": "the squirrel is red", "sent19": "the dog is red", "sent20": "if something likes the squirrel and the squirrel is nice then it chases the squirrel", "sent21": "the dog chases the squirrel"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 1.0, "hypothesis": "the tiger is round", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.9999995231628418, "hypothesis": "i don't think the tiger is round", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 0.9984524846076965, "hypothesis": "the tiger is not round", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent1 -> hypothesis;"], "proof_pred": "sent1 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the tiger is not round", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9456104636192322, "hypothesis": "the tiger is kind", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.945419192314148, "hypothesis": "i don't think the tiger is kind", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9426457285881042, "hypothesis": "the tiger is not kind", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent7 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> hypothesis", "score": 0.9459802508354187, "hypothesis": "i don't think the tiger is not kind", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the tiger is kind; int1 & sent5 -> hypothesis", "score": 0.9451587796211243, "hypothesis": "the tiger is rough", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the tiger is kind; int1 & sent5 -> hypothesis", "score": 0.9446603655815125, "hypothesis": "i don't think the tiger is rough", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the tiger is kind; sent5 & int1 -> hypothesis", "score": 0.9446311593055725, "hypothesis": "the tiger is not rough", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the tiger is kind; int1 & sent5 -> hypothesis", "score": 0.9450576305389404, "hypothesis": "i don't think the tiger is not rough", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent7 -> int1: the tiger is kind; int1 & sent5 -> int2: the tiger is rough; int2 & sent4 -> hypothesis", "score": 0.9639036059379578, "hypothesis": "the tiger is cold", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the tiger is kind; int1 & sent5 -> int2: the tiger is rough; int2 & sent4 -> hypothesis", "score": 0.9642161726951599, "hypothesis": "i don't think the tiger is cold", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis;"], "proof_pred": "sent7 & sent1 -> int1: the tiger is kind; int1 & sent5 -> int2: the tiger is rough; int2 & sent4 -> hypothesis", "score": 0.9595402479171753, "hypothesis": "the tiger is not cold", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis;"], "proof_pred": "sent1 & sent7 -> int1: the tiger is kind; int1 & sent5 -> int2: the tiger is rough; int2 & sent4 -> hypothesis", "score": 0.9641000628471375, "hypothesis": "i don't think the tiger is not cold", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6025089621543884, "hypothesis": "the tiger does not need the tiger", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> int3: the tiger is cold; sent1 & int3 & sent6 -> int4: the tiger is kind; sent5 & int4 -> int5: the tiger is rough; int5 & sent4 -> int6: the tiger gets cold; int6 & sent1 & sent6 -> int7: the tiger is kind; int3 & sent3 & sent1 -> int8: the tiger gets rough; int8 & sent4 -> int9: the tiger is cold; int9 & sent1 & sent6 -> int10: the tiger is kind; sent5 & int10 -> int11: the tiger is rough; int11 & sent4 -> int12: the tiger is cold; sent1 & int12 & sent6 -> hypothesis", "score": 0.4522480070590973, "hypothesis": "i don't think the tiger does not need the tiger", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.569984495639801, "hypothesis": "the tiger is green", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> int3: the tiger is cold; sent1 & int3 & sent6 -> int4: the tiger is kind; sent5 & int4 -> int5: the tiger is rough; sent4 & int5 -> int6: the tiger is cold; sent1 & int6 & sent6 -> int7: the tiger is kind; int3 & sent1 & sent6 -> int8: the tiger is nice; int8 & sent5 -> int9: the tiger is rough; sent4 & int9 -> int10: the tiger is cold; int10 & sent1 & sent6 -> int11: the tiger is kind; int11 & sent5 -> int12: the tiger is rough; sent4 & int12 -> int13: the tiger is cold; int13 & sent6 & sent1 -> int14: the tiger is kind; sent7 & sent1 -> int15: the tiger is kind; int15 & sent7 & sent1 -> int16: the tiger is kind; sent7 & sent1 -> int17: the tiger is kind; sent7 & sent1 -> int18: the tiger is kind; sent7 & sent1 -> int19: the tiger is kind; sent7 & sent1 -> int20: the tiger is kind; int20 & sent6 & sent1 -> int21: the tiger is kind; sent7 & sent1 -> int22: the tiger is kind; sent7 & sent1 -> int23: the tiger is kind; sent7 & sent1 -> int24: the tiger is kind; sent7 & sent1 -> int25: the tiger is kind; sent7 & sent1 -> int26: the tiger is kind; sent7 & sent1 -> int27: the tiger is kind; sent7 & sent1 -> int28: the tiger is kind; sent7 & sent1 -> int29: the tiger is kind; sent7 & sent1 -> int30: the tiger is kind", "score": 0.5445563197135925, "hypothesis": "i don't think the tiger is green", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.591431200504303, "hypothesis": "the tiger does not like the tiger", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> int3: the tiger is cold; sent1 & int3 & sent6 -> int4: the tiger is kind; sent5 & int4 -> int5: the tiger is rough; int5 & sent4 -> int6: the tiger gets cold; int6 & sent1 & sent6 -> int7: the tiger is kind; int3 & sent3 & sent1 -> int8: the tiger gets rough; int8 & sent4 -> int9: the tiger is cold; int9 & sent1 & sent6 -> int10: the tiger is kind; sent5 & int10 -> int11: the tiger is rough; int11 & sent4 -> int12: the tiger is cold; sent1 & int12 & sent6 -> hypothesis", "score": 0.44844114780426025, "hypothesis": "i don't think the tiger does not like the tiger", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent7 -> int1: the tiger is kind; sent5 & int1 -> int2: the tiger is rough; sent4 & int2 -> int3: the tiger is cold; sent1 & int3 & sent6 -> int4: the tiger is kind; sent5 & int4 -> int5: the tiger is rough; int5 & sent4 -> int6: the tiger is cool; int6 & sent3 & sent1 -> int7: the tiger is rough; int7 & sent4 -> int8: the tiger is cold; int8 & sent1 & sent6 -> int9: the tiger is kind; int6 & sent1 & sent6 -> int10: the tiger is nice; int5 & sent5 -> int11: the tiger is rough; int11 & sent4 -> int12: the tiger is cold; int12 & sent1 & sent6 -> hypothesis", "score": 0.448123037815094, "hypothesis": "the tiger chases the tiger", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.5499709844589233, "hypothesis": "i don't think the tiger chases the tiger", "context": {"sent1": "the tiger is round", "sent2": "all green things are kind", "sent3": "round, cold things are rough", "sent4": "rough things are cold", "sent5": "all kind things are rough", "sent6": "all cold, round things are kind", "sent7": "all round things are kind"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent8 & sent10 -> hypothesis;", "sent18 -> hypothesis;"], "proof_pred": "sent18 -> hypothesis", "score": 0.9999993443489075, "hypothesis": "the bald eagle needs the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent8 & sent10 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent8 & sent10 -> hypothesis;", "sent18 -> hypothesis;"], "proof_pred": "sent18 -> hypothesis", "score": 0.9999847412109375, "hypothesis": "i don't think the bald eagle needs the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent8 & sent10 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent11 -> hypothesis;"], "proof_pred": "sent11 -> hypothesis", "score": 0.9998703002929688, "hypothesis": "the cow is not blue", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent11 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent11 -> hypothesis;"], "proof_pred": "sent11 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the cow is not blue", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent11 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis;", "sent5 & sent21 & sent23 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> hypothesis", "score": 0.8969128131866455, "hypothesis": "the lion is green", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis;", "sent5 & sent21 & sent23 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> hypothesis", "score": 0.8961447477340698, "hypothesis": "i don't think the lion is green", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis;", "sent5 & sent21 & sent23 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> hypothesis", "score": 0.8945054411888123, "hypothesis": "the lion is not green", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis;", "sent5 & sent21 & sent23 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> hypothesis", "score": 0.8965501189231873, "hypothesis": "i don't think the lion is not green", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis;", "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> hypothesis", "score": 0.929469645023346, "hypothesis": "the lion eats the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis;", "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> hypothesis", "score": 0.9267998933792114, "hypothesis": "i don't think the lion eats the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis;", "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> hypothesis", "score": 0.9212468266487122, "hypothesis": "the lion does not eat the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis;", "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> hypothesis", "score": 0.922749400138855, "hypothesis": "i don't think the lion does not eat the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis;", "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> int3: the lion eats the bald eagle; sent12 & int3 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; int1 & sent4 -> int2: the lion eats the bald eagle; int2 & sent12 -> hypothesis", "score": 0.9294611811637878, "hypothesis": "the lion eats the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis;", "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> int3: the lion eats the bald eagle; sent12 & int3 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; int1 & sent4 -> int2: the lion eats the bald eagle; int2 & sent12 -> hypothesis", "score": 0.9308044910430908, "hypothesis": "i don't think the lion eats the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis;", "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> int3: the lion eats the bald eagle; sent12 & int3 -> hypothesis;"], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis", "score": 0.9302332401275635, "hypothesis": "the lion does not eat the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis;", "sent9 & sent12 -> int1: the cow eats the lion; sent21 & sent23 & int1 -> int2: the lion is green; sent4 & int2 -> int3: the lion eats the bald eagle; sent12 & int3 -> hypothesis;"], "proof_pred": "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis", "score": 0.9294390678405762, "hypothesis": "i don't think the lion does not eat the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; int3 & sent13 & sent5 -> int4: the lion is not green; int3 & sent21 & sent23 -> int5: the lion is green; int3 & sent21 & sent23 -> int6: the lion is green; int3 & sent21 & sent23 -> int7: the lion is green; int3 & sent21 & sent23 -> int8: the lion is green; int3 & sent21 & sent23 -> int9: the lion is green; int3 & sent21 & sent23 -> int10: the lion is green; int3 & sent21 & sent23 -> int11: the lion is green; int3 & sent21 & sent23 -> int12: the lion is green; int3 & sent21 & sent23 -> int13: the lion is green; int3 & sent21 & sent23 -> int14: the lion is green; int3 & sent21 & sent23 -> int15: the lion is green; int3 & sent21 & sent23 -> int16: the lion is green; int3 & sent21 & sent23 -> int17: the lion is green; int3 & sent21 & sent23 -> int18: the lion is green; int3 & sent21 & sent23 -> int19: the lion is green; int3 & sent21 & sent23 -> int20: the lion is green; int3 & sent21 & sent23 -> int21: the lion is green; int3 & sent21 & sent23 -> int22: the lion is green; int3 & sent21 & sent23 -> int23: the lion is green; int3 & sent21 & sent23 -> int24: the lion is green; int3 & sent21 & sent23 -> int25: the lion is green; int3 & sent21 & sent23 -> int26: the lion is green; int3 & sent21 & sent23 -> int27: the lion is green; int3 & sent21 & sent23 -> int28: the lion is green; int3 & sent21 & sent23 -> int29: the lion is green; int3 & sent21 & sent23 -> int30: the lion is green", "score": 0.6535815000534058, "hypothesis": "the bald eagle does not eat the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; int3 & sent21 & sent23 -> int4: the lion is green; int3 & sent21 & sent23 -> int5: the lion is green; int3 & sent21 & sent23 -> int6: the lion is green; int3 & sent21 & sent23 -> int7: the lion is green; int3 & sent21 & sent23 -> int8: the lion is green; int3 & sent21 & sent23 -> int9: the lion is green; int3 & sent21 & sent23 -> int10: the lion is green; int3 & sent21 & sent23 -> int11: the lion is green; int3 & sent21 & sent23 -> int12: the lion is green; int3 & sent21 & sent23 -> int13: the lion is green; int3 & sent21 & sent23 -> int14: the lion is green; int3 & sent21 & sent23 -> int15: the lion is green; int3 & sent21 & sent23 -> int16: the lion is green; int3 & sent21 & sent23 -> int17: the lion is green; int3 & sent21 & sent23 -> int18: the lion is green; int3 & sent21 & sent23 -> int19: the lion is green; int3 & sent21 & sent23 -> int20: the lion is green; int3 & sent21 & sent23 -> int21: the lion is green; int3 & sent21 & sent23 -> int22: the lion is green; int3 & sent21 & sent23 -> int23: the lion is green; int3 & sent21 & sent23 -> int24: the lion is green; int3 & sent21 & sent23 -> int25: the lion is green; int3 & sent21 & sent23 -> int26: the lion is green; int3 & sent21 & sent23 -> int27: the lion is green; int3 & sent21 & sent23 -> int28: the lion is green; int3 & sent21 & sent23 -> int29: the lion is green; int3 & sent21 & sent23 -> int30: the lion is green", "score": 0.8585951924324036, "hypothesis": "i don't think the bald eagle does not eat the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent23 & sent21 & sent5 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis", "score": 0.9288256764411926, "hypothesis": "the bald eagle eats the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent23 & sent21 & sent5 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> hypothesis", "score": 0.7166069149971008, "hypothesis": "i don't think the bald eagle eats the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent6 & sent4 -> int2: the lion eats the bald eagle; int1 & sent12 -> int3: the lion eats the lion; int3 & sent23 & sent21 -> int4: the lion is green; sent23 & int3 & sent21 -> int5: the lion is green; sent23 & int3 & sent21 -> int6: the lion is green; sent23 & int3 & sent21 -> int7: the lion is green; sent23 & int3 & sent21 -> int8: the lion is green; sent23 & int3 & sent21 -> int9: the lion is green; sent23 & int3 & sent21 -> int10: the lion is green; sent23 & int3 & sent21 -> int11: the lion is green; sent23 & int3 & sent21 -> int12: the lion is green; sent23 & int3 & sent21 -> int13: the lion is green; sent23 & int3 & sent21 -> int14: the lion is green; sent23 & int3 & sent21 -> int15: the lion is green; sent23 & int3 & sent21 -> int16: the lion is green; sent23 & int3 & sent21 -> int17: the lion is green; sent23 & int3 & sent21 -> int18: the lion is green; sent23 & int3 & sent21 -> int19: the lion is green; sent23 & int3 & sent21 -> int20: the lion is green; sent23 & int3 & sent21 -> int21: the lion is green; sent23 & int3 & sent21 -> int22: the lion is green; sent23 & int3 & sent21 -> int23: the lion is green; sent23 & int3 & sent21 -> int24: the lion is green; sent23 & int3 & sent21 -> int25: the lion is green; sent23 & int3 & sent21 -> int26: the lion is green; sent23 & int3 & sent21 -> int27: the lion is green; sent23 & int3 & sent21 -> int28: the lion is green; sent23 & int3 & sent21 -> int29: the lion is green; sent23 & int3 & sent21 -> int30: the lion is green", "score": 0.6948327422142029, "hypothesis": "the cow does not need the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; sent23 & int3 & sent21 -> int4: the lion is green; int3 & sent21 & sent23 -> int5: the lion is green; int3 & sent21 & sent23 -> int6: the lion is green; int3 & sent21 & sent23 -> int7: the lion is green; int3 & sent21 & sent23 -> int8: the lion is green; int3 & sent21 & sent23 -> int9: the lion is green; int3 & sent21 & sent23 -> int10: the lion is green; int3 & sent21 & sent23 -> int11: the lion is green; int3 & sent21 & sent23 -> int12: the lion is green; int3 & sent21 & sent23 -> int13: the lion is green; int3 & sent21 & sent23 -> int14: the lion is green; int3 & sent21 & sent23 -> int15: the lion is green; int3 & sent21 & sent23 -> int16: the lion is green; int3 & sent21 & sent23 -> int17: the lion is green; int3 & sent21 & sent23 -> int18: the lion is green; int3 & sent21 & sent23 -> int19: the lion is green; int3 & sent21 & sent23 -> int20: the lion is green; int3 & sent21 & sent23 -> int21: the lion is green; int3 & sent21 & sent23 -> int22: the lion is green; int3 & sent21 & sent23 -> int23: the lion is green; int3 & sent21 & sent23 -> int24: the lion is green; int3 & sent21 & sent23 -> int25: the lion is green; int3 & sent21 & sent23 -> int26: the lion is green; int3 & sent21 & sent23 -> int27: the lion is green; int3 & sent21 & sent23 -> int28: the lion is green; int3 & sent21 & sent23 -> int29: the lion is green; int3 & sent21 & sent23 -> int30: the lion is green", "score": 0.9054410457611084, "hypothesis": "i don't think the cow does not need the lion", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent23 & sent21 & sent5 -> int1: the lion is green; sent1 & sent4 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; int3 & sent23 & sent21 -> int4: the lion is green; int3 & sent23 & sent21 -> int5: the lion is green; int3 & sent23 & sent21 -> int6: the lion is green; int3 & sent23 & sent21 -> int7: the lion is green; int3 & sent23 & sent21 -> int8: the lion is green; int3 & sent23 & sent21 -> int9: the lion is green; int3 & sent23 & sent21 -> int10: the lion is green; int3 & sent23 & sent21 -> int11: the lion is green; int3 & sent23 & sent21 -> int12: the lion is green; int3 & sent23 & sent21 -> int13: the lion is green; int3 & sent23 & sent21 -> int14: the lion is green; int3 & sent23 & sent21 -> int15: the lion is green; int3 & sent23 & sent21 -> int16: the lion is green; int3 & sent23 & sent21 -> int17: the lion is green; int3 & sent23 & sent21 -> int18: the lion is green; int3 & sent23 & sent21 -> int19: the lion is green; int3 & sent23 & sent21 -> int20: the lion is green; int3 & sent23 & sent21 -> int21: the lion is green; int3 & sent23 & sent21 -> int22: the lion is green; int3 & sent23 & sent21 -> int23: the lion is green; int3 & sent23 & sent21 -> int24: the lion is green; int3 & sent23 & sent21 -> int25: the lion is green; int3 & sent23 & sent21 -> int26: the lion is green; int3 & sent23 & sent21 -> int27: the lion is green; int3 & sent23 & sent21 -> int28: the lion is green; int3 & sent23 & sent21 -> int29: the lion is green; int3 & sent23 & sent21 -> int30: the lion is green", "score": 0.7059681415557861, "hypothesis": "the cow needs the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent23 & sent21 & sent5 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; sent22 & sent2 & sent15 -> int4: the lion does not eat the cow; sent22 & sent15 & sent2 -> int5: the cow does not eat the weed; sent22 & sent15 & sent2 -> int6: the cow does not eat the bog; sent22 & sent15 & sent2 -> int7: the cow does not eat the pig; sent23 & int3 & sent21 -> int8: the lion is green; sent23 & int3 & sent21 -> int9: the lion is green; sent23 & int3 & sent21 -> int10: the lion is green; sent23 & int3 & sent21 -> int11: the lion is green; sent23 & int3 & sent21 -> int12: the lion is green; sent23 & int3 & sent21 -> int13: the lion is green; sent23 & int3 & sent21 -> int14: the lion is green; sent23 & int3 & sent21 -> int15: the lion is green; sent23 & int3 & sent21 -> int16: the lion is green; sent23 & int3 & sent21 -> int17: the lion is green; sent23 & int3 & sent21 -> int18: the lion is green; sent23 & int3 & sent21 -> int19: the lion is green; sent23 & int3 & sent21 -> int20: the lion is green; sent23 & int3 & sent21 -> int21: the lion is green; sent23 & int3 & sent21 -> int22: the lion is green; sent23 & int3 & sent21 -> int23: the lion is green; sent23 & int3 & sent21 -> int24: the lion is green; sent23 & int3 & sent21 -> int25: the lion is green; sent23 & int3 & sent21 -> int26: the lion is green; sent23 & int3 & sent21 -> int27: the lion is green; sent23 & int3 & sent21 -> int28: the lion is green; sent23 & int3 & sent21 -> int29: the lion is green; sent23 & int3 & sent21 -> int30: the lion is green", "score": 0.5179521441459656, "hypothesis": "i don't think the cow needs the bald eagle", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent14 & sent23 & sent12 -> int2: the lion eats the lion; int1 & sent21 & sent23 -> int3: the lion is green; sent4 & int3 -> int4: the lion eats the bald eagle; sent23 & int2 & sent21 -> int5: the lion is green; sent23 & int2 & sent21 -> int6: the lion is green; sent23 & int2 & sent21 -> int7: the lion is green; sent23 & int2 & sent21 -> int8: the lion is green; sent23 & int2 & sent21 -> int9: the lion is green; sent23 & int2 & sent21 -> int10: the lion is green; sent23 & int2 & sent21 -> int11: the lion is green; sent23 & int2 & sent21 -> int12: the lion is green; sent23 & int2 & sent21 -> int13: the lion is green; sent23 & int2 & sent21 -> int14: the lion is green; sent23 & int2 & sent21 -> int15: the lion is green; sent23 & int2 & sent21 -> int16: the lion is green; sent23 & int2 & sent21 -> int17: the lion is green; sent23 & int2 & sent21 -> int18: the lion is green; sent23 & int2 & sent21 -> int19: the lion is green; sent23 & int2 & sent21 -> int20: the lion is green; sent23 & int2 & sent21 -> int21: the lion is green; sent23 & int2 & sent21 -> int22: the lion is green; sent23 & int2 & sent21 -> int23: the lion is green; sent23 & int2 & sent21 -> int24: the lion is green; sent23 & int2 & sent21 -> int25: the lion is green; sent23 & int2 & sent21 -> int26: the lion is green; sent23 & int2 & sent21 -> int27: the lion is green; sent23 & int2 & sent21 -> int28: the lion is green; sent23 & int2 & sent21 -> int29: the lion is green; sent23 & int2 & sent21 -> int30: the lion is green", "score": 0.7007513046264648, "hypothesis": "the bald eagle is not round", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent2 & sent23 & sent5 -> int2: the lion is not green; int1 & sent4 -> int3: the lion eats the bald eagle; sent12 & int3 -> int4: the lion eats the lion; int1 & sent4 -> int5: the lion eats the bald eagle; int1 & sent4 -> int6: the lion eats the bald eagle; int1 & sent4 -> int7: the lion eats the bald eagle; int1 & sent4 -> int8: the lion eats the bald eagle; int1 & sent4 -> int9: the lion eats the bald eagle; int1 & sent4 -> int10: the lion eats the bald eagle; int1 & sent4 -> int11: the lion eats the bald eagle; int1 & sent4 -> int12: the lion eats the bald eagle; int1 & sent4 -> int13: the lion eats the bald eagle; int1 & sent4 -> int14: the lion eats the bald eagle; int1 & sent4 -> int15: the lion eats the bald eagle; int1 & sent4 -> int16: the lion eats the bald eagle; int1 & sent4 -> int17: the lion eats the bald eagle; int1 & sent4 -> int18: the lion eats the bald eagle; int1 & sent4 -> int19: the lion eats the bald eagle; int1 & sent4 -> int20: the lion eats the bald eagle; int1 & sent4 -> int21: the lion eats the bald eagle; int1 & sent4 -> int22: the lion eats the bald eagle; int1 & sent4 -> int23: the lion eats the bald eagle; int1 & sent4 -> int24: the lion eats the bald eagle; int1 & sent4 -> int25: the lion eats the bald eagle; int1 & sent4 -> int26: the lion eats the bald eagle; int1 & sent4 -> int27: the lion eats the bald eagle; int1 & sent4 -> int28: the lion eats the bald eagle; int1 & sent4 -> int29: the lion eats the bald eagle; int1 & sent4 -> int30: the lion eats the bald eagle", "score": 0.6832478046417236, "hypothesis": "i don't think the bald eagle is not round", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent21 & sent23 -> int1: the lion is green; sent4 & int1 -> int2: the lion eat the bald eagle; sent12 & int2 -> int3: the lion eats the lion; sent22 & sent8 -> int4: the lion needs the lion; sent22 & int4 & sent16 -> int5: the cow needs the lion; int3 & sent23 & sent21 -> int6: the lion is green; int3 & sent23 & sent21 -> int7: the lion is green; int3 & sent23 & sent21 -> int8: the lion is green; int3 & sent23 & sent21 -> int9: the lion is green; int3 & sent23 & sent21 -> int10: the lion is green; int3 & sent23 & sent21 -> int11: the lion is green; int3 & sent23 & sent21 -> int12: the lion is green; int3 & sent23 & sent21 -> int13: the lion is green; int3 & sent23 & sent21 -> int14: the lion is green; int3 & sent23 & sent21 -> int15: the lion is green; int3 & sent23 & sent21 -> int16: the lion is green; int3 & sent23 & sent21 -> int17: the lion is green; int3 & sent23 & sent21 -> int18: the lion is green; int3 & sent23 & sent21 -> int19: the lion is green; int3 & sent23 & sent21 -> int20: the lion is green; int3 & sent23 & sent21 -> int21: the lion is green; int3 & sent23 & sent21 -> int22: the lion is green; int3 & sent23 & sent21 -> int23: the lion is green; int3 & sent23 & sent21 -> int24: the lion is green; int3 & sent23 & sent21 -> int25: the lion is green; int3 & sent23 & sent21 -> int26: the lion is green; int3 & sent23 & sent21 -> int27: the lion is green; int3 & sent23 & sent21 -> int28: the lion is green; int3 & sent23 & sent21 -> int29: the lion is green; int3 & sent23 & sent21 -> int30: the lion is green", "score": 0.5772403478622437, "hypothesis": "the cow is red", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent21 & sent23 -> int1: the lion is green; sent2 & sent23 & sent15 -> int2: the lion is not green; int1 & sent4 -> int3: the lion eats the bald eagle; sent12 & int3 -> int4: the lion eats the lion; sent4 & int4 -> int5: the lion eats the bald eaves; int4 & sent4 -> int6: the lion eats the bald eagle; int4 & sent4 -> int7: the lion eats the bald eagle; int4 & sent4 -> int8: the lion eats the bald eagle; int4 & sent4 -> int9: the lion eats the bald eagle; int4 & sent4 -> int10: the lion eats the bald eagle; int4 & sent4 -> int11: the lion eats the bald eagle; int4 & sent4 -> int12: the lion eats the bald eagle; int4 & sent4 -> int13: the lion eats the bald eagle; int4 & sent4 -> int14: the lion eats the bald eagle; int4 & sent4 -> int15: the lion eats the bald eagle; int4 & sent4 -> int16: the lion eats the bald eagle; int4 & sent4 -> int17: the lion eats the bald eagle; int4 & sent4 -> int18: the lion eats the bald eagle; int4 & sent4 -> int19: the lion eats the bald eagle; int4 & sent4 -> int20: the lion eats the bald eagle; int4 & sent4 -> int21: the lion eats the bald eagle; int4 & sent4 -> int22: the lion eats the bald eagle; int4 & sent4 -> int23: the lion eats the bald eagle; int4 & sent4 -> int24: the lion eats the bald eagle; int4 & sent4 -> int25: the lion eats the bald eagle; int4 & sent4 -> int26: the lion eats the bald eagle; int4 & sent4 -> int27: the lion eats the bald eagle; int4 & sent4 -> int28: the lion eats the bald eagle; int4 & sent4 -> int29: the lion eats the bald eagle; int4 & sent4 -> int30: the lion eats the bald eagle", "score": 0.5559634566307068, "hypothesis": "i don't think the cow is red", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; sent22 & sent10 & sent8 -> int4: the lion needs the lion; int4 & sent21 & sent23 -> int5: the lion is green; int4 & sent21 & sent23 -> int6: the lion is green; int4 & sent21 & sent23 -> int7: the lion is green; int4 & sent21 & sent23 -> int8: the lion is green; int4 & sent21 & sent23 -> int9: the lion is green; int4 & sent21 & sent23 -> int10: the lion is green; int4 & sent21 & sent23 -> int11: the lion is green; int4 & sent21 & sent23 -> int12: the lion is green; int4 & sent21 & sent23 -> int13: the lion is green; int4 & sent21 & sent23 -> int14: the lion is green; int4 & sent21 & sent23 -> int15: the lion is green; int4 & sent21 & sent23 -> int16: the lion is green; int4 & sent21 & sent23 -> int17: the lion is green; int4 & sent21 & sent23 -> int18: the lion is green; int4 & sent21 & sent23 -> int19: the lion is green; int4 & sent21 & sent23 -> int20: the lion is green; int4 & sent21 & sent23 -> int21: the lion is green; int4 & sent21 & sent23 -> int22: the lion is green; int4 & sent21 & sent23 -> int23: the lion is green; int4 & sent21 & sent23 -> int24: the lion is green; int4 & sent21 & sent23 -> int25: the lion is green; int4 & sent21 & sent23 -> int26: the lion is green; int4 & sent21 & sent23 -> int27: the lion is green; int4 & sent21 & sent23 -> int28: the lion is green; int4 & sent21 & sent23 -> int29: the lion is green; int4 & sent21 & sent23 -> int30: the lion is green", "score": 0.7776356339454651, "hypothesis": "the lion does not like the cow", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent4 & int1 -> int2: the lion eats the bald eagle; sent12 & int2 -> int3: the lion eats the lion; sent22 & sent10 & sent8 -> int4: the lion needs the lion; sent13 & sent4 -> int5: the bald eagle eats the pig; sent23 & int3 & sent21 -> int6: the lion is green; sent23 & int3 & sent21 -> int7: the lion is green; sent23 & int3 & sent21 -> int8: the lion is green; sent23 & int3 & sent21 -> int9: the lion is green; sent23 & int3 & sent21 -> int10: the lion is green; sent23 & int3 & sent21 -> int11: the lion is green; sent23 & int3 & sent21 -> int12: the lion is green; sent23 & int3 & sent21 -> int13: the lion is green; sent23 & int3 & sent21 -> int14: the lion is green; sent23 & int3 & sent21 -> int15: the lion is green; sent23 & int3 & sent21 -> int16: the lion is green; sent23 & int3 & sent21 -> int17: the lion is green; sent23 & int3 & sent21 -> int18: the lion is green; sent23 & int3 & sent21 -> int19: the lion is green; sent23 & int3 & sent21 -> int20: the lion is green; sent23 & int3 & sent21 -> int21: the lion is green; sent23 & int3 & sent21 -> int22: the lion is green; sent23 & int3 & sent21 -> int23: the lion is green; sent23 & int3 & sent21 -> int24: the lion is green; sent23 & int3 & sent21 -> int25: the lion is green; sent23 & int3 & sent21 -> int26: the lion is green; sent23 & int3 & sent21 -> int27: the lion is green; sent23 & int3 & sent21 -> int28: the lion is green; sent23 & int3 & sent21 -> int29: the lion is green; sent23 & int3 & sent21 -> int30: the lion is green", "score": 0.6141454577445984, "hypothesis": "i don't think the lion does not like the cow", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent2 & sent23 & sent5 -> int2: the lion is not green; int1 & sent4 -> int3: the lion eats the bald eagle; sent12 & int3 -> int4: the lion eats the lion; int1 & sent4 -> int5: the lion eats the bald eagle; int1 & sent4 -> int6: the lion eats the bald eagle; int1 & sent4 -> int7: the lion eats the bald eagle; int1 & sent4 -> int8: the lion eats the bald eagle; int1 & sent4 -> int9: the lion eats the bald eagle; int1 & sent4 -> int10: the lion eats the bald eagle; int1 & sent4 -> int11: the lion eats the bald eagle; int1 & sent4 -> int12: the lion eats the bald eagle; int1 & sent4 -> int13: the lion eats the bald eagle; int1 & sent4 -> int14: the lion eats the bald eagle; int1 & sent4 -> int15: the lion eats the bald eagle; int1 & sent4 -> int16: the lion eats the bald eagle; int1 & sent4 -> int17: the lion eats the bald eagle; int1 & sent4 -> int18: the lion eats the bald eagle; int1 & sent4 -> int19: the lion eats the bald eagle; int1 & sent4 -> int20: the lion eats the bald eagle; int1 & sent4 -> int21: the lion eats the bald eagle; int1 & sent4 -> int22: the lion eats the bald eagle; int1 & sent4 -> int23: the lion eats the bald eagle; int1 & sent4 -> int24: the lion eats the bald eagle; int1 & sent4 -> int25: the lion eats the bald eagle; int1 & sent4 -> int26: the lion eats the bald eagle; int1 & sent4 -> int27: the lion eats the bald eagle; int1 & sent4 -> int28: the lion eats the bald eagle; int1 & sent4 -> int29: the lion eats the bald eagle; int1 & sent4 -> int30: the lion eats the bald eagle", "score": 0.6820520758628845, "hypothesis": "the bald eagle is red", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent23 & sent21 -> int1: the lion is green; sent8 & sent10 -> int2: the bald eagle needs the wing; int1 & sent4 -> int3: the lion eats the bald eagle; sent12 & int3 -> int4: the lion eats the lion; sent8 & sent10 -> int5: the bald eagle needs the lard; sent12 & sent9 -> int6: the cow eats the lion; sent12 & sent9 -> int7: the cow eats the lion; sent12 & sent9 -> int8: the cow eats the lion; sent12 & sent9 -> int9: the cow eats the lion; sent12 & sent9 -> int10: the cow eats the lion; sent12 & sent9 -> int11: the cow eats the lion; sent12 & sent9 -> int12: the cow eats the lion; sent12 & sent9 -> int13: the cow eats the lion; sent12 & sent9 -> int14: the cow eats the lion; sent12 & sent9 -> int15: the cow eats the lion; sent12 & sent9 -> int16: the cow eats the lion; sent12 & sent9 -> int17: the cow eats the lion; sent12 & sent9 -> int18: the cow eats the lion; sent12 & sent9 -> int19: the cow eats the lion; sent12 & sent9 -> int20: the cow eats the lion; sent12 & sent9 -> int21: the cow eats the lion; sent12 & sent9 -> int22: the cow eats the lion; sent12 & sent9 -> int23: the cow eats the lion; sent12 & sent9 -> int24: the cow eats the lion; sent12 & sent9 -> int25: the cow eats the lion; sent12 & sent9 -> int26: the cow eats the lion; sent12 & sent9 -> int27: the cow eats the lion; sent12 & sent9 -> int28: the cow eats the lion; sent12 & sent9 -> int29: the cow eats the lion; sent12 & sent9 -> int30: the cow eats the lion", "score": 0.4837079346179962, "hypothesis": "i don't think the bald eagle is red", "context": {"sent1": "the lion needs the bald eagle", "sent2": "if someone eats the cow and they are not round then the cow does not eat the bald eagle", "sent3": "if someone eats the bald eagle and the bald eagle is round then the bald eagle is not green", "sent4": "if someone is green then they eat the bald eagle", "sent5": "the cow eats the lion", "sent6": "the lion is blue", "sent7": "the cow likes the lion", "sent8": "if the bald eagle needs the cow then the bald eagle needs the lion", "sent9": "the cow eats the bald eagle", "sent10": "the bald eagle needs the cow", "sent11": "the cow is blue", "sent12": "if someone eats the bald eagle then they eat the lion", "sent13": "the bald eagle likes the cow", "sent14": "if the bald eagle does not eat the lion then the bald eagle needs the cow", "sent15": "the lion is round", "sent16": "if the lion needs the cow and the lion is not red then the cow needs the lion", "sent17": "the bald eagle does not eat the cow", "sent18": "the bald eagle needs the lion", "sent19": "the bald eagle is rough", "sent20": "the cow is rough", "sent21": "if someone eats the lion and the lion eats the cow then the lion is green", "sent22": "the lion needs the cow", "sent23": "the lion eats the cow", "sent24": "the cow is round"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis;", "sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "fiona is blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis;", "sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 0.999998152256012, "hypothesis": "i don't think fiona is blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis;", "sent12 -> hypothesis;"], "proof_pred": "sent11 & sent9 -> int1: fiona is cold; sent15 & int1 -> hypothesis", "score": 0.94339919090271, "hypothesis": "fiona is not blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis;", "sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "i don't think fiona is not blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent5 & sent9 -> hypothesis;"], "proof_pred": "sent9 & sent5 -> hypothesis", "score": 0.9523126482963562, "hypothesis": "fiona is quiet", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent5 & sent9 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent5 & sent9 -> hypothesis;"], "proof_pred": "sent9 & sent5 -> hypothesis", "score": 0.9502270221710205, "hypothesis": "i don't think fiona is quiet", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent5 & sent9 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> int2: fiona is blue; sent10 & int2 -> hypothesis;", "sent10 & sent12 -> hypothesis;"], "proof_pred": "sent11 & sent9 -> int1: fiona is cold; int1 & sent5 -> int2: fiona is quiet; sent11 & sent9 -> int3: fiona is cold; sent10 & sent12 -> int4: fiona is not furry; int4 & sent10 -> hypothesis", "score": 0.7435941696166992, "hypothesis": "fiona is furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> int2: fiona is blue; sent10 & int2 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> int2: fiona is blue; sent10 & int2 -> hypothesis;", "sent10 & sent12 -> hypothesis;"], "proof_pred": "sent12 & sent10 -> hypothesis", "score": 0.9468627572059631, "hypothesis": "i don't think fiona is furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent9 & sent11 -> int1: fiona is cold; sent15 & int1 -> int2: fiona is blue; sent10 & int2 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> hypothesis", "score": 0.943929135799408, "hypothesis": "gary is blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; int1 & sent15 -> hypothesis", "score": 0.9438931941986084, "hypothesis": "i don't think gary is blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> hypothesis", "score": 0.9441710710525513, "hypothesis": "gary is not blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> hypothesis", "score": 0.9439470767974854, "hypothesis": "i don't think gary is not blue", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; int1 & sent15 -> int2: gary is blue; int2 & sent10 -> hypothesis", "score": 0.9604638814926147, "hypothesis": "gary is not furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis", "score": 0.9495100378990173, "hypothesis": "i don't think gary is not furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; int2 & sent10 -> hypothesis", "score": 0.9048539996147156, "hypothesis": "gary is furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis;"], "proof_pred": "sent11 & sent4 -> int1: gary is cold; int1 & sent15 -> int2: gary is blue; int2 & sent10 -> hypothesis", "score": 0.9609203338623047, "hypothesis": "i don't think gary is furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": "sent4 & sent11 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.7825666069984436, "hypothesis": "anne is not cold", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent4 & sent11 -> hypothesis", "score": 0.9081661701202393, "hypothesis": "i don't think anne is not cold", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.42996132373809814, "hypothesis": "anne is quiet", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent8 & sent5 -> int3: anne is quiet; int3 & sent9 -> int4: anne is cold; int4 & sent15 -> int5: anne is blue; int3 & sent3 -> hypothesis", "score": 0.7333767414093018, "hypothesis": "i don't think anne is quiet", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: dave is cold; sent15 & int1 -> int2: dave is blue; sent5 & sent4 -> int3: dave is quiet; int3 & sent7 & sent15 -> hypothesis", "score": 0.7415863275527954, "hypothesis": "dave is not quiet", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: dave is cold; sent15 & int1 -> int2: dave is blue; sent5 & sent4 -> int3: dave is quiet; int3 & sent9 -> hypothesis", "score": 0.7597073912620544, "hypothesis": "i don't think dave is not quiet", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; int1 & sent15 -> int2: gary is blue; sent11 & sent4 -> int3: gary is cold; sent15 & int3 -> int4: gary is blue; int2 & sent3 & sent1 -> int5: gary is nice; int2 & sent3 & sent2 -> int6: gary is fine; int5 & sent10 -> int7: gary is not furry; int4 & sent10 -> int8: gary is not furry; sent7 & int8 -> int9: gary is not furry; int9 & sent7 -> int10: gary is not furry; sent7 & int10 -> int11: gary is not furry; sent7 & int11 -> int12: gary is not furry; sent7 & int12 -> int13: gary is not furry; sent7 & int13 -> int14: gary is not furry; sent7 & int14 -> int15: gary is not furry; sent7 & int15 -> int16: gary is not furry; int2 & sent10 -> int17: gary is not furry; int2 & sent10 -> int18: gary is not furry; int2 & sent10 -> int19: gary is not furry; int2 & sent10 -> int20: gary is not furry; int2 & sent10 -> int21: gary is not furry; int2 & sent10 -> int22: gary is not furry; int2 & sent10 -> int23: gary is not furry; int2 & sent10 -> int24: gary is not furry; int2 & sent10 -> int25: gary is not furry; int2 & sent10 -> int26: gary is not furry; int2 & sent10 -> int27: gary is not furry; int2 & sent10 -> int28: gary is not furry; int2 & sent10 -> int29: gary is not furry; int2 & sent10 -> int30: gary is not furry", "score": 0.7280173897743225, "hypothesis": "anne is nice", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; int2 & sent10 -> int3: gary is not furry; sent10 & int2 -> int4: gary is not furry; int3 & sent7 & sent8 -> int5: gary is silent; sent8 & sent8 -> int6: gary is quiet; sent8 & int6 -> int7: gary is quiet; sent4 & sent11 -> int8: gary is cold; sent15 & int8 -> int9: gary is blue; int9 & sent10 -> int10: gary is not furry; int10 & int4 & int3 -> int11: gary is not fond; int5 & sent11 & sent4 -> int12: gary is cold; sent15 & int8 -> int13: gary is blue; sent10 & int9 -> int14: gary is not furry; int4 & int5 & sent7 -> int15: gary is not happy; int5 & sent7 & sent4 -> int16: gary is quiet; int5 & sent7 & sent4 -> int17: gary is quiet; int5 & sent7 & sent4 -> int18: gary is quiet; int5 & sent7 & sent4 -> int19: gary is quiet; int5 & sent7 & sent4 -> int20: gary is quiet; int5 & sent7 & sent4 -> int21: gary is quiet; int5 & sent7 & sent4 -> int22: gary is quiet; int5 & sent7 & sent4 -> int23: gary is quiet; int5 & sent7 & sent4 -> int24: gary is quiet; int5 & sent7 & sent4 -> int25: gary is quiet; int5 & sent7 & sent4 -> int26: gary is quiet; int5 & sent7 & sent4 -> int27: gary is quiet; int5 & sent7 & sent4 -> int28: gary is quiet; int5 & sent7 & sent4 -> int29: gary is quiet; int5 & sent7 & sent4 -> int30: gary is quiet", "score": 0.6583717465400696, "hypothesis": "i don't think anne is nice", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent9 -> int1: fiona is cold; sent5 & sent9 -> int2: fiona is quiet; int1 & sent15 -> int3: fiona is blue; sent11 & sent9 -> int4: fiona is cold; sent1 & int4 & sent15 -> int5: fiona is green; int5 & sent10 -> int6: fiona is not furry; int3 & sent10 -> hypothesis", "score": 0.4846276640892029, "hypothesis": "fiona is not nice", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent9 -> int1: fiona is cold; sent5 & sent9 -> int2: fiona is quiet; int1 & sent15 -> int3: fiona is blue; sent11 & sent9 -> int4: fiona is cold; sent1 & int4 & sent15 -> int5: fiona is green; int5 & sent10 -> int6: fiona is not furry; int3 & sent10 -> hypothesis", "score": 0.49848899245262146, "hypothesis": "i don't think fiona is not nice", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6025475263595581, "hypothesis": "anne is smart", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.4610750377178192, "hypothesis": "i don't think anne is smart", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; int2 & sent10 -> int3: gary is not furry; int3 & sent10 -> hypothesis", "score": 0.5315067172050476, "hypothesis": "gary is not nice", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> int3: gary is not blue; sent7 & int3 & sent4 -> int4: gary is not quiet; sent11 & sent4 -> int5: gary is cold; sent15 & int5 -> int6: gary is blue; int6 & sent10 & sent4 -> int7: gary is furry; sent6 & int7 & int6 -> int8: gary is round; sent6 & int6 & int7 -> int9: gary is not quiet; int4 & sent9 & int9 -> hypothesis", "score": 0.48772379755973816, "hypothesis": "i don't think gary is not nice", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.7745667695999146, "hypothesis": "anne is furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent11 & sent4 -> int1: gary is cold; sent15 & int1 -> int2: gary is blue; sent10 & int2 -> hypothesis", "score": 0.9469979405403137, "hypothesis": "i don't think anne is furry", "context": {"sent1": "if anne is cold and anne is furry then anne is nice", "sent2": "dave is nice", "sent3": "if dave is cold and dave is quiet then dave is nice", "sent4": "gary is smart", "sent5": "if someone is smart then they are quiet", "sent6": "if someone is furry and blue then they are round", "sent7": "if someone is smart and not blue then they are not quiet", "sent8": "gary is quiet", "sent9": "fiona is smart", "sent10": "if someone is blue then they are not furry", "sent11": "all smart people are cold", "sent12": "fiona is blue", "sent13": "dave is not cold", "sent14": "dave is round", "sent15": "cold people are blue", "sent16": "anne is not round", "sent17": "dave is not smart", "sent18": "gary is not round"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent8 & sent16 -> hypothesis;", "sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 1.0, "hypothesis": "anne is young", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent8 & sent16 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent8 & sent16 -> hypothesis;", "sent6 -> hypothesis;"], "proof_pred": "sent6 -> hypothesis", "score": 0.9999973773956299, "hypothesis": "i don't think anne is young", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent8 & sent16 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent11 -> hypothesis;", "sent12 & sent15 -> hypothesis;"], "proof_pred": "sent11 -> hypothesis", "score": 0.9999868273735046, "hypothesis": "harry is not quiet", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent11 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent11 -> hypothesis;", "sent12 & sent15 -> hypothesis;"], "proof_pred": "sent11 -> hypothesis", "score": 1.0, "hypothesis": "i don't think harry is not quiet", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent11 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent5 & sent18 -> hypothesis;"], "proof_pred": "sent18 & sent5 -> hypothesis", "score": 0.9514289498329163, "hypothesis": "fiona is furry", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent5 & sent18 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent5 & sent18 -> hypothesis;"], "proof_pred": "sent18 & sent5 -> hypothesis", "score": 0.9492018222808838, "hypothesis": "i don't think fiona is furry", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent5 & sent18 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent16 & sent18 -> hypothesis;"], "proof_pred": "sent16 & sent18 -> hypothesis", "score": 0.9494308233261108, "hypothesis": "fiona is not young", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent16 & sent18 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent16 & sent18 -> hypothesis;"], "proof_pred": "sent16 & sent18 -> hypothesis", "score": 0.9486920237541199, "hypothesis": "i don't think fiona is not young", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": "sent16 & sent18 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent8 -> int1: anne is furry; int1 & sent6 & sent2 -> int2: anne is smart; sent6 & sent2 & int1 -> int3: anne is smart; int3 & sent2 & int1 -> hypothesis", "score": 0.8504374027252197, "hypothesis": "anne is not red", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent8 & sent5 -> int1: anne is furry; int1 & sent6 & sent2 -> int2: anne is smart; sent6 & sent2 & int1 -> int3: anne is smart; int3 & sent2 & int1 -> hypothesis", "score": 0.8283812403678894, "hypothesis": "i don't think anne is not red", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent8 -> int1: anne is furry; sent6 & sent2 & int1 -> int2: anne is smart; sent5 & sent8 -> int3: anne is furry; sent6 & sent2 & int3 -> int4: anne is smart; int4 & sent2 & int3 -> hypothesis", "score": 0.8048539757728577, "hypothesis": "anne is rough", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent5 & sent8 -> int1: anne is furry; sent6 & sent2 & int1 -> int2: anne is smart; int2 & sent3 -> int3: anne is rough; int3 & sent6 & sent2 -> hypothesis", "score": 0.8125169277191162, "hypothesis": "i don't think anne is rough", "context": {"sent1": "harry is smart", "sent2": "furry, young people are smart", "sent3": "if fiona is smart then fiona is rough", "sent4": "anne is quiet", "sent5": "all nice people are furry", "sent6": "anne is young", "sent7": "fiona is red", "sent8": "anne is nice", "sent9": "if anne is rough and anne is furry then anne is young", "sent10": "fiona is rough", "sent11": "harry is quiet", "sent12": "all red people are quiet", "sent13": "harry is rough", "sent14": "harry is furry", "sent15": "harry is red", "sent16": "all nice people are young", "sent17": "harry is young", "sent18": "fiona is nice"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 1.0, "hypothesis": "the mouse sees the dog", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent12 -> hypothesis;"], "proof_pred": "sent12 -> hypothesis", "score": 0.9999996423721313, "hypothesis": "i don't think the mouse sees the dog", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent12 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.9998569488525391, "hypothesis": "the bald eagle is not rough", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the bald eagle is not rough", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent1 & sent4 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.9444475769996643, "hypothesis": "the bald eagle is cold", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent1 & sent4 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent1 & sent4 -> hypothesis;"], "proof_pred": "sent1 & sent4 -> hypothesis", "score": 0.947162926197052, "hypothesis": "i don't think the bald eagle is cold", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent1 & sent4 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent8 & sent11 & sent16 -> hypothesis;"], "proof_pred": "INVALID_PROOF", "score": 0.7667577862739563, "hypothesis": "the mouse is cold", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent8 & sent11 & sent16 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent8 & sent11 & sent16 -> hypothesis;"], "proof_pred": "sent11 & sent16 & sent8 -> hypothesis", "score": 0.9071316719055176, "hypothesis": "i don't think the mouse is cold", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": "sent8 & sent11 & sent16 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent4 -> int1: the bald eagle is cold; int1 & sent6 & sent14 -> int2: the rabbit is rough; sent1 & sent4 -> int3: the bald eagle is cold; sent1 & sent4 -> int4: the bald eagle is cold; sent1 & sent4 -> int5: the bald eagle is cold; sent1 & sent4 -> int6: the bald eagle is cold; sent1 & sent4 -> int7: the bald eagle is cold; sent1 & sent4 -> int8: the bald eagle is cold; sent1 & sent4 -> int9: the bald eagle is cold; sent1 & sent4 -> int10: the bald eagle is cold; int5 & sent6 & sent15 -> int11: the rabbit likes the dog; sent1 & sent4 -> int12: the bald eagle is cold; sent1 & sent4 -> int13: the rabbit is cold; sent1 & sent4 -> int14: the bald eagle is cold; sent1 & sent4 -> int15: the bald eagle is cold; sent1 & sent4 -> int16: the bald eagle is cold; sent1 & sent4 -> int17: the bald eagle is cold; sent1 & sent4 -> int18: the bald eagle is cold; sent1 & sent4 -> int19: the bald eagle is cold; sent1 & sent4 -> int20: the bald eagle is cold; sent1 & sent4 -> int21: the bald eagle is cold; sent1 & sent4 -> int22: the bald eagle is cold; sent1 & sent4 -> int23: the bald eagle is cold; sent1 & sent4 -> int24: the bald eagle is cold; sent1 & sent4 -> int25: the bald eagle is cold; sent1 & sent4 -> int26: the bald eagle is cold; sent1 & sent4 -> int27: the bald eagle is cold; sent1 & sent4 -> int28: the bald eagle is cold; sent1 & sent4 -> int29: the bald eagle is cold; sent1 & sent4 -> int30: the bald eagle is cold", "score": 0.7381133437156677, "hypothesis": "the dog is not rough", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.7623515725135803, "hypothesis": "i don't think the dog is not rough", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.9029220342636108, "hypothesis": "the rabbit is cold", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent1 & sent4 -> int1: the bald eagle is cold; int1 & sent14 & sent6 -> int2: the rabbit is rough; sent6 & sent15 & sent10 -> int3: the rabbit likes the dog; int3 & sent1 -> int4: the rabbit is cold; int4 & sent8 & sent11 -> int5: the mouse is not cold; int4 & sent16 & sent8 -> hypothesis", "score": 0.7927041053771973, "hypothesis": "i don't think the rabbit is cold", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent6 -> hypothesis", "score": 0.9997591972351074, "hypothesis": "the rabbit does not see the mouse", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8437383770942688, "hypothesis": "i don't think the rabbit does not see the mouse", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.69685298204422, "hypothesis": "the mouse eats the bald eagle", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.6904579401016235, "hypothesis": "i don't think the mouse eats the bald eagle", "context": {"sent1": "if something likes the mouse then it is cold", "sent2": "the dog sees the bald eagle", "sent3": "the bald eagle is rough", "sent4": "the bald eagle likes the mouse", "sent5": "the dog sees the rabbit", "sent6": "the rabbit does not like the mouse", "sent7": "if the bald eagle is green then the bald eagle eats the mouse", "sent8": "the mouse is red", "sent9": "the mouse sees the rabbit", "sent10": "the rabbit sees the dog", "sent11": "the mouse is green", "sent12": "the mouse sees the dog", "sent13": "if the bald eagle is green and the rabbit does not eat the bald eagle then the rabbit is rough", "sent14": "if something is cold and it does not like the rabbit then it is rough", "sent15": "if the rabbit sees the dog and the rabbit does not like the bald eagle then the rabbit likes the dog", "sent16": "if something is green and red then it is not cold"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "anne is smart", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.9999997019767761, "hypothesis": "i don't think anne is smart", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 0.9999980330467224, "hypothesis": "bob is not rough", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent2 -> hypothesis;"], "proof_pred": "sent2 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is not rough", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": "sent2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent1 & sent2 -> hypothesis", "score": 0.9604860544204712, "hypothesis": "bob is not nice", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent2 & sent1 & sent3 -> hypothesis", "score": 0.9509499669075012, "hypothesis": "i don't think bob is not nice", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent3 & sent4 & sent2 -> int1: anne is rough; int1 & sent1 & sent2 -> int2: anne is nice; int2 & sent1 & int1 -> int3: anne is nice; int3 & sent2 & int2 -> int4: anne is nice; int4 & sent3 & sent4 -> hypothesis", "score": 0.8127936124801636, "hypothesis": "anne is white", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8910623788833618, "hypothesis": "i don't think anne is white", "context": {"sent1": "if someone is rough and not kind then they are nice", "sent2": "bob is rough", "sent3": "anne is smart", "sent4": "if someone is smart and white then they are rough"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent4 -> hypothesis;"], "proof_pred": "sent4 -> hypothesis", "score": 1.0, "hypothesis": "the squirrel likes the bald eagle", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": "sent4 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent4 -> hypothesis;"], "proof_pred": "sent4 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the squirrel likes the bald eagle", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": "sent4 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent9 -> hypothesis;"], "proof_pred": "sent9 -> hypothesis", "score": 0.9999997615814209, "hypothesis": "the lion is not green", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": "sent9 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent9 -> hypothesis;"], "proof_pred": "sent9 -> hypothesis", "score": 1.0, "hypothesis": "i don't think the lion is not green", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": "sent9 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent6 & sent4 -> int1: the squirrel needs the lion; int1 & sent7 & sent6 -> int2: the squirrel needs the lion; int1 & int2 & sent8 -> int3: the lion needs the squirrel; int3 & sent5 -> int4: the bald eagle is big; sent10 & int4 -> int5: the bald eagle chases the lion; int3 & sent11 & sent6 -> int6: the squirrel likes the lion; int6 & sent1 & int3 -> int7: the lion is young; sent1 & int3 & int6 -> int8: the lion is young; int8 & sent7 & sent6 -> hypothesis", "score": 0.8556327819824219, "hypothesis": "the lion does not need the lion", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent7 & sent6 & sent4 -> int1: the squirrel needs the lion; int1 & sent7 & sent4 -> hypothesis", "score": 0.8179760575294495, "hypothesis": "i don't think the lion does not need the lion", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent6 & sent7 -> int1: the squirrel needs the lion; sent4 & sent6 & sent7 -> int2: the squirrel needs the lion; int2 & int1 & sent8 -> int3: the bald eagle is big; sent10 & int3 -> int4: the bald eagle chases the lion; sent10 & int3 -> int5: the bald eagle chases the horn; int4 & sent5 -> int6: the bald eagle is big; int6 & sent10 -> int7: the bald eagle chases the lion; int4 & sent5 -> int8: the bald eagle is big; int8 & sent6 & sent7 -> int9: the bald eagle needs the animal; int9 & sent11 & sent6 -> int10: the squirrel likes the lion; int6 & sent8 -> int11: the bald eagle needs the squirrel; int8 & sent10 -> int12: the bald eagle chases the lion; int8 & sent10 -> int13: the bald eagle chases the lion; int8 & sent10 -> int14: the bald eagle chases the lion; int8 & sent10 -> int15: the bald eagle chases the lion; int8 & sent10 -> int16: the bald eagle chases the lion; int8 & sent10 -> int17: the bald eagle chases the lion; int8 & sent10 -> int18: the bald eagle chases the lion; int8 & sent10 -> int19: the bald eagle chases the lion; int8 & sent10 -> int20: the bald eagle chases the lion; int8 & sent10 -> int21: the bald eagle chases the lion; int8 & sent10 -> int22: the bald eagle chases the lion; int8 & sent10 -> int23: the bald eagle chases the lion; int8 & sent10 -> int24: the bald eagle chases the lion; int8 & sent10 -> int25: the bald eagle chases the lion; int8 & sent10 -> int26: the bald eagle chases the lion; int8 & sent10 -> int27: the bald eagle chases the lion; int8 & sent10 -> int28: the bald eagle chases the lion; int8 & sent10 -> int29: the bald eagle chases the lion; int8 & sent10 -> int30: the bald eagle chases the lion", "score": 0.5188083052635193, "hypothesis": "the lion needs the bald eagle", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent4 & sent6 & sent7 -> int1: the squirrel needs the lion; sent4 & sent6 & sent7 -> int2: the squirrel needs the lion; int1 & sent8 -> int3: the bald eagle is big; sent8 & sent6 -> int4: the lion needs the squirrel; int4 & sent11 & sent6 -> int5: the squirrel likes the lion; int2 & sent1 & int4 -> int6: the lion is young; sent1 & int2 & int4 -> int7: the lion is young; int6 & sent6 & sent11 -> int8: the lion likes the lion; sent1 & int8 & int4 -> int9: the lion is young; int9 & sent7 & sent4 -> int10: the lion needs the lion; int10 & sent10 -> int11: the lion chases the lion; int9 & sent7 & sent4 -> int12: the lion needs the lion; int9 & sent7 & sent4 -> int13: the lion needs the lion; int9 & sent7 & sent4 -> int14: the lion needs the lion; int9 & sent7 & sent4 -> int15: the lion needs the lion; int9 & sent7 & sent4 -> int16: the lion needs the lion; int9 & sent7 & sent4 -> int17: the lion needs the lion; int9 & sent7 & sent4 -> int18: the lion needs the lion; int9 & sent7 & sent4 -> int19: the lion needs the lion; int9 & sent7 & sent4 -> int20: the lion needs the lion; int9 & sent7 & sent4 -> int21: the lion needs the lion; int9 & sent7 & sent4 -> int22: the lion needs the lion; int9 & sent7 & sent4 -> int23: the lion needs the lion; int9 & sent7 & sent4 -> int24: the lion needs the lion; int9 & sent7 & sent4 -> int25: the lion needs the lion; int9 & sent7 & sent4 -> int26: the lion needs the lion; int9 & sent7 & sent4 -> int27: the lion needs the lion; int9 & sent7 & sent4 -> int28: the lion needs the lion; int9 & sent7 & sent4 -> int29: the lion needs the lion; int9 & sent7 & sent4 -> int30: the lion needs the lion", "score": 0.8177622556686401, "hypothesis": "i don't think the lion needs the bald eagle", "context": {"sent1": "if someone likes the lion and they need the squirrel then the lion is young", "sent2": "if the bald eagle chases the squirrel then the bald eagle is green", "sent3": "if someone is cold then they need the lion", "sent4": "the squirrel likes the bald eagle", "sent5": "if someone chases the bald eagle then the bald eagle is big", "sent6": "the bald eagle likes the squirrel", "sent7": "if someone likes the bald eagle and the bald eagle likes the squirrel then they need the lion", "sent8": "if the lion chases the squirrel then the lion needs the squirrel", "sent9": "the lion is green", "sent10": "if the bald eagle is big then the bald eagle chases the lion", "sent11": "if the bald eagle likes the squirrel and the bald eagle needs the squirrel then the squirrel likes the lion"}, "proof_gt": ""}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "bob is kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": false, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 0.9999958276748657, "hypothesis": "bob is not kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 0, "all_proofs": ["sent3 -> hypothesis;"], "proof_pred": "sent3 -> hypothesis", "score": 1.0, "hypothesis": "i don't think bob is not kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent3 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent2 & sent14 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> hypothesis", "score": 0.9427585601806641, "hypothesis": "charlie is kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent2 & sent14 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> hypothesis", "score": 0.9426365494728088, "hypothesis": "i don't think charlie is kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> hypothesis"}, {"answer": false, "depth": 1, "all_proofs": ["sent2 & sent14 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> hypothesis", "score": 0.9429387450218201, "hypothesis": "charlie is not kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> hypothesis"}, {"answer": true, "depth": 1, "all_proofs": ["sent2 & sent14 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> hypothesis", "score": 0.9440976977348328, "hypothesis": "i don't think charlie is not kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> hypothesis", "score": 0.903153657913208, "hypothesis": "charlie is round", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> hypothesis", "score": 0.9036890268325806, "hypothesis": "i don't think charlie is round", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis"}, {"answer": false, "depth": 2, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> hypothesis", "score": 0.9048762917518616, "hypothesis": "charlie is not round", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis"}, {"answer": true, "depth": 2, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> hypothesis", "score": 0.9027537703514099, "hypothesis": "i don't think charlie is not round", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> int2: charlie is round; sent5 & int2 & sent4 -> hypothesis", "score": 0.9011145234107971, "hypothesis": "charlie is big", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> int2: charlie is round; sent5 & int2 & sent4 -> hypothesis", "score": 0.9020935893058777, "hypothesis": "i don't think charlie is big", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis"}, {"answer": false, "depth": 3, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> int2: charlie is round; sent5 & int2 & sent4 -> hypothesis", "score": 0.9019595384597778, "hypothesis": "charlie is not big", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis"}, {"answer": true, "depth": 3, "all_proofs": ["sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis;"], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent1 & int1 -> int2: charlie is round; sent5 & int2 & sent4 -> hypothesis", "score": 0.9014452695846558, "hypothesis": "i don't think charlie is not big", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": "sent2 & sent14 -> int1: charlie is kind; sent1 & sent18 & int1 -> int2: charlie is round; sent4 & sent5 & int2 -> hypothesis"}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent1 & int1 & sent18 -> int2: charlie is round; sent5 & int2 & sent4 -> hypothesis", "score": 0.9154707193374634, "hypothesis": "dave is not big", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent1 & int1 & sent18 -> int2: charlie is round; sent5 & int2 & sent4 -> hypothesis", "score": 0.9160605072975159, "hypothesis": "i don't think dave is not big", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; int1 & sent18 & sent17 -> int2: erin is round; int2 & sent8 & sent6 -> hypothesis", "score": 0.9093645215034485, "hypothesis": "erin is kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent18 & sent17 & int1 -> int2: erin is round; int2 & sent8 & sent6 -> hypothesis", "score": 0.9006258249282837, "hypothesis": "i don't think erin is kind", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; int1 & sent18 & sent17 -> int2: charlie is round; int2 & sent8 & sent6 -> int3: erin is kind; int3 & sent17 & sent18 -> hypothesis", "score": 0.9076703190803528, "hypothesis": "erin is not round", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; int1 & sent18 & sent17 -> hypothesis", "score": 0.9013737440109253, "hypothesis": "i don't think erin is not round", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; int1 & sent4 & sent5 -> int2: dave is big; int2 & sent14 -> int3: dave is kind; sent14 & sent2 -> int4: dave is kind; sent14 & sent2 -> int5: dave is kind; sent14 & sent2 -> int6: dave is kind; int3 & sent4 & sent2 -> int7: dave is quiet; sent5 & int7 & sent12 -> int8: dave is big; sent14 & sent2 -> int9: dave is kind; int9 & int2 & sent2 -> int10: dave is furry; sent9 & int4 & int10 -> hypothesis", "score": 0.7040415406227112, "hypothesis": "dave is rough", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; int1 & sent4 & sent5 -> int2: dave is big; int2 & sent14 -> int3: dave is kind; sent14 & sent2 -> int4: dave is kind; int3 & sent4 & sent2 -> int5: dave is nice; int3 & sent4 & sent2 -> int6: dave is kind; sent14 & sent2 -> int7: dave is kind; sent14 & sent2 -> int8: dave is kind; sent14 & sent2 -> int9: dave is kind; int8 & sent9 & int2 -> int10: dave is rough; sent14 & sent2 -> int11: dave is kind; sent14 & sent2 -> int12: dave is kind; sent14 & sent2 -> int13: dave is kind; sent14 & sent2 -> int14: dave is good; int11 & sent14 & int5 -> int15: dave is good; sent14 & int15 & sent9 -> hypothesis", "score": 0.7248440384864807, "hypothesis": "i don't think dave is rough", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent1 & int1 & sent18 -> int2: charlie is round; sent4 & int2 & sent5 -> int3: charlie is big; sent10 & int3 & sent2 -> int4: charlie is round; sent5 & int4 & sent4 -> int5: charlie is big; sent10 & int3 & int4 -> int6: charlie is round; int6 & sent5 & sent4 -> int7: charlie is big; sent10 & int4 & int7 -> int8: charlie is round; int8 & sent6 & sent4 -> int9: charlie is big; int6 & sent10 & int3 -> int10: charlie is round; int10 & sent8 & sent6 -> int11: charlie is kind; sent1 & int11 & sent18 -> int12: charlie is round; int12 & sent8 & sent6 -> int13: charlie is kind; sent1 & int13 & sent5 -> int14: charlie is big; sent1 & int13 & sent5 -> int15: charlie is big; sent1 & int13 & sent5 -> int16: charlie is big; sent1 & int13 & sent5 -> int17: charlie is big; sent1 & int13 & sent5 -> int18: charlie is big; sent1 & int13 & sent5 -> int19: charlie is big; sent1 & int13 & sent5 -> int20: charlie is big; sent1 & int13 & sent5 -> int21: charlie is big; sent1 & int13 & sent5 -> int22: charlie is big; sent1 & int13 & sent5 -> int23: charlie is big; sent1 & int13 & sent5 -> int24: charlie is big; sent1 & int13 & sent5 -> int25: charlie is big; sent1 & int13 & sent5 -> int26: charlie is big; sent1 & int13 & sent5 -> int27: charlie is big; sent1 & int13 & sent5 -> int28: charlie is big; sent1 & int13 & sent5 -> int29: charlie is big; sent1 & int13 & sent5 -> int30: charlie is big", "score": 0.8626276850700378, "hypothesis": "dave is not furry", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "sent14 & sent2 -> int1: charlie is kind; sent1 & int1 & sent18 -> int2: charlie is round; sent4 & int2 & sent5 -> int3: charlie is big; sent10 & int3 & sent2 -> int4: charlie is round; sent4 & int4 & sent5 -> int5: charlie is big; sent10 & int3 & int4 -> int6: charlie is round; int6 & sent5 & sent4 -> int7: charlie is big; int6 & sent10 & int3 -> int8: charlie is round; int8 & sent6 & int3 -> int9: charlie is kind; sent1 & int9 & sent18 -> int10: charlie is round; int10 & sent6 & int2 -> int11: charlie is kind; int9 & sent1 & int4 -> int12: charlie is rough; sent18 & int12 & int9 -> int13: charlie is round; int13 & sent10 & sent8 -> int14: charlie is round; int13 & sent10 & sent8 -> int15: charlie is round; int13 & sent10 & sent8 -> int16: charlie is round; int13 & sent10 & sent8 -> int17: charlie is round; int13 & sent10 & sent8 -> int18: charlie is round; int13 & sent10 & sent8 -> int19: charlie is round; int13 & sent10 & sent8 -> int20: charlie is round; int13 & sent10 & sent8 -> int21: charlie is round; int13 & sent10 & sent8 -> int22: charlie is round; int13 & sent10 & sent8 -> int23: charlie is round; int13 & sent10 & sent8 -> int24: charlie is round; int13 & sent10 & sent8 -> int25: charlie is round; int13 & sent10 & sent8 -> int26: charlie is round; int13 & sent10 & sent8 -> int27: charlie is round; int13 & sent10 & sent8 -> int28: charlie is round; int13 & sent10 & sent8 -> int29: charlie is round; int13 & sent10 & sent8 -> int30: charlie is round", "score": 0.8371022343635559, "hypothesis": "i don't think dave is not furry", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": "Unknown", "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.883094072341919, "hypothesis": "bob is red", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}, {"answer": false, "depth": null, "all_proofs": [], "proof_pred": "INVALID_PROOF", "score": 0.8818028569221497, "hypothesis": "i don't think bob is red", "context": {"sent1": "charlie is rough", "sent2": "charlie is red", "sent3": "bob is kind", "sent4": "charlie is quiet", "sent5": "if something is round and quiet then it is big", "sent6": "if erin is round and erin is big then erin is kind", "sent7": "bob is round", "sent8": "erin is big", "sent9": "all kind, furry things are rough", "sent10": "if something is big and red then it is round", "sent11": "if something is red and not furry then it is rough", "sent12": "dave is round", "sent13": "bob is quiet", "sent14": "if something is red then it is kind", "sent15": "bob is rough", "sent16": "if charlie is not quiet then charlie is not rough", "sent17": "erin is rough", "sent18": "all rough, kind things are round"}, "proof_gt": ""}]