geonmin-kim's picture
Upload folder using huggingface_hub
d6585f5
/*
Copyright (c) 2008 - Chris Buckley.
Permission is granted for use and modification of this file for
research, non-commercial purposes.
*/
/* Copyright 2008 Chris Buckley */
#include "common.h"
#include "sysfunc.h"
#include "trec_eval.h"
#include "trec_format.h"
#include "functions.h"
/* Takes the top docs and judged prefs for a query, and returns a
results_prefs_info object giving the preferences from judged_prefs that
are observed in the retrieved docs.
Three part process here:
1. Add a docid_rank (0..num_judged) to the judgment prefs that for
every docno in any judgment pref gives the relative rank at which
it occurs in the results (a number 0 to num_judged_ret-1). If the
docno does not occur in the results, it is given a (consistent)
value from num_judged_ret to num_judged-1. These docid_ranks are
used to represent the docs within preferences.
2. Go through the judgements again and represent all preferences per judgment
group (JG). Two options for representing the preferences within a JG:
A. If there is only 1 judgment sub-group (JSG), then the preferences are
given by a set of equivalence classes (ECs) where all docs with the
same rel_level in the input are in the same equivalence class. Thus all
docs in a higher EC.rel_level are preferred to the docs in a lower
EC.rel_level. Since there is only 1 JSG, there are complete preferences
between any pair of docs not in the same EC.
B. If there are multiple JSGs, then the JG preference relation is assumed to
be partial. Preferences are given by a preference array, where array[i][j]
is 1 iff doc with docid_rank i is preferred to doc with docid_rank j
in this JG. Transitive preferences that aren't explicitly in the
judgments are added - this happens when (doc A > doc B in JSG jsg1) and
(doc B > doc C in JSG jsg2). Note that a JG represents a single information
need set of judgments, and is required to be consistent (inconsistent
preferences are represented in different JGs). Preference array is of
size num_judged * num_judged.
3. Go through the preference in each JG, and count num_fulfilled and
num_possible preferences in categories "retrieved", "implied" and
"not_retrieved" where
retrieved means both A and B were retrieved in a pref A > B
implied means exactly one of A or B was retrieved.
not_retrieved means neither was retrieved.
Different evaluation measures may want to do different things with these
categories. Counting preferences is accomplished in the two preference
representations by:
A. When comparing EC1 and EC2 with EC1.rel_level > EC2.rel_level, compare
each (docid) rank1 in EC1->ranks with rank2 in EC2->ranks.
If rank1 < num_judged_ret then it was retrieved, similarly for rank2.
If both retrieved, then if rank1 < rank2 the preference is fulfilled
otherwise it wasn't.
If rank1 retrieved and rank2 was not, then implied pref fulfilled.
If rank1 not retrieved and rank2 was, then preference was not fulfilled.
If both not retrieved, then that count is incremented.
B. Given preference array PA there are five areas of importance, divided
by lines i == NJR (where NJR is num_judged_ret), j == NJR, and i == j
NJR
\................|...
.\...............|...
..\.......A1.....|.A3
...\.............|...
....\............|...
...
...A2..........\.|...
................\|...
NJR -----------------|---
.................|...
..........A4.....|.A5
.................|...
Area A1 is preferences fulfilled, both retrieved.
Area A2 is preferences not fulfilled, both retrieved.
Area A3 is preference implied fulfilled (i retrieved, j not)
Area A4 is preference implied not fulfilled (i not retrieved, j retrieved)
Area A5 is both i and j not retrieved.
Simply count the marks (PA[i][j] == 1) in each appropriate area.
As well as storing counts within each JG, a counts array for the
entire pref_results is constructed. Counts_array CA is exactly the same
format and size as the preference arrays, except CA[i][j] is the sum
of the conceptual PA[i][j] over all JGs. This allows counts of
confirmations (CA[i][j] > 1) and conflicts (CA[i][j] and CA[j][i] both
non-zero).
Not mentioned in steps 2 and 3 above since it adds even more confusion,
is that the relevant (rel_level > 0.0) and nonrelevant docs are also
tracked. Different measures may deal with those preferences differently.
This procedure may be called repeatedly for a given topic - returned
values are cached until the qid changes.
results and prefs_info formats must be "trec_results" and "prefs"
respectively.
UNDEF returned if error, 0 if used cache values, 1 if new values.
*/
typedef struct {
char *docno;
float sim;
long rank;
} DOCNO_RESULTS;
typedef struct {
char *jg;
char *jsg;
float rel_level;
char *docno;
long rank;
} PREFS_AND_RANKS;
static int form_jg_ec (const PREFS_AND_RANKS *prefs, const long num_prefs,
long *rank_pool_ptr, JG *jg,
RESULTS_PREFS *results_prefs);
static int add_ec_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs);
static int form_jg_pa (const PREFS_AND_RANKS *prefs, const long num_prefs,
JG *jg, RESULTS_PREFS *results_prefs);
static int add_transitives (PREFS_ARRAY *pa);
static int add_pa_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs);
static int mult_and_check_change (const PREFS_ARRAY *a1, const PREFS_ARRAY *a2,
PREFS_ARRAY *res);
static int form_prefs_and_ranks (const EPI*epi,
const TEXT_RESULTS_INFO *text_results_info,
const TEXT_PREFS_INFO *trec_prefs,
PREFS_AND_RANKS *prefs_and_ranks,
long *num_judged, long *num_judged_ret);
static void init_prefs_array (PREFS_ARRAY *pa);
static void init_counts_array (COUNTS_ARRAY *ca);
static int comp_prefs_and_ranks_jg_rel_level ();
static int comp_prefs_and_ranks_docno();
static int comp_sim_docno (), comp_docno (), comp_results_inc_rank ();
static void debug_print_ec (EC *ec),debug_print_prefs_array (PREFS_ARRAY *pa),
debug_print_counts_array (COUNTS_ARRAY *ca), debug_print_jg (JG *jg),
debug_print_results_prefs (RESULTS_PREFS *rp);
static void debug_print_docno_results (DOCNO_RESULTS *dr, long num_results,
char *location);
static void debug_print_prefs_and_ranks (PREFS_AND_RANKS *par, long num_prefs,
char *location);
/* Intermediate Temp storage. Not malloc'd and freed every query just
for memory management efficiency (avoids fragmentations and
thus effects on caching) */
/* Temp Structure for mapping results docno to results rank */
/* Current cached query */
static char *current_query = "no query";
static long max_current_query = 0;
/* Space reserved for cached returned values */
static long num_judged_ret;
static long num_judged;
static long num_jgs;
static JG *jgs;
static long max_num_jgs = 0;
static long *rank_pool;
static long max_rank_pool = 0;
static EC *ec_pool;
static long max_ec_pool = 0;
static unsigned short *ca_pool;
static long max_ca_pool = 0;
static unsigned short **ca_ptr_pool;
static long max_ca_ptr_pool = 0;
static unsigned char *pa_pool;
static long max_pa_pool = 0;
static unsigned char **pa_ptr_pool;
static long max_pa_ptr_pool = 0;
static float *rel_pool;
static long max_rel_pool = 0;
/* Space reserved for intermediate values */
static PREFS_AND_RANKS *prefs_and_ranks;
static long max_prefs_and_ranks = 0;
static DOCNO_RESULTS *docno_results;
static long max_docno_results = 0;
static unsigned char *temp_pa_pool;
static long max_temp_pa_pool;
static unsigned char **temp_pa_ptr_pool;
static long max_temp_pa_ptr_pool;
static long saved_num_judged = 0;
int
form_prefs_counts (const EPI *epi, const REL_INFO *rel_info,
const RESULTS *results, RESULTS_PREFS *results_prefs)
{
long i;
char *jgid, *jsgid;
long jg_ind;
long num_jgs_with_subgroups;
float rel_level;
EC * ec_pool_ptr;
float *rel_pool_ptr;
long *rank_pool_ptr;
unsigned char *pa_pool_ptr;
unsigned char **pa_ptr_pool_ptr;
long start_jg;
long num_rel_level;
long num_sub_group;
TEXT_RESULTS_INFO *text_results_info;
TEXT_PREFS_INFO *trec_prefs;
if (epi->debug_level >= 3)
printf ("Debug: Form_prefs starting query '%s'\n", results->qid);
if (0 == strcmp (current_query, results->qid)) {
/* Have done this query already. Return cached values */
results_prefs->num_jgs = num_jgs;
results_prefs->jgs = jgs;
results_prefs->num_judged = num_judged;
results_prefs->num_judged_ret = num_judged_ret;
results_prefs->pref_counts = (COUNTS_ARRAY) {ca_pool,
ca_ptr_pool,
num_judged};
if (epi->debug_level >= 3)
printf ("Returned Cached Form_prefs\n");
return (0);
}
/* Check that format type of result info and rel info are correct */
if ((strcmp ("prefs", rel_info->rel_format) &&
strcmp ("qrels_prefs", rel_info->rel_format)) ||
strcmp ("trec_results", results->ret_format)) {
fprintf (stderr, "trec_eval.form_prefs_info: prefs_info format not (prefs or qrels_prefs) or results format not trec_results\n");
return (UNDEF);
}
/* Make sure enough space for query and save copy */
i = strlen(results->qid)+1;
if (NULL == (current_query =
te_chk_and_malloc (current_query, &max_current_query,
i, sizeof (char))))
return (UNDEF);
(void) strncpy (current_query, results->qid, i);
text_results_info = (TEXT_RESULTS_INFO *) results->q_results;
trec_prefs = (TEXT_PREFS_INFO *) rel_info->q_rel_info;
/* Reserve space for returned and intermediate values, if needed */
if (NULL == (prefs_and_ranks =
te_chk_and_malloc (prefs_and_ranks, &max_prefs_and_ranks,
trec_prefs->num_text_prefs,
sizeof (PREFS_AND_RANKS))) ||
NULL == (ec_pool =
te_chk_and_malloc (ec_pool, &max_ec_pool,
trec_prefs->num_text_prefs, sizeof (EC))) ||
NULL == (rank_pool =
te_chk_and_malloc (rank_pool, &max_rank_pool,
trec_prefs->num_text_prefs, sizeof (long))))
return (UNDEF);
/* get prefs_and_ranks from results and prefs. Will be sorted by
jg, jsg, rel_level, rank. Set num_judged, num_judged_ret */
if (UNDEF == form_prefs_and_ranks (epi, text_results_info, trec_prefs,
prefs_and_ranks, &num_judged,
&num_judged_ret))
return (UNDEF);
/* Go through prefs_and ranks, count and reserve space for judgment groups.
Also count number of JGs that have subgroups and will need preference
arrays. */
jgid = ""; jsgid = "";
num_jgs = 0; num_jgs_with_subgroups = 0;
for (i = 0; i < trec_prefs->num_text_prefs; i++) {
if (strcmp (jgid, prefs_and_ranks[i].jg)) {
/* New JG */
jgid = prefs_and_ranks[i].jg;
jsgid = prefs_and_ranks[i].jsg;
num_jgs++;
}
else if (strcmp (jsgid, prefs_and_ranks[i].jsg)) {
jsgid = prefs_and_ranks[i].jsg;
num_jgs_with_subgroups++;
}
}
/* Reserve pool space for JGs, and final pref_counts */
if (NULL == (jgs =
te_chk_and_malloc (jgs, &max_num_jgs, num_jgs, sizeof (JG))) ||
NULL == (ca_pool =
te_chk_and_malloc (ca_pool, &max_ca_pool,
num_judged * num_judged,
sizeof (unsigned short))) ||
NULL == (ca_ptr_pool =
te_chk_and_malloc (ca_ptr_pool, &max_ca_ptr_pool, num_judged,
sizeof (unsigned short *))))
return (UNDEF);
if (num_jgs_with_subgroups) {
/* Reserve pool space for preference arrays, and rel_level arrays */
if (NULL == (rel_pool =
te_chk_and_malloc (rel_pool, &max_rel_pool,
num_judged * num_jgs_with_subgroups,
sizeof (float))) ||
NULL == (pa_pool =
te_chk_and_malloc (pa_pool, &max_pa_pool,
num_judged * num_judged * num_jgs_with_subgroups,
sizeof (unsigned char))) ||
NULL == (pa_ptr_pool =
te_chk_and_malloc (pa_ptr_pool, &max_pa_ptr_pool,
num_judged * num_jgs_with_subgroups,
sizeof (unsigned char *))))
return (UNDEF);
}
ec_pool_ptr = ec_pool;
rel_pool_ptr = rel_pool;
rank_pool_ptr = rank_pool;
pa_pool_ptr = pa_pool;
pa_ptr_pool_ptr = pa_ptr_pool;
/* setup returned results_prefs so its summary values can be filled in */
results_prefs->num_jgs = num_jgs;
results_prefs->jgs = jgs;
results_prefs->num_judged = num_judged;
results_prefs->num_judged_ret = num_judged_ret;
results_prefs->pref_counts = (COUNTS_ARRAY) {ca_pool, ca_ptr_pool,
num_judged};
init_counts_array (&results_prefs->pref_counts);
/* Go through prefs_and_ranks, determine and construct appropriate JG
preference format. Preferences are counted and add to summary values
as each JG is handled. */
jg_ind = 0;
start_jg = 0;
num_rel_level = 0;
num_sub_group = 0;
rel_level = -3.0; /* Illegal rel_level */
jgid = prefs_and_ranks[0].jg;
jsgid = "";
for (i = 0; i < trec_prefs->num_text_prefs; i++) {
if (strcmp (jgid, prefs_and_ranks[i].jg)) {
/* New judgment group. Form previous JG and initialize coounts
for new JG */
if (num_sub_group > 1) {
/* Preference array JG */
jgs[jg_ind].num_ecs = 0; /* Indicator thet prefs_array used */
jgs[jg_ind].prefs_array = (PREFS_ARRAY) {pa_pool_ptr,
pa_ptr_pool_ptr,
num_judged};
pa_pool_ptr += num_judged * num_judged;
pa_ptr_pool_ptr += num_judged;
jgs[jg_ind].rel_array = rel_pool_ptr;
rel_pool_ptr += num_judged;
if (UNDEF == form_jg_pa (&prefs_and_ranks[start_jg],
i - start_jg,
&jgs[jg_ind],
results_prefs))
return (UNDEF);
}
else {
/* EC JG */
jgs[jg_ind].ecs = ec_pool_ptr;
ec_pool_ptr += num_rel_level;
jgs[jg_ind].num_ecs = num_rel_level;
if (UNDEF == form_jg_ec (&prefs_and_ranks[start_jg],
i - start_jg,
rank_pool_ptr,
&jgs[jg_ind],
results_prefs))
return (UNDEF);
rank_pool_ptr += i - start_jg;
}
jgid = prefs_and_ranks[i].jg;
jg_ind++;
jsgid = "";
start_jg = i;
num_sub_group = 0;
num_rel_level = 0;
rel_level = -3.0; /* Illegal rel_level */
}
if (strcmp (jsgid, prefs_and_ranks[i].jsg)) {
num_sub_group++;
jsgid = prefs_and_ranks[i].jsg;
}
if (rel_level != prefs_and_ranks[i].rel_level) {
num_rel_level++;
rel_level = prefs_and_ranks[i].rel_level;
}
}
/* Form last JG */
if (num_sub_group > 1) {
/* Preference array JG */
jgs[jg_ind].num_ecs = 0; /* Indicator thet prefs_array used */
jgs[jg_ind].prefs_array = (PREFS_ARRAY) {pa_pool_ptr,
pa_ptr_pool_ptr,
num_judged};
pa_pool_ptr += num_judged * num_judged;
pa_ptr_pool_ptr += num_judged;
jgs[jg_ind].rel_array = rel_pool_ptr;
rel_pool_ptr += num_judged;
if (UNDEF == form_jg_pa (&prefs_and_ranks[start_jg],
i - start_jg,
&jgs[jg_ind],
results_prefs))
return (UNDEF);
}
else {
/* EC JG */
jgs[jg_ind].ecs = ec_pool_ptr;
ec_pool_ptr += num_rel_level;
jgs[jg_ind].num_ecs = num_rel_level;
if (UNDEF == form_jg_ec (&prefs_and_ranks[start_jg],
i - start_jg,
rank_pool_ptr,
&jgs[jg_ind],
results_prefs))
return (UNDEF);
}
if (epi->debug_level >= 3)
debug_print_results_prefs (results_prefs);
return (1);
}
static int
form_jg_ec (const PREFS_AND_RANKS *prefs, const long num_prefs,
long *rank_pool_ptr, JG *jg, RESULTS_PREFS *results_prefs)
{
EC *ec_ptr =jg->ecs;
long *rank_ptr = rank_pool_ptr;
long i;
float rel_level;
/* Fill in prefs array with all known info from prefs */
/* prefs is sorted by jsg, then rel_level, then rank */
rel_level = prefs[0].rel_level;
*ec_ptr = (EC) {rel_level, 0, rank_ptr};
for (i = 0; i < num_prefs; i++) {
if (prefs[i].rel_level != rel_level) {
/* new equivalence class */
rel_level = prefs[i].rel_level;
rank_ptr += ec_ptr->num_in_ec;
ec_ptr++;
*ec_ptr = (EC) {prefs[i].rel_level, 0, rank_ptr};
}
ec_ptr->docid_ranks[ec_ptr->num_in_ec++] = prefs[i].rank;
}
/* Add counts of preference fulfilled and possible to jg and
summary counts */
if (UNDEF == add_ec_pref_to_jg (jg, results_prefs))
return (UNDEF);
return (1);
}
/* Add counts of preference fulfilled and possible to jg for EC pref info */
static int
add_ec_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs)
{
long num_judged_ret = results_prefs->num_judged_ret;
unsigned short **pc = results_prefs->pref_counts.array;
long *ptr1, *ptr2;
long ec1, ec2;
jg->num_prefs_fulfilled_ret = 0;
jg->num_prefs_possible_ret = 0;
jg->num_prefs_fulfilled_imp = 0;
jg->num_prefs_possible_imp = 0;
jg->num_prefs_possible_notoccur = 0;
jg->num_nonrel = 0;
jg->num_nonrel_ret = 0;
jg->num_rel = 0;
jg->num_rel_ret = 0;
/* Go through all ecs counting preferences, and setting up
prefs_count */
for (ec1 = 0; ec1 < jg->num_ecs; ec1++) {
/* Count num rel and ret */
for (ptr1 = jg->ecs[ec1].docid_ranks;
ptr1 < &jg->ecs[ec1].docid_ranks[jg->ecs[ec1].num_in_ec];
ptr1++) {
if (*ptr1 >= num_judged_ret)
break;
}
if (jg->ecs[ec1].rel_level > 0.0) {
jg->num_rel_ret += ptr1 - jg->ecs[ec1].docid_ranks;
jg->num_rel += jg->ecs[ec1].num_in_ec;
}
else {
jg->num_nonrel_ret += ptr1 - jg->ecs[ec1].docid_ranks;
jg->num_nonrel += jg->ecs[ec1].num_in_ec;
}
/* Count prefs */
for (ec2 = ec1 + 1; ec2 < jg->num_ecs; ec2++) {
for (ptr1 = jg->ecs[ec1].docid_ranks;
ptr1 < &jg->ecs[ec1].docid_ranks[jg->ecs[ec1].num_in_ec];
ptr1++) {
for (ptr2 = jg->ecs[ec2].docid_ranks;
ptr2 < &jg->ecs[ec2].docid_ranks[jg->ecs[ec2].num_in_ec];
ptr2++) {
/* Add pref to summary info */
pc[*ptr1][*ptr2]++;
/* check for inconsistency: same doc in multiple ec */
if (*ptr1 == *ptr2) {
fprintf (stderr,
"trec_eval.form_prefs_counts: Internal docid %ld occurs with different rel_level in same jsg\n", *ptr1);
return (UNDEF);
/* need to check this in pa also? */
}
/* Add count to appropriate jg_num* */
if (*ptr1 < *ptr2) {
/* judgment fulfilled */
if (*ptr2 < num_judged_ret)
jg->num_prefs_fulfilled_ret++;
else if (*ptr1 < num_judged_ret)
jg->num_prefs_fulfilled_imp++;
else
jg->num_prefs_possible_notoccur++;
}
else {
if (*ptr1 < num_judged_ret)
jg->num_prefs_possible_ret++;
else if (*ptr2 < num_judged_ret)
jg->num_prefs_possible_imp++;
else
jg->num_prefs_possible_notoccur++;
}
}
}
}
}
jg->num_prefs_possible_ret += jg->num_prefs_fulfilled_ret;
jg->num_prefs_possible_imp += jg->num_prefs_fulfilled_imp;
return (1);
}
static int
form_jg_pa (const PREFS_AND_RANKS *prefs, const long num_prefs,
JG *jg, RESULTS_PREFS *results_prefs)
{
long i,j;
PREFS_ARRAY *pa = &jg->prefs_array;
/* Initialize and zero prefs_array */
init_prefs_array(pa);
/* Initialize and set rel_level to -1. Will check for inconsistencies
(rel_level for some docid 0.0 and some > 0.0) as prefs handled */
for (i = 0; i < pa->num_judged; i++) {
jg->rel_array[i] = -1.0;
}
/* Fill in prefs array with all known info from prefs */
/* prefs is sorted by jsg, then rel_level, then rank */
for (i = 0; i < num_prefs; i++) {
/* check for consistency and add rel_level info */
if ((jg->rel_array[prefs[i].rank] > 0.0 &&
prefs[i].rel_level == 0.0) ||
(jg->rel_array[prefs[i].rank] == 0.0 &&
prefs[i].rel_level > 0.0)) {
fprintf (stderr,
"trec_eval.form_prefs_counts: doc '%s' has both 0 and non-0 rel_level assigned\n",
prefs[i].docno);
return (UNDEF);
}
jg->rel_array[prefs[i].rank] = prefs[i].rel_level;
/* Add preference for i to all other entries in this JSG with lower
rel_levels */
j = i+1;
/* Just skip over all docs at same rel_level */
while (j < num_prefs &&
0 == strcmp (prefs[i].jsg, prefs[j].jsg) &&
prefs[i].rel_level == prefs[j].rel_level)
j++;
/* Rest of JSG has lower rel_level */
while (j < num_prefs &&
0 == strcmp (prefs[i].jsg, prefs[j].jsg)) {
pa->array[prefs[i].rank][prefs[j].rank] = 1;
j++;
}
}
/* Add transitive preferences to pa */
if (UNDEF == add_transitives (pa))
return (UNDEF);
/* Add counts of preference fulfilled and possible to jg and
summary counts */
if (UNDEF == add_pa_pref_to_jg (jg, results_prefs))
return (UNDEF);
return (1);
}
static int
add_transitives(PREFS_ARRAY *pa)
{
PREFS_ARRAY m1;
PREFS_ARRAY m2;
long i, j;
PREFS_ARRAY *array_in, *array_out, *temp;
/* Need two temporary arrays of same size as pa. Reserve space and
zero out arrays */
if (NULL == (temp_pa_pool =
te_chk_and_malloc (temp_pa_pool, &max_temp_pa_pool,
2 * pa->num_judged * pa->num_judged,
sizeof (unsigned char))) ||
NULL == (temp_pa_ptr_pool =
te_chk_and_malloc (temp_pa_ptr_pool, &max_temp_pa_ptr_pool,
2 * pa->num_judged,
sizeof (unsigned char *))))
return (UNDEF);
m1 = (PREFS_ARRAY) {temp_pa_pool, temp_pa_ptr_pool, pa->num_judged};
m2 = (PREFS_ARRAY) {temp_pa_pool + pa->num_judged * pa->num_judged,
temp_pa_ptr_pool + pa->num_judged,
pa->num_judged};
if (pa->num_judged != saved_num_judged) {
/* if new size array, must reset row pointers */
saved_num_judged = pa->num_judged;
for (i = 0; i < pa->num_judged; i++) {
m1.array[i] = m1.full_array + i * pa->num_judged;
m2.array[i] = m2.full_array + i * pa->num_judged;
}
}
/* Add diagonal all ones in pa */
for (i = 0; i < pa->num_judged; i++) {
pa->array[i][i] = 1;
}
/* Multiply prefs_array by itself until there are no changes */
array_in = pa;
array_out = &m1;
if (mult_and_check_change (pa, array_in, array_out)) {
array_in = array_out;
array_out = &m2;
while (mult_and_check_change (pa, array_in, array_out)) {
temp = array_in;
array_in = array_out;
array_out = temp;
}
/* There were changes, now set pa values to those of array_out */
for (i = 0; i < pa->num_judged; i++) {
for (j = 0; j <pa-> num_judged; j++) {
pa->array[i][j] = array_out->array[i][j];
}
}
}
/* Reset all diagonals to 0 in pa */
for (i = 0; i < pa->num_judged; i++) {
pa->array[i][i] = 0;
}
/* Check for inconsistencies */
for (i = 0; i < pa->num_judged; i++) {
for (j = 0; j <pa-> num_judged; j++) {
if (i != j && pa->array[i][j] && pa->array[j][i]) {
fprintf (stderr, "trec_eval.form_prefs_counts: Pref inconsistency found\n internal rank %ld and internal rank %ld are conflicted\n", i, j);
abort();
return (-1);
}
}
}
return (1);
}
/* Add counts of preference fulfilled and possible to jg */
static int
add_pa_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs)
{
unsigned char **a = jg->prefs_array.array;
unsigned short **c = results_prefs->pref_counts.array;
long num_judged = results_prefs->num_judged;
long num_judged_ret = results_prefs->num_judged_ret;
long i,j;
jg->num_prefs_fulfilled_ret = 0;
jg->num_prefs_possible_ret = 0;
jg->num_prefs_fulfilled_imp = 0;
jg->num_prefs_possible_imp = 0;
jg->num_prefs_possible_notoccur = 0;
jg->num_nonrel = 0;
jg->num_nonrel_ret = 0;
jg->num_rel = 0;
jg->num_rel_ret = 0;
for (i = 0; i < num_judged_ret; i++) {
if (jg->rel_array[i] > 0.0)
jg->num_rel_ret++;
else if (jg->rel_array[i] == 0.0)
jg->num_nonrel_ret++;
}
jg->num_rel = jg->num_rel_ret;
jg->num_nonrel = jg->num_nonrel_ret;
for (i = num_judged_ret; i < num_judged; i++) {
if (jg->rel_array[i] > 0.0)
jg->num_rel++;
else if (jg->rel_array[i] == 0.0)
jg->num_nonrel++;
}
for (i = 0; i < num_judged_ret; i++) {
for (j = 0; j < i; j++) {
if (a[i][j]) {
/* Pref not fulfilled. Area A2 (see comment at top) */
c[i][j]++;
jg->num_prefs_possible_ret++;
}
}
for (j = i+1; j < num_judged_ret; j++) {
if (a[i][j]) {
/* Pref fulfilled. Area A1 (see comment at top) */
c[i][j]++;
jg->num_prefs_fulfilled_ret++;
}
}
for (j = num_judged_ret; j < num_judged; j++) {
if (a[i][j]) {
/* Pref fulfilled implied. Area A3 (see comment at top) */
c[i][j]++;
jg->num_prefs_fulfilled_imp++;
}
}
}
for (i = num_judged_ret; i < num_judged; i++) {
for (j = 0; j < num_judged_ret; j++) {
if (a[i][j]) {
/* Pref not fulfilled implied. Area A4 (see comment at top) */
c[i][j]++;
jg->num_prefs_possible_imp++;
}
}
for (j = num_judged_ret; j < num_judged; j++) {
if (a[i][j]) {
/* Pref not occur at all. Area A5 (see comment at top) */
c[i][j]++;
jg->num_prefs_possible_notoccur++;
}
}
}
jg->num_prefs_possible_ret += jg->num_prefs_fulfilled_ret;
jg->num_prefs_possible_imp += jg->num_prefs_fulfilled_imp;
return (1);
}
/* Multiply array a1 * array a2 and put result in array res.
Return (1) if array res is changed from array a2 after multiplication
else return 0 */
static int
mult_and_check_change (const PREFS_ARRAY *a1, const PREFS_ARRAY *a2, PREFS_ARRAY *res)
{
int change = 0;
long i, j, k;
for (i = 0; i < a1->num_judged; i++) {
for (j = 0; j < a1->num_judged; j++) {
res->array[i][j] = 0;
for (k = 0; k < a1->num_judged; k++) {
if (a1->array[i][k] && a2->array[k][j]) {
res->array[i][j] = 1;
break;
}
}
if (a2->array[i][j] != res->array[i][j]) change = 1;
}
}
return (change);
}
static int form_prefs_and_ranks (const EPI*epi,
const TEXT_RESULTS_INFO *text_results_info,
const TEXT_PREFS_INFO *trec_prefs,
PREFS_AND_RANKS *prefs_and_ranks,
long *num_judged, long *num_judged_ret)
{
long lnum_judged_ret; /* local num_judged_ret */
long next_unretrieved_rank;
long i;
long num_results; /* Current number of results (changes as docs
thrown away from docno_results) */
long num_prefs = trec_prefs->num_text_prefs;
PREFS_AND_RANKS *ranks_ptr, *end_ranks, *start_ptr;
/* Copy docno results and add ranks */
num_results = text_results_info->num_text_results;
if (NULL == (docno_results =
te_chk_and_malloc (docno_results, &max_docno_results,
num_results, sizeof (DOCNO_RESULTS))))
return (UNDEF);
for (i = 0; i < num_results; i++) {
docno_results[i].docno = text_results_info->text_results[i].docno;
docno_results[i].sim = text_results_info->text_results[i].sim;
}
/* Sort results by sim, breaking ties lexicographically using docno */
qsort ((char *) docno_results,
(int) num_results,
sizeof (DOCNO_RESULTS),
comp_sim_docno);
if (epi->debug_level >= 5)
debug_print_docno_results (docno_results, num_results,
"After input, before ranks");
/* Only look at epi->max_num_docs_per_topic (not normally an issue) */
if (num_results > epi->max_num_docs_per_topic)
num_results = epi->max_num_docs_per_topic;
/* Add ranks to docno_results (starting at 1) */
for (i = 0; i < num_results; i++) {
docno_results[i].rank = i+1;
}
/* Sort docno_results by increasing docno */
qsort ((char *) docno_results,
(int) num_results,
sizeof (DOCNO_RESULTS),
comp_docno);
/* Error checking for duplicates */
for (i = 1; i < num_results; i++) {
if (0 == strcmp (docno_results[i].docno,
docno_results[i-1].docno)) {
fprintf (stderr, "trec_eval.form_prefs_counts: duplicate docs %s",
docno_results[i].docno);
return (UNDEF);
}
}
if (epi->debug_level >= 5)
debug_print_docno_results (docno_results, num_results,
"After -M, ranks");
/* Copy trec_prefs - sort by docno. Space already reserved */
for (i = 0; i < num_prefs; i++) {
prefs_and_ranks[i].jg = trec_prefs->text_prefs[i].jg;
prefs_and_ranks[i].jsg = trec_prefs->text_prefs[i].jsg;
prefs_and_ranks[i].rel_level = trec_prefs->text_prefs[i].rel_level;
prefs_and_ranks[i].docno = trec_prefs->text_prefs[i].docno;
}
qsort ((char *) prefs_and_ranks,
(int) num_prefs,
sizeof (PREFS_AND_RANKS),
comp_prefs_and_ranks_docno);
if (epi->debug_level >= 5)
debug_print_prefs_and_ranks (prefs_and_ranks, num_prefs,
"Input, before ranks");
/* Go through docno_results and prefs_and_ranks in parallel (both sorted
by docno) and assign ranks of -1 to those docs in docno_results that
are not in prefs_and_ranks */
ranks_ptr = prefs_and_ranks;
end_ranks = &prefs_and_ranks[num_prefs];
for (i = 0; i < num_results && ranks_ptr < end_ranks; i++) {
while (ranks_ptr < end_ranks &&
strcmp (ranks_ptr->docno, docno_results[i].docno) < 0)
ranks_ptr++;
if (ranks_ptr < end_ranks &&
strcmp (ranks_ptr->docno, docno_results[i].docno) == 0) {
do {
ranks_ptr++;
} while (ranks_ptr < end_ranks &&
strcmp (ranks_ptr->docno, docno_results[i].docno) == 0);
}
else
/* dpcno_results[i] not judged */
docno_results[i].rank = -1;
}
/* sort docno_results[0..i] by increasing rank */
num_results = i;
qsort ((char *) docno_results,
(int) num_results,
sizeof (DOCNO_RESULTS),
comp_results_inc_rank);
if (epi->debug_level >= 5)
debug_print_docno_results (docno_results, num_results,
"After marking not judged");
/* Assign new docid ranks starting at 0 for only judged docs */
lnum_judged_ret = 0;
for (i = 0; i < num_results; i ++) {
if (docno_results[i].rank > -1) {
docno_results[lnum_judged_ret].docno = docno_results[i].docno;
docno_results[lnum_judged_ret].rank = lnum_judged_ret;
lnum_judged_ret++;
}
}
num_results = lnum_judged_ret;
/* Sort docno_results by increasing docno */
qsort ((char *) docno_results,
(int) num_results,
sizeof (DOCNO_RESULTS),
comp_docno);
if (epi->debug_level >= 5)
debug_print_docno_results (docno_results, num_results,
"After assigning docid_ranks");
/* Go through reduced docno_results and prefs_and_ranks in parallel and
assign ranks in prefs_and_ranks from docno_results. Note all docnos
in docno_results now guaranteed to be in prefs_and_ranks */
ranks_ptr = prefs_and_ranks;
end_ranks = &prefs_and_ranks[num_prefs];
next_unretrieved_rank = num_results;
for (i = 0; i < num_results; i++) {
while (strcmp (ranks_ptr->docno, docno_results[i].docno) < 0) {
ranks_ptr->rank = next_unretrieved_rank++;
start_ptr = ranks_ptr++;
while (strcmp (ranks_ptr->docno, start_ptr->docno) == 0) {
ranks_ptr->rank = start_ptr->rank;
ranks_ptr++;
}
}
ranks_ptr->rank = docno_results[i].rank;
start_ptr = ranks_ptr++;
while (ranks_ptr < end_ranks &&
strcmp (ranks_ptr->docno, start_ptr->docno) == 0) {
ranks_ptr->rank = start_ptr->rank;
ranks_ptr++;
}
}
while (ranks_ptr < end_ranks) {
ranks_ptr->rank = next_unretrieved_rank++;
start_ptr = ranks_ptr++;
while (ranks_ptr < end_ranks &&
strcmp (ranks_ptr->docno, start_ptr->docno) == 0) {
ranks_ptr->rank = start_ptr->rank;
ranks_ptr++;
}
}
/* Now sort prefs_and_ranks by jg, jsg, rel_level, docid_rank */
qsort ((void *) prefs_and_ranks,
num_prefs,
sizeof (PREFS_AND_RANKS),
comp_prefs_and_ranks_jg_rel_level);
if (epi->debug_level >= 4) {
printf ("Form_prefs: num_judged %ld, num_judged_ret %ld\n",
next_unretrieved_rank, num_results);
debug_print_prefs_and_ranks (prefs_and_ranks, num_prefs,
"Final prefs");
}
*num_judged = next_unretrieved_rank;
*num_judged_ret = num_results;
return (1);
}
static void
init_prefs_array (PREFS_ARRAY *pa)
{
unsigned char *row_ptr;
long i;
(void) memset ((void *) pa->full_array, 0,
pa->num_judged * pa->num_judged *
sizeof (unsigned char));
row_ptr = pa->full_array;
for (i = 0; i < pa->num_judged; i++) {
pa->array[i] = row_ptr;
row_ptr += pa->num_judged;
}
}
static void
init_counts_array (COUNTS_ARRAY *ca)
{
unsigned short *row_ptr;
long i;
(void) memset ((void *) ca->full_array, 0,
ca->num_judged * ca->num_judged *
sizeof (unsigned short));
row_ptr = ca->full_array;
for (i = 0; i < ca->num_judged; i++) {
ca->array[i] = row_ptr;
row_ptr += ca->num_judged;
}
}
static int
comp_prefs_and_ranks_docno (PREFS_AND_RANKS *ptr1, PREFS_AND_RANKS *ptr2)
{
return (strcmp (ptr1->docno, ptr2->docno));
}
static int
comp_prefs_and_ranks_jg_rel_level (PREFS_AND_RANKS *ptr1, PREFS_AND_RANKS *ptr2)
{
int jg_comp = strcmp (ptr1->jg, ptr2->jg);
if (jg_comp != 0) return (jg_comp);
jg_comp = strcmp (ptr1->jsg, ptr2->jsg);
if (jg_comp != 0) return (jg_comp);
if (ptr1->rel_level > ptr2->rel_level) return (-1);
if (ptr1->rel_level < ptr2->rel_level) return (1);
return (ptr1->rank - ptr2->rank);
}
static int
comp_sim_docno (ptr1, ptr2)
DOCNO_RESULTS *ptr1;
DOCNO_RESULTS *ptr2;
{
if (ptr1->sim > ptr2->sim)
return (-1);
if (ptr1->sim < ptr2->sim)
return (1);
return (strcmp (ptr1->docno, ptr2->docno));
}
static int
comp_docno (DOCNO_RESULTS *ptr1, DOCNO_RESULTS *ptr2)
{
return (strcmp (ptr1->docno, ptr2->docno));
}
static int
comp_results_inc_rank (DOCNO_RESULTS *ptr1, DOCNO_RESULTS *ptr2)
{
return (ptr1->rank - ptr2->rank);
}
static void
debug_print_prefs_and_ranks (PREFS_AND_RANKS *par, long num_prefs,
char *location)
{
long i;
printf ("Prefs_and_ranks Dump. num_pref_lines %ld, %s\n",
num_prefs, location);
for (i = 0; i < num_prefs; i++) {
printf (" %s\t%s\t%4.2f\t%s\t%3ld\n",
par[i].jg, par[i].jsg, par[i].rel_level, par[i].docno,
par[i].rank);
}
fflush (stdout);
}
static void
debug_print_docno_results (DOCNO_RESULTS *dr, long num_results,
char *location)
{
long i;
printf ("Docno_results Dump. num_results %ld, %s\n",
num_results, location);
for (i = 0; i < num_results; i++) {
printf (" %s\t%4.2f\t%3ld\n",
dr[i].docno, dr[i].sim, dr[i].rank);
}
fflush (stdout);
}
static void
debug_print_ec (EC *ec) {
long i;
printf (" EC Dump. Rel_level %4.2f. Num_docid_ranks %ld",
ec->rel_level, ec->num_in_ec);
for (i = 0; i < ec->num_in_ec; i++) {
if (0 == (i % 10))
printf ("\n ");
printf ("%3ld ", ec->docid_ranks[i]);
}
putchar ('\n');
fflush (stdout);
}
static void
debug_print_prefs_array (PREFS_ARRAY *pa) {
long i,j;
printf (" Prefs_Array Dump. Num_judged %ld", pa->num_judged);
for (i = 0; i < pa->num_judged; i++) {
printf ("\n Row %3ld\n ", i);
for (j = 0; j < pa->num_judged; j++) {
if (j && 0 == (j % 20))
printf (" (%ld)\n ", j);
printf ("%2hhd", pa->array[i][j]);
}
}
putchar ('\n');
fflush (stdout);
}
static void
debug_print_counts_array (COUNTS_ARRAY *ca) {
long i,j;
printf (" Counts_Array Dump. Num_judged %ld", ca->num_judged);
for (i = 0; i < ca->num_judged; i++) {
printf ("\n Row %3ld\n ", i);
for (j = 0; j < ca->num_judged; j++) {
if (j && 0 == (j % 20))
printf (" (%ld)\n ", j);
printf ("%2hd ", ca->array[i][j]);
}
}
putchar ('\n');
fflush (stdout);
}
static void
debug_print_jg (JG *jg) {
long i;
printf (" JG Dump. Type %s\n", jg->num_ecs > 0 ? "EC":"Prefs_array");
printf (" num_prefs_fulfilled_ret %ld\n", jg->num_prefs_fulfilled_ret);
printf (" num_prefs_possible_ret %ld\n", jg->num_prefs_possible_ret);
printf (" num_prefs_fulfilled_imp %ld\n", jg->num_prefs_fulfilled_imp);
printf (" num_prefs_possible_imp %ld\n", jg->num_prefs_possible_imp);
printf (" num_prefs_possible_notoccur %ld\n", jg->num_prefs_possible_notoccur);
printf (" num_nonrel %ld\n", jg->num_nonrel);
printf (" num_nonrel_ret %ld\n", jg->num_nonrel_ret);
printf (" num_rel %ld\n", jg->num_rel);
printf (" num_rel_ret %ld\n", jg->num_rel_ret);
if (0 == jg->num_ecs && jg->rel_array) {
debug_print_prefs_array (&jg->prefs_array);
printf (" Rel_array Dump. %ld values",
jg->prefs_array.num_judged);
for (i = 0; i < jg->prefs_array.num_judged; i++) {
if (0 == (i % 10))
printf ("\n ");
printf ("%4.2f ", jg->rel_array[i]);
}
putchar ('\n');
}
else if (0 == jg->num_ecs)
printf (" JG is not initialized (0 ECs and no rel_array\n");
else {
printf (" Dump of %ld ECs within JG\n", jg->num_ecs);
for (i = 0; i < jg->num_ecs; i++)
debug_print_ec (&jg->ecs[i]);
}
fflush (stdout);
}
static void
debug_print_results_prefs (RESULTS_PREFS *rp) {
long i;
printf ("Results_prefs Dump. %ld Judgment Groups\n", rp->num_jgs);
printf (" num_judged_ret %ld, num_judged %ld\n",
rp->num_judged_ret, rp->num_judged);
for (i = 0; i < rp->num_jgs; i++)
debug_print_jg (&rp->jgs[i]);
debug_print_counts_array (&rp->pref_counts);
}
int
te_form_pref_counts_cleanup ()
{
if (max_current_query > 0) {
Free (current_query);
max_current_query = 0;
current_query = "no_query";
}
if (max_num_jgs > 0) {
Free (jgs);
max_num_jgs = 0;
}
if (max_rank_pool > 0) {
Free (rank_pool);
max_rank_pool = 0;
}
if (max_ec_pool > 0) {
Free (ec_pool);
max_ec_pool = 0;
}
if (max_ca_pool > 0) {
Free (ca_pool);
max_ca_pool = 0;
}
if (max_ca_ptr_pool > 0) {
Free (ca_ptr_pool);
max_ca_ptr_pool = 0;
}
if (max_pa_pool > 0) {
Free (pa_pool);
max_pa_pool = 0;
}
if (max_pa_ptr_pool > 0) {
Free (pa_ptr_pool);
max_pa_ptr_pool = 0;
}
if (max_rel_pool > 0) {
Free (rel_pool);
max_rel_pool = 0;
}
if (max_prefs_and_ranks > 0) {
Free (prefs_and_ranks);
max_prefs_and_ranks = 0;
}
if (max_docno_results > 0) {
Free (docno_results);
max_docno_results = 0;
}
if (max_temp_pa_pool > 0) {
Free (temp_pa_pool);
max_temp_pa_pool = 0;
}
if (max_temp_pa_ptr_pool > 0) {
Free (temp_pa_ptr_pool);
max_temp_pa_ptr_pool = 0;
}
return (1);
}