34 #define log2(x) (log(x)/log(2))
116 yaz_log(YLOG_LOG,
"Normalizing: Client %d uses '%s'", n->
num, n->
native_score );
128 yaz_log(YLOG_LOG,
"Normalizing: Client %d uses '%s' = %d",
160 yaz_log(YLOG_LOG,
"Got score for %d/%d : %f ",
163 if ( norm->
count == 1 )
181 for ( ; rp; rp = rp->
next )
184 double normscore = rp->
score * a + b;
185 double diff = target - normscore;
194 const int maxiterations = 1000;
195 const double enough = 100.0;
196 const double stepchange = 0.5;
199 for ( norm = rel->
norm; norm; norm = norm->
next )
201 yaz_log(YLOG_LOG,
"Normalizing client %d: scorefield=%d count=%d range=%f %f = %f",
209 double range = norm->
max - norm->
min;
229 b = -1.0 * norm->
min / range;
234 yaz_log(YLOG_LOG,
"Initial done: it=%d: a=%f / %f b=%f / %f chi = %f",
235 0,
a, as,
b, bs, chi );
236 while (it++ < maxiterations)
242 double prevchi = chi;
243 if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus)
247 as = as * (1.0 + stepchange);
250 else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus)
254 as = as * (1.0 + stepchange);
257 else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus)
261 bs = bs * (1.0 + stepchange);
264 else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus)
269 bs = bs * (1.0+stepchange);
275 double adif = 0.5 * ( aplus + aminus ) - prevchi;
276 double bdif = 0.5 * ( bplus + bminus ) - prevchi;
277 if ( fabs(adif) > fabs(bdif) )
279 as = as * ( 1.0 - stepchange);
284 bs = bs * ( 1.0 - stepchange);
288 yaz_log(YLOG_LOG,
"Fitting %s it=%d: a=%g %g b=%g %g chi=%g ap=%g am=%g, bp=%g bm=%g p=%g",
289 branch, it,
a, as,
b, bs, chi,
290 aplus, aminus, bplus, bminus, prevchi );
293 if ( fabs(as) * enough < fabs(
a) &&
294 fabs(bs) * enough < fabs(
b) ) {
299 yaz_log(YLOG_LOG,
"Fitting done: it=%d: a=%g / %g b=%g / %g chi = %g",
300 it-1,
a, as,
b, bs, chi );
306 for ( ; nr ; nr = nr->
next ) {
307 double r = nr->
score;
308 r = norm->
a * r + norm -> b;
311 yaz_log(YLOG_LOG,
"Normalized %f * %f + %f = %f",
312 nr->
score, norm->
a, norm->
b, r );
323 const char *rank,
int *weight)
327 for (; entries; entries = entries->
next, i++)
333 sscanf(rank,
"%d%n", weight, &no_read);
337 if (no_read > 0 && (cp = strchr(rank,
' ')))
339 if ((cp - rank) == strlen(entries->
ccl_field) &&
340 memcmp(entries->
ccl_field, rank, cp - rank) == 0)
341 *weight = atoi(cp + 1);
350 const char *words,
const char *
name,
360 size_t org_start, org_len;
365 for (; entries; entries = entries->
next, i++)
375 wrbuf_puts(w_snippet,
"<match>");
384 wrbuf_puts(w_snippet,
"</match>");
387 wrbuf_xmlputs_n(w_snippet, words + org_start, org_len);
390 wrbuf_puts(w_snippet,
"</match>");
393 yaz_log(YLOG_DEBUG,
"SNIPPET match: %s", wrbuf_cstr(w_snippet));
399 const char *words,
const char *rank,
408 int printed_about_field = 0;
411 for (e = r->
entries, i = 1; i < r->vec_len; i++, e = e->
next)
420 int local_weight = 0;
427 if (!printed_about_field)
429 printed_about_field = 1;
430 wrbuf_printf(wr,
"field=%s content=",
name);
431 if (strlen(words) > 50)
433 wrbuf_xmlputs_n(wr, words, 49);
434 wrbuf_puts(wr,
" ...");
437 wrbuf_xmlputs(wr, words);
438 wrbuf_puts(wr,
";\n");
440 assert(res < r->vec_len);
441 w[res] += local_weight / (1 + log2(1 + lead_decay * length));
442 wrbuf_printf(wr,
"%s: w[%d] += w(%d) / "
443 "(1+log2(1+lead_decay(%f) * length(%d)));\n",
444 e->
display_str, res, local_weight, lead_decay, length);
448 int d = length + 1 - r->
term_pos[j];
449 wrbuf_printf(wr,
"%s: w[%d] += w[%d](%d) * follow(%f) / "
455 for (j = 0; j < r->
vec_len; j++)
456 r->
term_pos[j] = j < res ? 0 : length + 1;
461 for (e = r->
entries, i = 1; i < r->vec_len; i++, e = e->
next)
463 if (length == 0 || w[i] == 0)
465 wrbuf_printf(wr,
"%s: tf[%d] += w[%d](%d)", e->
display_str, i, i, w[i]);
472 wrbuf_printf(wr,
" / log2(1+length(%d))", length);
474 (double) w[i] / log2(1 + length);
477 wrbuf_printf(wr,
" / length(%d)", length);
504 nmem_strsplit(res->
nmem,
" ", n->u.t.term, &words, &numwords);
505 for (i = 0; i < numwords; i++)
517 *e = nmem_malloc(res->
nmem,
sizeof(**e));
521 (*e)->display_str = nmem_strdup(res->
nmem, words[i]);
535 for (i = 0; i < r->
vec_len; i++)
541 struct ccl_rpn_node *query,
546 NMEM
nmem = nmem_create();
565 nmem_malloc(res->
nmem,
580 nmem_destroy((*rp)->nmem);
590 for (i = 0; i < r->
vec_len; i++)
593 for (i = 0; i < r->
vec_len; i++)
605 for (i = 0; i < r->
vec_len; i++)
612 for (i = 0; i < r->
vec_len; i++)
620 for (i = 1; i < r->
vec_len; i++)
633 float *idfvec = xmalloc(rel->
vec_len *
sizeof(
float));
638 for (i = 1; i < rel->
vec_len; i++)
660 wrbuf_puts(w,
"relevance = 0;\n");
661 for (i = 1; i < rel->
vec_len; i++)
664 int add = 100000 * termfreq * idfvec[i];
666 wrbuf_printf(w,
"idf[%d] = log(((1 + total(%d))/termoccur(%d));\n",
669 wrbuf_printf(w,
"%s: relevance += 100000 * tf[%d](%f) * "
670 "idf[%d](%f) (%d);\n",
678 int cluster_size = 0;
683 wrbuf_printf(w,
"score = relevance(%d)/cluster_size(%d);\n",
689 wrbuf_printf(w,
"score = relevance(%d);\n",
relevance);
const char * pp2_charset_token_next(pp2_charset_token_t prt)
pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft, const char *id)
void pp2_get_org(pp2_charset_token_t prt, size_t *start, size_t *len)
void pp2_charset_token_first(pp2_charset_token_t prt, const char *buf, int skip_article)
void pp2_charset_token_destroy(pp2_charset_token_t prt)
struct session * client_get_session(struct client *cl)
struct session_database * client_get_database(struct client *cl)
int conf_service_metadata_field_id(struct conf_service *service, const char *name)
void reclist_enter(struct reclist *l)
void reclist_leave(struct reclist *l)
struct record_cluster * reclist_read_record(struct reclist *l)
void reclist_rewind(struct reclist *l)
struct relevance * relevance_create_ccl(pp2_charset_fact_t pft, struct ccl_rpn_node *query, int rank_cluster, double follow_factor, double lead_decay, int length_divide)
void relevance_clear(struct relevance *r)
const int scorefield_position
static void normalize_scores(struct relevance *rel)
const int scorefield_internal
static void pull_terms(struct relevance *res, struct ccl_rpn_node *n)
void relevance_donerecord(struct relevance *r, struct record_cluster *cluster)
void relevance_destroy(struct relevance **rp)
void relevance_newrec(struct relevance *r, struct record_cluster *rec)
const int scorefield_none
static double squaresum(struct norm_record *rp, double a, double b)
int relevance_snippet(struct relevance *r, const char *words, const char *name, WRBUF w_snippet)
void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, const char *rank, const char *name)
void relevance_mergerec(struct relevance *r, struct record_cluster *dst, const struct record_cluster *src)
static void setup_norm_record(struct relevance *rel, struct record_cluster *clust)
static struct word_entry * word_entry_match(struct relevance *r, const char *norm_str, const char *rank, int *weight)
void relevance_prepare_read(struct relevance *rel, struct reclist *reclist)
struct norm_client * findnorm(struct relevance *rel, struct client *client)
const char * session_setting_oneval(struct session_database *db, int offset)
Represents client state for a connection to one search target.
struct norm_client * next
struct norm_record * records
const char * native_score
struct record_cluster * clust
struct norm_record * next
float * term_frequency_vecf
struct record_metadata ** metadata
struct norm_client * norm
struct word_entry * entries
int * term_frequency_vec_tmp
struct conf_service * service