-
Notifications
You must be signed in to change notification settings - Fork 0
/
avb2.java
141 lines (119 loc) · 4.21 KB
/
avb2.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// BSD License (http://lemurproject.org/galago-license)
package org.lemurproject.galago.core.retrieval.iterator.scoring;
import org.lemurproject.galago.core.retrieval.RequiredParameters;
import org.lemurproject.galago.core.retrieval.RequiredStatistics;
import org.lemurproject.galago.core.retrieval.iterator.CountIterator;
import org.lemurproject.galago.core.retrieval.iterator.DeltaScoringIterator;
import org.lemurproject.galago.core.retrieval.iterator.LengthsIterator;
import org.lemurproject.galago.core.retrieval.iterator.ScoringFunctionIterator;
import org.lemurproject.galago.core.retrieval.processing.ScoringContext;
import org.lemurproject.galago.core.retrieval.query.AnnotatedNode;
import org.lemurproject.galago.core.retrieval.query.NodeParameters;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Scoring iterator that combines a log-dampened term count, a document-length
 * normalization controlled by {@code b}, and a log inverse document frequency:
 *
 * <pre>
 *   score(count, length) =
 *       count * (1 + ln(1 + count)) / (1 - b + b * length / avgDocLength)
 *             * ln((documentCount + 1) / df)
 * </pre>
 *
 * where {@code df} is the {@code nodeDocumentCount} statistic. The class also
 * exposes the bounds and weight required by {@link DeltaScoringIterator}.
 *
 * <p>The {@code k} parameter and the {@code nodeFrequency} statistic are read
 * but are not used by the scoring formula above.
 *
 * @author irmarc
 */
@RequiredStatistics(statistics = {"collectionLength", "documentCount", "nodeFrequency", "nodeDocumentCount", "maximumCount"})
@RequiredParameters(parameters = {"b", "k"})
public class avb2 extends ScoringFunctionIterator implements DeltaScoringIterator {
  // delta
  private final double weight;
  private final double min;
  private final double max;
  private final double weightedMax;
  private final double weightedMin;
  private final double weightedMaxDiff;
  // scoring
  private final double b;
  private final double k;
  private final long documentCount;
  private final double avgDocLength;
  // BM25-style idf; only reported through getAnnotatedNode, not used in score().
  private final double idf;
  private final long collectionFrequency;
  // ln((documentCount + 1) / df): the idf factor actually used by score().
  private final double scoringIdf;

  /**
   * Reads the scoring parameters and collection statistics from {@code np}
   * and precomputes the score bounds used for delta scoring.
   *
   * @param np node parameters holding {@code b}, {@code k}, {@code w} and the
   *           statistics named in {@link RequiredStatistics}
   * @param ls iterator providing document lengths
   * @param it iterator providing term counts
   * @throws IOException propagated from the superclass constructor
   */
  public avb2(NodeParameters np, LengthsIterator ls, CountIterator it)
          throws IOException {
    super(np, ls, it);

    // parameters and collection statistics
    b = np.get("b", 0.75);
    k = np.get("k", 1.2);

    double collectionLength = np.getLong("collectionLength");
    collectionFrequency = np.getLong("nodeFrequency");
    documentCount = np.getLong("documentCount");
    avgDocLength = (collectionLength + 0.0) / (documentCount + 0.0);

    // now get idf
    long df = np.getLong("nodeDocumentCount");

    // I'm not convinced this is the correct idf formulation -- MAC
    //idf = Math.log((documentCount - df + 0.5) / (df + 0.5));
    idf = Math.log(documentCount / (df + 0.5));

    // Divide in floating point: long / long would truncate the ratio before
    // the logarithm. df is floored at 1 so a node that occurs in no document
    // yields a finite factor (and a score of 0) rather than Infinity/NaN.
    scoringIdf = Math.log((documentCount + 1.0) / Math.max(df, 1L));

    // Delta scoring stuff
    weight = np.get("w", 1.0);
    // Upper bound: the maximum count, scored with a length equal to that count.
    long maximumCount = np.getLong("maximumCount");
    max = score(maximumCount, maximumCount);
    // Lower bound: no occurrences, scored with a length of 1.
    min = score(0, 1);

    weightedMin = weight * min;
    weightedMax = weight * max;
    weightedMaxDiff = weightedMax - weightedMin;
  }

  /** @return the unweighted score for a count of zero */
  @Override
  public double minimumScore() {
    return min;
  }

  /** @return the unweighted score computed from the {@code maximumCount} statistic */
  @Override
  public double maximumScore() {
    return max;
  }

  /** @return the weight read from the {@code w} parameter (default 1.0) */
  @Override
  public double getWeight() {
    return weight;
  }

  /** @return the weighted maximum score minus the weighted minimum score */
  @Override
  public double maximumDifference() {
    return weightedMaxDiff;
  }

  /** @return {@code weight * maximumScore()} */
  @Override
  public double maximumWeightedScore() {
    return weightedMax;
  }

  /** @return {@code weight * minimumScore()} */
  @Override
  public double minimumWeightedScore() {
    return weightedMin;
  }

  /**
   * Weighted gap between the maximum score and the score in context {@code c}.
   *
   * @param c the scoring context to evaluate
   * @return {@code weight * (max - score(c))}
   */
  @Override
  public double deltaScore(ScoringContext c) {
    double diff = weight * (max - score(c));
    return diff;
  }

  /**
   * Scoring function interface (allows direct scoring)
   *
   * @param c the scoring context to evaluate
   * @return the score computed from the count and length read in {@code c}
   */
  @Override
  public double score(ScoringContext c) {
    double count = ((CountIterator) iterator).count(c);
    double length = this.lengthsIterator.length(c);
    return score(count, length);
  }

  /**
   * Applies the scoring formula to an explicit count and length.
   *
   * @param count  number of occurrences
   * @param length document length
   * @return {@code count * (1 + ln(1 + count)) / (1 - b + b * length / avgDocLength) * scoringIdf}
   */
  private double score(double count, double length) {
    double numerator = 1 + Math.log(1 + count);
    double denominator = 1 - b + (b * length / avgDocLength);
    return count * (numerator / denominator) * scoringIdf;
  }

  /**
   * Builds an annotated description of this node for context {@code c},
   * with the lengths and count iterators as children.
   *
   * @param c the scoring context to describe
   * @return the annotated node; its extra info carries the {@code idf} field
   * @throws IOException declared by the overridden method
   */
  @Override
  public AnnotatedNode getAnnotatedNode(ScoringContext c) throws IOException {
    String type = "avb2";
    String className = this.getClass().getSimpleName();
    String parameters = np.toString();
    long document = currentCandidate();
    boolean atCandidate = hasMatch(c);
    String returnValue = Double.toString(score(c));
    List<AnnotatedNode> children = new ArrayList<AnnotatedNode>();
    children.add(this.lengthsIterator.getAnnotatedNode(c));
    children.add(this.countIterator.getAnnotatedNode(c));
    String extraInfo = "idf="+idf;

    return new AnnotatedNode(type, className, parameters, document, atCandidate, returnValue, extraInfo, children);
  }
}