-
Notifications
You must be signed in to change notification settings - Fork 25
/
benchmarking.h
321 lines (242 loc) · 7.74 KB
/
benchmarking.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#pragma once
/* Select a dataset below by uncommenting one of the #define lines (and
   commenting out the one that is currently active).
   Then modify the file locations and parameters for that dataset in the
   Parameters section that follows.
   The selection has to appear before the Parameters section: its #if/#elif
   chain is evaluated in order, and the first defined dataset wins. */
//#define SIFT1M
//#define URL
#define WEBSPAM_TRI
//#define KDD12
//#define FRIENDSTER
//#define WEBSPAM_UNI
//#define SIFT10M
//#define SIFT1B
//#define SIFTSMALLTEST
/* Parameters. */
#if defined SIFT1M
/* SIFT1M configuration. The grouping below follows the macro names; the
   precise meaning of each value is defined by the sources that consume it. */

/* Dense marker: SAMFACTOR is supplied here, and the dummy K near the end of
   this header applies as long as SPARSE_DATASET stays undefined. */
#define DENSE_DATASET
#define SAMFACTOR           24

/* Sizes and dimensions. */
#define NUMBASE             1000000
#define NUMQUERY            10000
#define DIMENSION           128
#define FULL_DIMENSION      128

/* Table, reservoir and probing settings. */
#define NUMTABLES           512
#define RANGE_POW           22
#define RANGE_ROW_U         18
#define RESERVOIR_SIZE      32
#define MAX_RESERVOIR_RAND  100000
#define OCCUPANCY           0.4
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        100
#define BATCHPRINT          10

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      1000

/* Input and ground-truth files. */
#define BASEFILE      "../files/datasets/sift1m/sift_base.fvecs"
#define QUERYFILE     "../files/datasets/sift1m/sift_query.fvecs"
#define GTRUTHINDICE  "../files/datasets/sift1m/sift1m_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/sift1m/sift1m_gtruth_distances.txt"
#elif defined URL
/* URL configuration. Every macro is defined exactly once. The grouping below
   follows the macro names; the precise meaning of each value is defined by
   the sources that consume it. */

/* Sparse marker: K is supplied here, and the dummy SAMFACTOR near the end of
   this header applies as long as DENSE_DATASET stays undefined. */
#define SPARSE_DATASET
#define K                   4

/* Sizes and dimensions. */
#define NUMBASE             2386130
#define NUMQUERY            10000
#define DIMENSION           120
#define FULL_DIMENSION      3231961

/* Table, reservoir and probing settings. */
#define NUMTABLES           128
#define RANGE_POW           15
#define RANGE_ROW_U         15
#define RESERVOIR_SIZE      32
#define MAX_RESERVOIR_RAND  2386130
#define OCCUPANCY           1
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        200
#define BATCHPRINT          10

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      1024

/* Input and ground-truth files. BASEFILE and QUERYFILE name the same file. */
#define BASEFILE      "../files/datasets/url/url_combined"
#define QUERYFILE     "../files/datasets/url/url_combined"
#define GTRUTHINDICE  "../files/datasets/url/url_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/url/url_gtruth_distances.txt"
#elif defined WEBSPAM_TRI
/* WEBSPAM_TRI configuration. Every macro is defined exactly once. The
   grouping below follows the macro names; the precise meaning of each value
   is defined by the sources that consume it. */

/* Sparse marker: K is supplied here, and the dummy SAMFACTOR near the end of
   this header applies as long as DENSE_DATASET stays undefined. */
#define SPARSE_DATASET
#define K                   4

/* Sizes and dimensions. */
#define NUMBASE             340000
#define NUMQUERY            10000
#define DIMENSION           4000
#define FULL_DIMENSION      16609143

/* Table, reservoir and probing settings.
   NOTE(review): MAX_RESERVOIR_RAND (35000) is smaller than NUMBASE (340000),
   whereas the URL, KDD12 and WEBSPAM_UNI sections set the two to the same
   value - confirm this is intentional. */
#define NUMTABLES           32
#define RANGE_POW           15
#define RANGE_ROW_U         15
#define RESERVOIR_SIZE      64
#define MAX_RESERVOIR_RAND  35000
#define OCCUPANCY           1
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        50
#define BATCHPRINT          5

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      1024

/* Input and ground-truth files. BASEFILE and QUERYFILE name the same file. */
#define BASEFILE      "../files/datasets/webspam/trigram.svm"
#define QUERYFILE     "../files/datasets/webspam/trigram.svm"
#define GTRUTHINDICE  "../files/datasets/webspam/webspam_tri_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/webspam/webspam_tri_gtruth_distances.txt"
#elif defined KDD12
/* KDD12 configuration. The grouping below follows the macro names; the
   precise meaning of each value is defined by the sources that consume it. */

/* Sparse marker: K is supplied here, and the dummy SAMFACTOR near the end of
   this header applies as long as DENSE_DATASET stays undefined. */
#define SPARSE_DATASET
#define K                   4

/* Sizes and dimensions. */
#define NUMBASE             149629105
#define NUMQUERY            10000
#define DIMENSION           12
#define FULL_DIMENSION      54686452

/* Table, reservoir and probing settings. */
#define NUMTABLES           8
#define RANGE_POW           20
#define RANGE_ROW_U         20
#define RESERVOIR_SIZE      64
#define MAX_RESERVOIR_RAND  149629105
#define OCCUPANCY           1
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        20000
#define BATCHPRINT          2000

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      1024

/* Input and ground-truth files. BASEFILE and QUERYFILE name the same file. */
#define BASEFILE      "../files/datasets/kdd2012/kdd12"
#define QUERYFILE     "../files/datasets/kdd2012/kdd12"
#define GTRUTHINDICE  "../files/datasets/kdd2012/kdd12_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/kdd2012/kdd12_gtruth_distances.txt"
#elif defined FRIENDSTER
/* FRIENDSTER configuration. The grouping below follows the macro names; the
   precise meaning of each value is defined by the sources that consume it. */

/* Sparse marker: K is supplied here, and the dummy SAMFACTOR near the end of
   this header applies as long as DENSE_DATASET stays undefined. */
#define SPARSE_DATASET
#define K                   2

/* Sizes and dimensions. */
#define NUMBASE             65598366
#define NUMQUERY            10000
#define DIMENSION           30
#define FULL_DIMENSION      65608366

/* Table, reservoir and probing settings.
   NOTE(review): MAX_RESERVOIR_RAND matches FULL_DIMENSION (65608366) rather
   than NUMBASE (65598366) - confirm this is intentional. */
#define NUMTABLES           32
#define RANGE_POW           20
#define RANGE_ROW_U         20
#define RESERVOIR_SIZE      128
#define MAX_RESERVOIR_RAND  65608366
#define OCCUPANCY           1
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        10000
#define BATCHPRINT          500

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      1024

/* Input and ground-truth files. BASEFILE and QUERYFILE name the same file. */
#define BASEFILE      "../files/datasets/friendster/friendster.svm"
#define QUERYFILE     "../files/datasets/friendster/friendster.svm"
#define GTRUTHINDICE  "../files/datasets/friendster/friendster_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/friendster/friendster_gtruth_distances.txt"
#elif defined WEBSPAM_UNI
/* WEBSPAM_UNI configuration. The grouping below follows the macro names; the
   precise meaning of each value is defined by the sources that consume it. */

/* Sparse marker: K is supplied here, and the dummy SAMFACTOR near the end of
   this header applies as long as DENSE_DATASET stays undefined. */
#define SPARSE_DATASET
#define K                   4

/* Sizes and dimensions. */
#define NUMBASE             10000
#define NUMQUERY            100
#define DIMENSION           254
#define FULL_DIMENSION      254

/* Table, reservoir and probing settings. */
#define NUMTABLES           32
#define RANGE_POW           12
#define RANGE_ROW_U         12
#define RESERVOIR_SIZE      64
#define MAX_RESERVOIR_RAND  10000
#define OCCUPANCY           1
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        10
#define BATCHPRINT          2

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      128

/* Input and ground-truth files. BASEFILE and QUERYFILE name the same file.
   NOTE(review): the ground-truth paths sit directly under ../files/datasets/
   while the data files are under ../files/datasets/webspam/ - confirm the
   ground-truth location. */
#define BASEFILE      "../files/datasets/webspam/unigram.svm"
#define QUERYFILE     "../files/datasets/webspam/unigram.svm"
#define GTRUTHINDICE  "../files/datasets/webspam_uni_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/webspam_uni_gtruth_distances.txt"
#elif defined(SIFT10M)
/* SIFT10M configuration. The grouping below follows the macro names; the
   precise meaning of each value is defined by the sources that consume it. */

/* Dense marker: SAMFACTOR is supplied here, and the dummy K near the end of
   this header applies as long as SPARSE_DATASET stays undefined. */
#define DENSE_DATASET
#define SAMFACTOR           24

/* Sizes and dimensions. */
#define NUMBASE             10000000
#define NUMQUERY            10000
#define DIMENSION           128
#define FULL_DIMENSION      128

/* Table, reservoir and probing settings. */
#define NUMTABLES           512
#define RANGE_POW           22
#define RANGE_ROW_U         18
#define RESERVOIR_SIZE      32
#define MAX_RESERVOIR_RAND  100000
#define OCCUPANCY           0.4
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        1000
#define BATCHPRINT          100

/* Top-k settings. */
#define TOPK                128
#define AVAILABLE_TOPK      1000

/* Input and ground-truth files (the data files are the same ones the SIFT1B
   section points at). */
#define BASEFILE      "../files/datasets/sift1b/bigann_base.bvecs"
#define QUERYFILE     "../files/datasets/sift1b/bigann_query.bvecs"
#define GTRUTHINDICE  "../files/datasets/sift1b/sift10m_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/sift1b/sift10m_gtruth_distances.txt"
#elif defined SIFT1B
/* SIFT1B configuration. The grouping below follows the macro names; the
   precise meaning of each value is defined by the sources that consume it. */

/* Dense marker: SAMFACTOR is supplied here, and the dummy K near the end of
   this header applies as long as SPARSE_DATASET stays undefined. */
#define DENSE_DATASET
#define SAMFACTOR           24

/* Sizes and dimensions.
   NOTE(review): NUMBASE is 10000000, the same value the SIFT10M section
   uses - confirm this is the intended base size for this configuration. */
#define NUMBASE             10000000
#define NUMQUERY            10000
#define DIMENSION           128
#define FULL_DIMENSION      128

/* Table, reservoir and probing settings. */
#define NUMTABLES           512
#define RANGE_POW           25
#define RANGE_ROW_U         22
#define RESERVOIR_SIZE      32
#define MAX_RESERVOIR_RAND  100000
#define OCCUPANCY           0.4
#define QUERYPROBES         1
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        100000
#define BATCHPRINT          10000

/* Top-k settings. */
#define TOPK                64
#define AVAILABLE_TOPK      100

/* Input and ground-truth files. */
#define BASEFILE      "../files/datasets/sift1b/bigann_base.bvecs"
#define QUERYFILE     "../files/datasets/sift1b/bigann_query.bvecs"
#define GTRUTHINDICE  "../files/datasets/sift1b/sift1b_gtruth_indices.txt"
#define GTRUTHDIST    "../files/datasets/sift1b/sift1b_gtruth_distances.txt"
#elif defined SIFTSMALLTEST
/* SIFTSMALLTEST configuration. The grouping below follows the macro names;
   the precise meaning of each value is defined by the sources that consume
   it. */

/* Dense marker: SAMFACTOR is supplied here, and the dummy K near the end of
   this header applies as long as SPARSE_DATASET stays undefined. */
#define DENSE_DATASET
#define SAMFACTOR           24

/* Sizes and dimensions. */
#define NUMBASE             10000
#define NUMQUERY            100
#define DIMENSION           128
#define FULL_DIMENSION      128

/* Table, reservoir and probing settings. */
#define NUMTABLES           32
#define RANGE_POW           12
#define RANGE_ROW_U         12
#define RESERVOIR_SIZE      16
#define MAX_RESERVOIR_RAND  10000
#define OCCUPANCY           0.4
#define QUERYPROBES         2
#define HASHINGPROBES       1

/* Batching settings. */
#define NUMHASHBATCH        10
#define BATCHPRINT          5

/* Top-k settings. */
#define TOPK                64
#define AVAILABLE_TOPK      100

/* Input and ground-truth files, given as bare file names without a
   directory component. */
#define BASEFILE      "siftsmall_base.fvecs"
#define QUERYFILE     "siftsmall_query.fvecs"
#define GTRUTHINDICE  "siftsmall_gtruth_indices.txt"
#define GTRUTHDIST    "siftsmall_gtruth_distances.txt"
#endif
/* Benchmark entry points. Only the declarations are visible in this header;
   the definitions live elsewhere.
   Parameterless functions are declared with (void) so that they are real
   prototypes in C (an empty () leaves the parameters unspecified there) while
   remaining valid C++.
   NOTE(review): the meaning of the int arguments of benchmark_naiverp,
   benchmark_doph and benchmark_smartrp is not visible here - confirm against
   their definitions. */
void benchmark_kselect(void);
void benchmark_naiverp(int RANDPROJ_COMPRESS);
void benchmark_paragrid(void);
void benchmark_bruteforce(void);
void benchmark_ava(void);
void benchmark_friendster_quality(void);
void benchmark_sparse(void);
void benchmark_dense(void);
void benchmark_doph(int TEST_DOPH);
void benchmark_smartrp(int SMART_RP);
/* Placeholder definitions: a configuration that does not define
   DENSE_DATASET gets a dummy SAMFACTOR, and one that does not define
   SPARSE_DATASET gets a dummy K. */
#ifndef DENSE_DATASET
#  define SAMFACTOR 24  /* Dummy. */
#endif

#ifndef SPARSE_DATASET
#  define K 10          /* Dummy. */
#endif
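/* Additional dataset selectors. NOTE: uncommenting one of these at this point
   has no effect on the parameter sections above, because their #if/#elif
   chain has already been evaluated by the time the preprocessor gets here.
   To use one of these datasets, define its selector before the Parameters
   section (and comment out the selector that is currently active). */
//#define FRIENDSTER
//#define SIFTSMALLTEST
//#define SIFT10M
//#define SIFT1B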