Skip to content

Commit

Permalink
orama: test different parameter #407
Browse files Browse the repository at this point in the history
for now PT15 seems to generate the best results for the exampleSite
  • Loading branch information
McShelby committed Oct 27, 2024
1 parent 6a20332 commit f986a7d
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 11 deletions.
2 changes: 1 addition & 1 deletion layouts/partials/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
7.1.1+57b73a5f47d69d695fba57ec966ae7dd25400a66
7.1.1+6a20332b518af28c3c59ab45eced9979246ff0a6
59 changes: 49 additions & 10 deletions static/js/orama-adapter.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import {
create,
search as oramaSearch,
insertMultiple,
} from "https://cdn.jsdelivr.net/npm/@orama/orama@latest/+esm";
// "https://unpkg.com/browse/@orama/orama@latest/dist/esm/index.js";
// https://cdn.jsdelivr.net/npm/@orama/orama@<version>/dist/esm/index.js
import { create, search as oramaSearch, insertMultiple } from "https://cdn.jsdelivr.net/npm/@orama/orama@latest/+esm";
import { pluginQPS } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-qps@latest/+esm'
import { pluginPT15 } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-pt15@latest/+esm'
//import { pluginEmbeddings } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-embeddings@latest/+esm'
//import * as tf from 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-core';
//import 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-webgl';


//import { createTokenizer } from '@orama/tokenizers/japanese'
Expand All @@ -14,15 +13,38 @@ let searchEngine = null;

async function init() {
async function initIndex( index ){
/*
const embeddings = await pluginEmbeddings({
embeddings: {
// Property used to store generated embeddings. Must be defined in the schema.
defaultProperty: 'embeddings',
onInsert: {
// Generate embeddings at insert-time.
// Turn off if you're inserting documents with embeddings already generated.
generate: true,
// Properties to use for generating embeddings at insert time.
// These properties will be concatenated and used to generate embeddings.
properties: ['description'],
verbose: true,
}
}
});
*/
searchEngine = await create({
schema: {
title: 'string',
content: 'string',
uri: 'string',
uri: 'string',
breadcrumb: 'string',
description: 'string',
tags: 'string[]',
// embeddings: 'vector[1]'
},
plugins: [
// embeddings,
// pluginQPS()
pluginPT15()
],
/*
defaultLanguage: 'french',
components: {
Expand Down Expand Up @@ -53,8 +75,25 @@ async function init() {
}

/**
 * Query the module-level Orama index (`searchEngine`) for `term` and adapt
 * the hits to the shape returned to the caller.
 *
 * @param {string} term - The raw search string.
 * @returns {Promise<Array<{matches: string[], page: object}>>} One entry per
 *   hit: `page` is the stored document, `matches` is the full term followed
 *   by each of its space-separated parts.
 */
async function search( term ){
    const searchResponse = await oramaSearch(searchEngine, {
        // mode: 'hybrid', // vector search seems not to work
        term: term,
        properties: '*',
        threshold: 0, // only show results where all keywords were found
        limit: 99,
        // Keys must match property names declared in the index schema;
        // a misspelled key is not applied to any field.
        boost: { // doesn't seem to make a difference in score
            tags: 1.8,
            title: 1.5,
            description: 1.3,
            breadcrumb: 1.2,
        },
        // distinctOn: 'title', // just to filter out changelog/releasenotes if having the same title
        // exact: true, // not for PT15
        // tolerance: 1, // not for PT15
    });
    // Debug output while comparing ranking parameters: score and URI per hit.
    console.log( "new term", term )
    searchResponse.hits.forEach( hit => console.log(hit.score, hit.document.uri) );
    // NOTE(review): split(' ') yields empty strings for consecutive spaces —
    // confirm the consumer of `matches` tolerates them.
    return searchResponse.hits.map( hit => ({ matches: [ term, ...term.split(' ') ], page: hit.document }) );
}

export { init, search };

0 comments on commit f986a7d

Please sign in to comment.