From 52fa86c80d1674150db24432bcf29d1870a3343e Mon Sep 17 00:00:00 2001 From: Maiko Tan Date: Thu, 23 Mar 2023 09:01:44 +0800 Subject: [PATCH 1/2] refa: add basic tokenizer for booru sites --- packages/core/src/index.ts | 14 ++++++++------ packages/core/src/source.ts | 9 +++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a9856d76b..c1ea33dc1 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -33,7 +33,7 @@ class ImageService extends Service { .filter((source) => { if (query.labels.length && !query.labels.includes(source.config.label)) return false if (this.config.detectLanguage) { - const probabilities = this.languageDetect.detect(query.raw, 3).filter((x) => x[1] > this.config.confidence) + const probabilities = this.languageDetect.detect(query.query, 3).filter((x) => x[1] > this.config.confidence) if (!probabilities.length) { // if no language detected, just treat it as any language return true @@ -49,7 +49,8 @@ class ImageService extends Service { // return the first non-empty result for (const source of sources) { - const images = await source.get(query) + const tags = source.tokenize(query.query) + const images = await source.get({ ...query, tags, raw: query.query }) if (images?.length) return images } @@ -58,8 +59,10 @@ class ImageService extends Service { } namespace ImageService { - export interface Query extends ImageSource.Query { + export interface Query { + query: string labels: string[] + count: number } } @@ -117,15 +120,14 @@ export function apply(ctx: Context, config: Config) { } ctx - .command('booru ') + .command('booru ') .option('count', '-c ', { type: count, fallback: 1 }) .option('label', '-l ') .action(async ({ session, options }, query) => { query = query?.trim() ?? '' const images = await ctx.booru.get({ - tags: query.split(/\s+/), - raw: query, + query, count: options.count, labels: options.label?.split(',')?.map((x) => x.trim())?.filter(Boolean) ?? [], }) diff --git a/packages/core/src/source.ts b/packages/core/src/source.ts index 636997833..12592f9aa 100644 --- a/packages/core/src/source.ts +++ b/packages/core/src/source.ts @@ -9,6 +9,15 @@ export abstract class ImageSource `['tag1', 'wordy_tag2', 'upper_cased_tag3']` + */ + tokenize(query: string): string[] { + return query.split(',').map((x) => x.trim()).filter(Boolean).map((x) => x.toLowerCase().replace(/ +/g, '_')) + } + abstract get(query: ImageSource.Query): Promise } From 2ebff02b659e7ca01161f6c5834cf5b0ca9a8642 Mon Sep 17 00:00:00 2001 From: Maiko Tan Date: Thu, 23 Mar 2023 09:06:10 +0800 Subject: [PATCH 2/2] feat: add overrided tokenizer for pixiv / lolicon --- packages/lolicon/src/index.ts | 4 ++++ packages/pixiv/src/index.ts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/packages/lolicon/src/index.ts b/packages/lolicon/src/index.ts index 867292397..bf4963f5a 100644 --- a/packages/lolicon/src/index.ts +++ b/packages/lolicon/src/index.ts @@ -9,6 +9,10 @@ class LoliconImageSource extends ImageSource { super(ctx, config) } + override tokenize(query: string) { + return query.split(/\s+/) + } + async get(query: ImageSource.Query): Promise { const proxy = typeof this.config.proxy === 'string' ? this.config.proxy : this.config.proxy?.endpoint const param: Lolicon.Request = { diff --git a/packages/pixiv/src/index.ts b/packages/pixiv/src/index.ts index eafa4eb9c..954031a24 100644 --- a/packages/pixiv/src/index.ts +++ b/packages/pixiv/src/index.ts @@ -20,6 +20,10 @@ class PixivImageSource extends ImageSource { this.refreshToken = config.token } + override tokenize(query: string) { + return query.split(/\s+/) + } + async get(query: ImageSource.Query): Promise { const url = '/v1/search/illust' const params: PixivAppApi.SearchParams = {