Skip to content

Commit

Permalink
refa: add basic tokenizer for booru sites (#36)
Browse files Browse the repository at this point in the history
* refa: add basic tokenizer for booru sites

* feat: add overridden tokenizer for pixiv / lolicon
  • Loading branch information
MaikoTan authored Mar 23, 2023
1 parent c63789a commit ce3c9a9
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 6 deletions.
14 changes: 8 additions & 6 deletions packages/core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ImageService extends Service {
.filter((source) => {
if (query.labels.length && !query.labels.includes(source.config.label)) return false
if (this.config.detectLanguage) {
const probabilities = this.languageDetect.detect(query.raw, 3).filter((x) => x[1] > this.config.confidence)
const probabilities = this.languageDetect.detect(query.query, 3).filter((x) => x[1] > this.config.confidence)
if (!probabilities.length) {
// if no language detected, just treat it as any language
return true
Expand All @@ -49,7 +49,8 @@ class ImageService extends Service {

// return the first non-empty result
for (const source of sources) {
const images = await source.get(query)
const tags = source.tokenize(query.query)
const images = await source.get({ ...query, tags, raw: query.query })
if (images?.length) return images
}

Expand All @@ -58,8 +59,10 @@ class ImageService extends Service {
}

namespace ImageService {
export interface Query extends ImageSource.Query {
export interface Query {
query: string
labels: string[]
count: number
}
}

Expand Down Expand Up @@ -117,15 +120,14 @@ export function apply(ctx: Context, config: Config) {
}

ctx
.command('booru <query...>')
.command('booru <query:text>')
.option('count', '-c <count:number>', { type: count, fallback: 1 })
.option('label', '-l <label:string>')
.action(async ({ session, options }, query) => {
query = query?.trim() ?? ''

const images = await ctx.booru.get({
tags: query.split(/\s+/),
raw: query,
query,
count: options.count,
labels: options.label?.split(',')?.map((x) => x.trim())?.filter(Boolean) ?? [],
})
Expand Down
9 changes: 9 additions & 0 deletions packages/core/src/source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ export abstract class ImageSource<Config extends ImageSource.Config = ImageSourc
this.ctx.booru.register(this)
}

/**
 * Turn a raw query string into a list of tags.
 *
 * The default implementation treats commas as tag separators: each piece is
 * trimmed, empty pieces are discarded, and what remains is lower-cased with
 * every run of spaces replaced by a single underscore.
 *
 * e.g. `tag1, wordy tag2, UPPER CASED tag3` => `['tag1', 'wordy_tag2', 'upper_cased_tag3']`
 *
 * @param query raw query text
 * @returns the normalized tags, in the order they appear in `query`
 */
tokenize(query: string): string[] {
  const tags: string[] = []
  for (const piece of query.split(',')) {
    const trimmed = piece.trim()
    // skip pieces that were empty or whitespace-only (e.g. `a,,b` or a trailing comma)
    if (!trimmed) continue
    tags.push(trimmed.toLowerCase().replace(/ +/g, '_'))
  }
  return tags
}

abstract get(query: ImageSource.Query): Promise<ImageSource.Result[]>
}

Expand Down
4 changes: 4 additions & 0 deletions packages/lolicon/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ class LoliconImageSource extends ImageSource<LoliconImageSource.Config> {
super(ctx, config)
}

/**
 * Split the query into tags on runs of whitespace.
 *
 * Empty tokens are dropped: `''.split(/\s+/)` yields `['']`, and leading or
 * trailing whitespace yields `''` entries, neither of which is a real tag.
 *
 * @param query raw query text
 * @returns the non-empty whitespace-separated tokens, in order
 */
override tokenize(query: string): string[] {
  return query.split(/\s+/).filter(Boolean)
}

async get(query: ImageSource.Query): Promise<ImageSource.Result[]> {
const proxy = typeof this.config.proxy === 'string' ? this.config.proxy : this.config.proxy?.endpoint
const param: Lolicon.Request = {
Expand Down
4 changes: 4 additions & 0 deletions packages/pixiv/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class PixivImageSource extends ImageSource<PixivImageSource.Config> {
this.refreshToken = config.token
}

/**
 * Split the query into tags on runs of whitespace.
 *
 * Empty tokens are dropped: `''.split(/\s+/)` yields `['']`, and leading or
 * trailing whitespace yields `''` entries, neither of which is a real tag.
 *
 * @param query raw query text
 * @returns the non-empty whitespace-separated tokens, in order
 */
override tokenize(query: string): string[] {
  return query.split(/\s+/).filter(Boolean)
}

async get(query: ImageSource.Query): Promise<ImageSource.Result[]> {
const url = '/v1/search/illust'
const params: PixivAppApi.SearchParams = {
Expand Down

0 comments on commit ce3c9a9

Please sign in to comment.