From e173259bee707d2ba4a0fb9b53d561bb9c34a8b7 Mon Sep 17 00:00:00 2001 From: Maxim Tsoy Date: Thu, 28 Nov 2024 21:30:20 +0100 Subject: [PATCH 1/4] Set waits log default value --- lib/utils.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/utils.ts b/lib/utils.ts index 45a96996..6e43e6ca 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -84,6 +84,7 @@ export function normalizeConfig(providedConfig: any): Config { evals: false, errors: true, messages: false, + waits: false, }, }; const updatedConfig: Config = copyObject(defaultConfig); From c6c2708c0011e0bd7967f851bfabf8bd6a03019f Mon Sep 17 00:00:00 2001 From: Maxim Tsoy Date: Thu, 28 Nov 2024 21:30:49 +0100 Subject: [PATCH 2/4] Increase the timeout for the prehide filterlist check --- lib/web.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/web.ts b/lib/web.ts index bc138637..9fdf986b 100644 --- a/lib/web.ts +++ b/lib/web.ts @@ -489,7 +489,7 @@ export default class AutoConsent { logsConfig?.lifecycle && console.log("Prehide cosmetic filters didn't match", location.href); } } - }, 1000); + }, 2000); this.updateState({ cosmeticFiltersOn: true }); try { From 818b8516152419196c44d7873b04a57b1c44669c Mon Sep 17 00:00:00 2001 From: Maxim Tsoy Date: Thu, 28 Nov 2024 21:31:15 +0100 Subject: [PATCH 3/4] Add optional check for heuristic patterns --- addon/utils.ts | 1 + lib/heuristics.ts | 39 +++++++++++++++++++++++++++++++++++++++ lib/types.ts | 3 +++ lib/utils.ts | 1 + lib/web.ts | 14 ++++++++++++++ 5 files changed, 58 insertions(+) create mode 100644 lib/heuristics.ts diff --git a/addon/utils.ts b/addon/utils.ts index 94193b8c..437806f7 100644 --- a/addon/utils.ts +++ b/addon/utils.ts @@ -47,6 +47,7 @@ export async function initConfig() { const storedConfig = (await storageGet('config')) || {}; console.log('storedConfig', storedConfig); const updatedConfig = normalizeConfig(storedConfig); + updatedConfig.enableHeuristicDetection = true; console.log('updated config', updatedConfig); await 
// TODO: check for false positive detections per pattern
/**
 * Phrases that commonly show up in cookie-consent notices.
 *
 * Every pattern carries the `g` and `i` flags, so a `String.prototype.match`
 * call returns all case-insensitive occurrences. `.` does not match line
 * breaks here, so the `.*` parts stay within a single line of text.
 */
export const DETECT_PATTERNS = [
    /accept cookies/gi,
    /accept all/gi,
    /reject all/gi,
    /only necessary cookies/gi, // "only necessary" is probably too broad
    /by clicking.*(accept|agree|allow)/gi,
    /by continuing/gi,
    /we (use|serve)( optional)? cookies/gi,
    /we are using cookies/gi,
    /use of cookies/gi,
    /(this|our) (web)?site.*cookies/gi,
    /cookies (and|or) .* technologies/gi,
    /such as cookies/gi,
    /read more about.*cookies/gi,
    /consent to.*cookies/gi,
    /we and our partners.*cookies/gi,
    /we.*store.*information.*such as.*cookies/gi,
    /store and\/or access information.*on a device/gi,
    /personalised ads and content, ad and content measurement/gi,

    // it might be tempting to add the patterns below, but they cause too many false positives. Don't do it :)
    // /cookies? settings/i,
    // /cookies? preferences/i,
];

/**
 * Scans the rendered text of the current document for cookie-notice phrases.
 *
 * @returns `patterns` - the string form (e.g. `"/accept all/gi"`) of every
 * entry of `DETECT_PATTERNS` that matched at least once, in list order;
 * `snippets` - every matched substring, each cut to at most 200 characters.
 * Both arrays are empty when nothing matches or when the document exposes no
 * text.
 */
export function checkHeuristicPatterns(): { patterns: string[]; snippets: string[] } {
    // `innerText` is only provided by HTML elements. In SVG/XML documents (or a
    // document without a root element) it is missing, and calling `.match` on
    // it would throw, so treat those documents as having no text.
    const allText: string = document.documentElement?.innerText ?? '';
    const patterns: string[] = [];
    const snippets: string[] = [];
    for (const p of DETECT_PATTERNS) {
        // match() with a global regex starts from index 0 on every call, so
        // reusing the shared regex objects across calls is safe.
        const matches = allText.match(p);
        if (matches) {
            patterns.push(p.toString());
            // greedy `.*` patterns can swallow a whole line; keep snippets short
            snippets.push(...matches.map((m) => m.substring(0, 200)));
        }
    }
    return { patterns, snippets };
}
detectedPopups: string[]; // Names of CMP rules where `detectPopup` returned true. + heuristicPatterns: string[]; // Matched heuristic patterns + heuristicSnippets: string[]; // Matched heuristic snippets selfTest: boolean; // null if no self test was run, otherwise it holds the result of the self test. }; diff --git a/lib/utils.ts b/lib/utils.ts index 6e43e6ca..a7a04da8 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -74,6 +74,7 @@ export function normalizeConfig(providedConfig: any): Config { disabledCmps: [], enablePrehide: true, enableCosmeticRules: true, + enableHeuristicDetection: false, detectRetries: 20, isMainWorld: false, prehideTimeout: 2000, diff --git a/lib/web.ts b/lib/web.ts index 9fdf986b..1ab67d88 100644 --- a/lib/web.ts +++ b/lib/web.ts @@ -11,6 +11,7 @@ import { normalizeConfig, scheduleWhenIdle } from './utils'; import { deserializeFilterList, getCosmeticStylesheet, getFilterlistSelectors } from './filterlist-utils'; import { FiltersEngine } from '@ghostery/adblocker'; import serializedEngine from './filterlist-engine'; +import { checkHeuristicPatterns } from './heuristics'; function filterCMPs(rules: AutoCMP[], config: Config) { return rules.filter((cmp) => { @@ -34,6 +35,8 @@ export default class AutoConsent { findCmpAttempts: 0, detectedCmps: [], detectedPopups: [], + heuristicPatterns: [], + heuristicSnippets: [], selfTest: null, }; domActions: DomActions; @@ -224,6 +227,17 @@ export default class AutoConsent { this.updateState({ findCmpAttempts: this.state.findCmpAttempts + 1 }); const foundCMPs: AutoCMP[] = []; + if (this.config.enableHeuristicDetection) { + const { patterns, snippets } = checkHeuristicPatterns(); + if ( + patterns.length > 0 && + (patterns.length !== this.state.heuristicPatterns.length || this.state.heuristicPatterns.some((p, i) => p !== patterns[i])) + ) { + logsConfig.lifecycle && console.log('Heuristic patterns found', patterns, snippets); + this.updateState({ heuristicPatterns: patterns, heuristicSnippets: snippets 
/**
 * Runs the heuristic text patterns against the page and stores the result
 * in the consent state when it differs from what is already stored.
 *
 * Does nothing when `enableHeuristicDetection` is off, when no pattern
 * matched, or when the matched pattern list equals the stored one.
 */
detectHeuristics() {
    if (!this.config.enableHeuristicDetection) {
        return;
    }

    const { patterns, snippets } = checkHeuristicPatterns();
    if (patterns.length === 0) {
        // an empty result never overwrites earlier findings
        return;
    }

    const stored = this.state.heuristicPatterns;
    const sameAsStored = patterns.length === stored.length && stored.every((prev, idx) => prev === patterns[idx]);
    if (sameAsStored) {
        return;
    }

    if (this.config.logs.lifecycle) {
        console.log('Heuristic patterns found', patterns, snippets);
    }
    // we don't care about previously found patterns
    this.updateState({ heuristicPatterns: patterns, heuristicSnippets: snippets });
}