diff --git a/addon/utils.ts b/addon/utils.ts
index 94193b8c..437806f7 100644
--- a/addon/utils.ts
+++ b/addon/utils.ts
@@ -47,6 +47,7 @@ export async function initConfig() {
   const storedConfig = (await storageGet('config')) || {};
   console.log('storedConfig', storedConfig);
   const updatedConfig = normalizeConfig(storedConfig);
+  updatedConfig.enableHeuristicDetection = true; // always switched on here, regardless of the stored value
   console.log('updated config', updatedConfig);
   await storageSet({
     config: updatedConfig,
diff --git a/lib/heuristics.ts b/lib/heuristics.ts
new file mode 100644
index 00000000..4aabc38b
--- /dev/null
+++ b/lib/heuristics.ts
@@ -0,0 +1,49 @@
+// TODO: check for false positive detections per pattern
+/**
+ * Phrases that commonly appear in cookie consent notices, matched case-insensitively and globally.
+ */
+export const DETECT_PATTERNS = [
+  /accept cookies/gi,
+  /accept all/gi,
+  /reject all/gi,
+  /only necessary cookies/gi, // "only necessary" is probably too broad
+  /by clicking.*(accept|agree|allow)/gi,
+  /by continuing/gi,
+  /we (use|serve)( optional)? cookies/gi,
+  /we are using cookies/gi,
+  /use of cookies/gi,
+  /(this|our) (web)?site.*cookies/gi,
+  /cookies (and|or) .* technologies/gi,
+  /such as cookies/gi,
+  /read more about.*cookies/gi,
+  /consent to.*cookies/gi,
+  /we and our partners.*cookies/gi,
+  /we.*store.*information.*such as.*cookies/gi,
+  /store and\/or access information.*on a device/gi,
+  /personalised ads and content, ad and content measurement/gi,
+
+  // it might be tempting to add the patterns below, but they cause too many false positives. Don't do it :)
+  // /cookies? settings/i,
+  // /cookies? preferences/i,
+];
+
+/**
+ * Scans the rendered text of the current document for every pattern in `DETECT_PATTERNS`.
+ *
+ * @returns `patterns` holds the string form of each pattern that matched at least once;
+ * `snippets` holds every matched substring, truncated to 200 characters.
+ */
+export function checkHeuristicPatterns(): { patterns: string[]; snippets: string[] } {
+  // documentElement is null in a document without a root element, and non-HTML roots (SVG/XML) have no innerText
+  const allText = document.documentElement?.innerText ?? '';
+  const patterns: string[] = [];
+  const snippets: string[] = [];
+  for (const p of DETECT_PATTERNS) {
+    const matches = allText.match(p);
+    if (matches) {
+      patterns.push(p.toString());
+      snippets.push(...matches.map((m) => m.substring(0, 200)));
+    }
+  }
+  return { patterns, snippets };
+}
diff --git a/lib/types.ts b/lib/types.ts
index af067782..567bd772 100644
--- a/lib/types.ts
+++ b/lib/types.ts
@@ -53,6 +53,7 @@ export type Config = {
   isMainWorld: boolean;
   prehideTimeout: number;
   enableFilterList: boolean;
+  enableHeuristicDetection: boolean;
   logs: {
     lifecycle: boolean;
     rulesteps: boolean;
@@ -88,5 +89,7 @@ export type ConsentState = {
   findCmpAttempts: number; // Number of times we tried to find CMPs in this frame.
   detectedCmps: string[]; // Names of CMP rules where `detectCmp` returned true.
   detectedPopups: string[]; // Names of CMP rules where `detectPopup` returned true.
+  heuristicPatterns: string[]; // Matched heuristic patterns
+  heuristicSnippets: string[]; // Matched heuristic snippets
   selfTest: boolean; // null if no self test was run, otherwise it holds the result of the self test.
 };
diff --git a/lib/utils.ts b/lib/utils.ts
index 45a96996..a7a04da8 100644
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -74,6 +74,7 @@ export function normalizeConfig(providedConfig: any): Config {
     disabledCmps: [],
     enablePrehide: true,
     enableCosmeticRules: true,
+    enableHeuristicDetection: false,
     detectRetries: 20,
     isMainWorld: false,
     prehideTimeout: 2000,
@@ -84,6 +85,7 @@ export function normalizeConfig(providedConfig: any): Config {
       evals: false,
       errors: true,
       messages: false,
+      waits: false,
     },
   };
   const updatedConfig: Config = copyObject(defaultConfig);
diff --git a/lib/web.ts b/lib/web.ts
index bc138637..56af0e30 100644
--- a/lib/web.ts
+++ b/lib/web.ts
@@ -11,6 +11,7 @@ import { normalizeConfig, scheduleWhenIdle } from './utils';
 import { deserializeFilterList, getCosmeticStylesheet, getFilterlistSelectors } from './filterlist-utils';
 import { FiltersEngine } from '@ghostery/adblocker';
 import serializedEngine from './filterlist-engine';
+import { checkHeuristicPatterns } from './heuristics';
 
 function filterCMPs(rules: AutoCMP[], config: Config) {
   return rules.filter((cmp) => {
@@ -34,6 +35,8 @@ export default class AutoConsent {
     findCmpAttempts: 0,
     detectedCmps: [],
     detectedPopups: [],
+    heuristicPatterns: [],
+    heuristicSnippets: [],
     selfTest: null,
   };
   domActions: DomActions;
@@ -244,6 +247,8 @@ export default class AutoConsent {
       }
     }
 
+    this.detectHeuristics();
+
     if (foundCMPs.length === 0 && retries > 0) {
       await this.domActions.wait(500);
       return this.findCmp(retries - 1);
@@ -252,6 +257,23 @@ export default class AutoConsent {
     return foundCMPs;
   }
 
+  /**
+   * Runs the heuristic text patterns when `config.enableHeuristicDetection` is set and
+   * publishes the result through `updateState` if the list of matched patterns changed.
+   */
+  detectHeuristics() {
+    if (this.config.enableHeuristicDetection) {
+      const { patterns, snippets } = checkHeuristicPatterns();
+      if (
+        patterns.length > 0 &&
+        (patterns.length !== this.state.heuristicPatterns.length || this.state.heuristicPatterns.some((p, i) => p !== patterns[i]))
+      ) {
+        this.config.logs.lifecycle && console.log('Heuristic patterns found', patterns, snippets);
+        this.updateState({ heuristicPatterns: patterns, heuristicSnippets: snippets }); // we don't care about previously found patterns
+      }
+    }
+  }
+
   /**
    * Detect if a CMP has a popup open. Fullfils with the CMP if a popup is open, otherwise rejects.
    */
@@ -281,6 +303,7 @@ export default class AutoConsent {
 
     await Promise.any(tasks)
       .then((cmp) => {
+        this.detectHeuristics();
         onFirstPopupAppears(cmp);
       })
       .catch(() => null);
@@ -489,7 +512,7 @@ export default class AutoConsent {
           logsConfig?.lifecycle && console.log("Prehide cosmetic filters didn't match", location.href);
         }
       }
-    }, 1000);
+    }, 2000);
     this.updateState({ cosmeticFiltersOn: true });
 
     try {