Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional heuristic pattern detection #555

Merged
merged 4 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions addon/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export async function initConfig() {
const storedConfig = (await storageGet('config')) || {};
console.log('storedConfig', storedConfig);
const updatedConfig = normalizeConfig(storedConfig);
updatedConfig.enableHeuristicDetection = true;
console.log('updated config', updatedConfig);
await storageSet({
config: updatedConfig,
Expand Down
39 changes: 39 additions & 0 deletions lib/heuristics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * Phrases that commonly show up in cookie-consent notices, expressed as
 * case-insensitive, global regular expressions.
 *
 * `checkHeuristicPatterns` walks this list in order and reports every entry
 * that matches the page text, so the order of entries is part of the output.
 *
 * TODO: check for false positive detections per pattern
 */
export const DETECT_PATTERNS: RegExp[] = [
    // --- labels of consent buttons ---
    /accept cookies/gi,
    /accept all/gi,
    /reject all/gi,
    /only necessary cookies/gi, // "only necessary" is probably too broad

    // --- implied-consent wording ---
    /by clicking.*(accept|agree|allow)/gi,
    /by continuing/gi,

    // --- statements that the site uses cookies ---
    /we (use|serve)( optional)? cookies/gi,
    /we are using cookies/gi,
    /use of cookies/gi,
    /(this|our) (web)?site.*cookies/gi,
    /cookies (and|or) .* technologies/gi,
    /such as cookies/gi,
    /read more about.*cookies/gi,
    /consent to.*cookies/gi,

    // --- partner / device-storage / ad-personalisation wording ---
    /we and our partners.*cookies/gi,
    /we.*store.*information.*such as.*cookies/gi,
    /store and\/or access information.*on a device/gi,
    /personalised ads and content, ad and content measurement/gi,

    // It might be tempting to add the patterns below, but they cause too many
    // false positives. Don't do it :)
    // /cookies? settings/i,
    // /cookies? preferences/i,
];

/**
 * Scan the rendered text of the current document for cookie-notice phrases.
 *
 * Every regex in `DETECT_PATTERNS` is matched against
 * `document.documentElement.innerText`.
 *
 * @returns `patterns` - the string form (`RegExp.prototype.toString`) of each
 *   regex that matched at least once, in `DETECT_PATTERNS` order;
 *   `snippets` - every matched substring, each truncated to 200 characters.
 *   Both arrays are empty when nothing matches or the document has no
 *   readable text.
 */
export function checkHeuristicPatterns(): { patterns: string[]; snippets: string[] } {
    // `innerText` only exists on HTML elements: for SVG/XML documents it is
    // undefined, and `documentElement` itself can be null for an empty
    // document. Treat both cases as "no text" instead of throwing.
    const rootText: unknown = document.documentElement?.innerText;
    const allText = typeof rootText === 'string' ? rootText : '';

    const patterns: string[] = [];
    const snippets: string[] = [];
    for (const p of DETECT_PATTERNS) {
        // All patterns carry the `g` flag, so `match` returns every full match
        // (or null) and ignores any stale `lastIndex` on the shared regex.
        const matches = allText.match(p);
        if (matches) {
            patterns.push(p.toString());
            // Push one by one rather than spreading, so that a page with a very
            // large number of matches cannot hit the argument-count limit.
            for (const m of matches) {
                snippets.push(m.substring(0, 200));
            }
        }
    }
    return { patterns, snippets };
}
3 changes: 3 additions & 0 deletions lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export type Config = {
isMainWorld: boolean;
prehideTimeout: number;
enableFilterList: boolean;
enableHeuristicDetection: boolean;
logs: {
lifecycle: boolean;
rulesteps: boolean;
Expand Down Expand Up @@ -88,5 +89,7 @@ export type ConsentState = {
findCmpAttempts: number; // Number of times we tried to find CMPs in this frame.
detectedCmps: string[]; // Names of CMP rules where `detectCmp` returned true.
detectedPopups: string[]; // Names of CMP rules where `detectPopup` returned true.
heuristicPatterns: string[]; // Matched heuristic patterns
heuristicSnippets: string[]; // Matched heuristic snippets
selfTest: boolean; // null if no self test was run, otherwise it holds the result of the self test.
};
2 changes: 2 additions & 0 deletions lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ export function normalizeConfig(providedConfig: any): Config {
disabledCmps: [],
enablePrehide: true,
enableCosmeticRules: true,
enableHeuristicDetection: false,
detectRetries: 20,
isMainWorld: false,
prehideTimeout: 2000,
Expand All @@ -84,6 +85,7 @@ export function normalizeConfig(providedConfig: any): Config {
evals: false,
errors: true,
messages: false,
waits: false,
},
};
const updatedConfig: Config = copyObject(defaultConfig);
Expand Down
21 changes: 20 additions & 1 deletion lib/web.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { normalizeConfig, scheduleWhenIdle } from './utils';
import { deserializeFilterList, getCosmeticStylesheet, getFilterlistSelectors } from './filterlist-utils';
import { FiltersEngine } from '@ghostery/adblocker';
import serializedEngine from './filterlist-engine';
import { checkHeuristicPatterns } from './heuristics';

function filterCMPs(rules: AutoCMP[], config: Config) {
return rules.filter((cmp) => {
Expand All @@ -34,6 +35,8 @@ export default class AutoConsent {
findCmpAttempts: 0,
detectedCmps: [],
detectedPopups: [],
heuristicPatterns: [],
heuristicSnippets: [],
selfTest: null,
};
domActions: DomActions;
Expand Down Expand Up @@ -244,6 +247,8 @@ export default class AutoConsent {
}
}

this.detectHeuristics();

if (foundCMPs.length === 0 && retries > 0) {
await this.domActions.wait(500);
return this.findCmp(retries - 1);
Expand All @@ -252,6 +257,19 @@ export default class AutoConsent {
return foundCMPs;
}

/**
 * Run the heuristic text-pattern scan (when enabled in the config) and record
 * the result in the consent state.
 *
 * State is only updated when at least one pattern matched and the list of
 * matched patterns differs from the one already stored; an empty result never
 * overwrites earlier findings.
 */
detectHeuristics() {
    if (!this.config.enableHeuristicDetection) {
        return;
    }

    const found = checkHeuristicPatterns();
    if (found.patterns.length === 0) {
        return;
    }

    // Same list as before (same length, same entries in the same order)? Then nothing to report.
    const known = this.state.heuristicPatterns;
    const sameAsKnown = found.patterns.length === known.length && known.every((pattern, idx) => pattern === found.patterns[idx]);
    if (sameAsKnown) {
        return;
    }

    if (this.config.logs.lifecycle) {
        console.log('Heuristic patterns found', found.patterns, found.snippets);
    }
    // Replace, rather than merge with, whatever was stored previously.
    this.updateState({ heuristicPatterns: found.patterns, heuristicSnippets: found.snippets });
}

/**
* Detect if a CMP has a popup open. Fulfills with the CMP if a popup is open, otherwise rejects.
*/
Expand Down Expand Up @@ -281,6 +299,7 @@ export default class AutoConsent {

await Promise.any(tasks)
.then((cmp) => {
this.detectHeuristics();
onFirstPopupAppears(cmp);
})
.catch(() => null);
Expand Down Expand Up @@ -489,7 +508,7 @@ export default class AutoConsent {
logsConfig?.lifecycle && console.log("Prehide cosmetic filters didn't match", location.href);
}
}
}, 1000);
}, 2000);

this.updateState({ cosmeticFiltersOn: true });
try {
Expand Down
Loading