From 14afd7982792a5082990f000b6b8cc06f3d79176 Mon Sep 17 00:00:00 2001 From: Nitin Jadhav <3931042+nitinja@users.noreply.github.com> Date: Tue, 17 Dec 2024 08:08:05 -0600 Subject: [PATCH] Broken internal links priority (#528) * testing placeholder priority * fixed a test * Changed event to 404-internal-links from 404 and remove associated logic * Fixed priority percentage logic * updated logic to calculate priorities based on array positions rather than % * test updated * removed comments --- src/internal-links/handler.js | 78 +++++++++++--------------- test/audits/internal-links.test.js | 26 ++++++--- test/fixtures/internal-links-data.js | 84 ++++++++++------------------ 3 files changed, 79 insertions(+), 109 deletions(-) diff --git a/src/internal-links/handler.js b/src/internal-links/handler.js index 8aeed4ba..fdec9b0c 100644 --- a/src/internal-links/handler.js +++ b/src/internal-links/handler.js @@ -18,56 +18,41 @@ import { noopUrlResolver } from '../common/audit.js'; import { syncSuggestions } from '../utils/data-access.js'; const INTERVAL = 30; // days -const DAILY_PAGEVIEW_THRESHOLD = 100; const AUDIT_TYPE = 'broken-internal-links'; /** - * Determines if the URL has the same host as the current host. - * @param {*} url - * @param {*} currentHost - * @returns + * Classifies links into priority categories based on views + * High: top 25%, Medium: next 25%, Low: bottom 50% + * @param {Array} links - Array of objects with views property + * @returns {Array} - Links with priority classifications included */ -function hasSameHost(url, currentHost) { - const host = new URL(url).hostname; - return host === currentHost; -} - -/** - * Filter out the 404 links that: - * - have less than 100 views and do not have a URL. - * - do not have any sources from the same domain. - * @param {*} links - all 404 links Data - * @param {*} hostUrl - the host URL of the domain - * @param {*} auditUrl - the URL to run audit against - * @param {*} log - the logger object - * @returns {Array} - Returns an array of 404 links that meet the criteria. - */ - -function transform404LinksData(responseData, hostUrl, auditUrl, log) { - return responseData.reduce((result, { url, views, all_sources: allSources }) => { - try { - if (!url || views < DAILY_PAGEVIEW_THRESHOLD) { - return result; - } - const sameDomainSources = allSources.filter( - (source) => source && hasSameHost(source, hostUrl), - ); - - for (const source of sameDomainSources) { - result.push({ - url_to: url, - url_from: source, - traffic_domain: views, - }); - } - } catch { - log.error( - `Error occurred for audit type broken-internal-links for url ${auditUrl}, while processing sources for link ${url}`, - ); +function calculatePriority(links) { + // Sort links by views in descending order + const sortedLinks = [...links].sort((a, b) => b.views - a.views); + + // Calculate indices for the 25% and 50% marks + const quarterIndex = Math.ceil(sortedLinks.length * 0.25); + const halfIndex = Math.ceil(sortedLinks.length * 0.5); + + // Map through sorted links and assign priority + return sortedLinks.map((link, index) => { + let priority; + + if (index < quarterIndex) { + priority = 'high'; + } else if (index < halfIndex) { + priority = 'medium'; + } else { + priority = 'low'; } - return result; - }, []); + + return { + ...link, + priority, + }; + }); } + /** * Perform an audit to check which internal links for domain are broken. * @@ -93,9 +78,10 @@ export async function internalLinksAuditRunner(auditUrl, context, site) { log.info('broken-internal-links: Options for RUM call: ', JSON.stringify(options)); - const all404Links = await rumAPIClient.query('404', options); + const internal404Links = await rumAPIClient.query('404-internal-links', options); + const priorityLinks = calculatePriority(internal404Links); const auditResult = { - brokenInternalLinks: transform404LinksData(all404Links, finalUrl, auditUrl, log), + brokenInternalLinks: priorityLinks, fullAuditRef: auditUrl, finalUrl, auditContext: { diff --git a/test/audits/internal-links.test.js b/test/audits/internal-links.test.js index 641c36d7..72871a62 100644 --- a/test/audits/internal-links.test.js +++ b/test/audits/internal-links.test.js @@ -23,14 +23,22 @@ import { MockContextBuilder } from '../shared.js'; const AUDIT_RESULT_DATA = [ { - url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter', - url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds', - traffic_domain: 100, + traffic_domain: 1800, + url_to: 'https://www.petplace.com/a01', + url_from: 'https://www.petplace.com/a02nf', + priority: 'high', }, { - url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle', - url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior', - traffic_domain: 100, + traffic_domain: 1200, + url_to: 'https://www.petplace.com/ax02', + url_from: 'https://www.petplace.com/ax02nf', + priority: 'medium', + }, + { + traffic_domain: 200, + url_to: 'https://www.petplace.com/a01', + url_from: 'https://www.petplace.com/a01nf', + priority: 'low', }, ]; @@ -76,7 +84,7 @@ describe('Broken internal links audit', () => { context, site, ); - expect(context.rumApiClient.query).calledWith('404', { + expect(context.rumApiClient.query).calledWith('404-internal-links', { domain: 'www.example.com', domainkey: 'test-key', interval: 30, @@ -166,10 +174,10 @@ describe('broken-internal-links audit to opportunity conversion', () => { expect(context.dataAccess.Opportunity.create).to.have.been.calledOnceWith(expectedOpportunity); - // make sure that newly oppty has 2 new suggestions + // make sure that newly oppty has 3 new suggestions expect(opportunity.addSuggestions).to.have.been.calledOnce; const suggestionsArg = opportunity.addSuggestions.getCall(0).args[0]; - expect(suggestionsArg).to.be.an('array').with.lengthOf(2); + expect(suggestionsArg).to.be.an('array').with.lengthOf(3); }).timeout(5000); it('creating a new opportunity object fails', async () => { diff --git a/test/fixtures/internal-links-data.js b/test/fixtures/internal-links-data.js index b3433406..265ce562 100644 --- a/test/fixtures/internal-links-data.js +++ b/test/fixtures/internal-links-data.js @@ -12,58 +12,22 @@ export const internalLinksData = [ { - url: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter', - views: 100, - all_sources: [ - 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds', - ], - source_count: 1, - top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds', - }, - { - url: 'https://www.example.com/article/dogs/breeds/choosing-an-german-dog', - views: 5, - all_sources: [ - 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8', - ], - source_count: 1, - top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8', - }, - { - url: 'x', - views: 100, - all_sources: [ - 'invalid-url', - ], - source_count: 1, - top_source: '', - }, - { - url: 'https://www.example.com/dogs/the-stages-of-canine-reproduction', - views: 100, - all_sources: [ - 'android-app://com.google.android.googlequicksearchbox/', - ], - source_count: 1, - top_source: 'android-app://com.google.android.googlequicksearchbox/', + traffic_domain: 1800, + url_to: 'https://www.petplace.com/a01', + url_from: 'https://www.petplace.com/a02nf', + priority: 'high', }, { - url: 'https://www.example.com/article/reptiles/general/unusual-pets-praying-mantis', - views: 100, - all_sources: [ - 'https://www.google.com/', - ], - source_count: 1, - top_source: 'https://www.google.com/', + traffic_domain: 1200, + url_to: 'https://www.petplace.com/ax02', + url_from: 'https://www.petplace.com/ax02nf', + priority: 'medium', }, { - url: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle', - views: 100, - all_sources: [ - 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior', - ], - source_count: 1, - top_source: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior', + traffic_domain: 200, + url_to: 'https://www.petplace.com/a01', + url_from: 'https://www.petplace.com/a01nf', + priority: 'low', }, ]; @@ -93,18 +57,30 @@ export const expectedSuggestions = [ type: 'CONTENT_UPDATE', rank: 100, data: { - url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter', - url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds', - traffic_domain: 100, + traffic_domain: 1800, + url_to: 'https://www.petplace.com/a01', + url_from: 'https://www.petplace.com/a02nf', + priority: 'high', + }, + }, + { + type: 'CONTENT_UPDATE', + rank: 100, + data: { + traffic_domain: 1200, + url_to: 'https://www.petplace.com/ax02-changed', + url_from: 'https://www.petplace.com/ax02nf', + priority: 'medium', }, }, { type: 'CONTENT_UPDATE', rank: 100, data: { - url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle-1', - url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior', - traffic_domain: 100, + traffic_domain: 200, + url_to: 'https://www.petplace.com/a01', + url_from: 'https://www.petplace.com/a01nf', + priority: 'low', }, }, ];