Skip to content

Commit

Permalink
Broken internal links priority (#528)
Browse files Browse the repository at this point in the history
* testing placeholder priority

* fixed a test

* Changed event to 404-internal-links from 404 and remove associated logic

* Fixed priority percentage logic

* updated logic to calculate priorities based on array positions rather than %

* test updated

* removed comments
  • Loading branch information
nitinja authored Dec 17, 2024
1 parent c74d5e2 commit 14afd79
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 109 deletions.
78 changes: 32 additions & 46 deletions src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,56 +18,41 @@ import { noopUrlResolver } from '../common/audit.js';
import { syncSuggestions } from '../utils/data-access.js';

const INTERVAL = 30; // days
const DAILY_PAGEVIEW_THRESHOLD = 100;
const AUDIT_TYPE = 'broken-internal-links';

/**
* Determines if the URL has the same host as the current host.
* @param {*} url
* @param {*} currentHost
* @returns
* Classifies links into priority categories based on views
* High: top 25%, Medium: next 25%, Low: bottom 50%
* @param {Array} links - Array of objects with views property
* @returns {Array} - Links with priority classifications included
*/
function hasSameHost(url, currentHost) {
const host = new URL(url).hostname;
return host === currentHost;
}

/**
* Filter out the 404 links that:
* - have less than 100 views and do not have a URL.
* - do not have any sources from the same domain.
* @param {*} links - all 404 links Data
* @param {*} hostUrl - the host URL of the domain
* @param {*} auditUrl - the URL to run audit against
* @param {*} log - the logger object
* @returns {Array} - Returns an array of 404 links that meet the criteria.
*/

function transform404LinksData(responseData, hostUrl, auditUrl, log) {
return responseData.reduce((result, { url, views, all_sources: allSources }) => {
try {
if (!url || views < DAILY_PAGEVIEW_THRESHOLD) {
return result;
}
const sameDomainSources = allSources.filter(
(source) => source && hasSameHost(source, hostUrl),
);

for (const source of sameDomainSources) {
result.push({
url_to: url,
url_from: source,
traffic_domain: views,
});
}
} catch {
log.error(
`Error occurred for audit type broken-internal-links for url ${auditUrl}, while processing sources for link ${url}`,
);
function calculatePriority(links) {
// Sort links by views in descending order
const sortedLinks = [...links].sort((a, b) => b.views - a.views);

// Calculate indices for the 25% and 50% marks
const quarterIndex = Math.ceil(sortedLinks.length * 0.25);
const halfIndex = Math.ceil(sortedLinks.length * 0.5);

// Map through sorted links and assign priority
return sortedLinks.map((link, index) => {
let priority;

if (index < quarterIndex) {
priority = 'high';
} else if (index < halfIndex) {
priority = 'medium';
} else {
priority = 'low';
}
return result;
}, []);

return {
...link,
priority,
};
});
}

/**
* Perform an audit to check which internal links for domain are broken.
*
Expand All @@ -93,9 +78,10 @@ export async function internalLinksAuditRunner(auditUrl, context, site) {

log.info('broken-internal-links: Options for RUM call: ', JSON.stringify(options));

const all404Links = await rumAPIClient.query('404', options);
const internal404Links = await rumAPIClient.query('404-internal-links', options);
const priorityLinks = calculatePriority(internal404Links);
const auditResult = {
brokenInternalLinks: transform404LinksData(all404Links, finalUrl, auditUrl, log),
brokenInternalLinks: priorityLinks,
fullAuditRef: auditUrl,
finalUrl,
auditContext: {
Expand Down
26 changes: 17 additions & 9 deletions test/audits/internal-links.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,22 @@ import { MockContextBuilder } from '../shared.js';

const AUDIT_RESULT_DATA = [
{
url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
traffic_domain: 100,
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
{
url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle',
url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 100,
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
{
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
];

Expand Down Expand Up @@ -76,7 +84,7 @@ describe('Broken internal links audit', () => {
context,
site,
);
expect(context.rumApiClient.query).calledWith('404', {
expect(context.rumApiClient.query).calledWith('404-internal-links', {
domain: 'www.example.com',
domainkey: 'test-key',
interval: 30,
Expand Down Expand Up @@ -166,10 +174,10 @@ describe('broken-internal-links audit to opportunity conversion', () => {

expect(context.dataAccess.Opportunity.create).to.have.been.calledOnceWith(expectedOpportunity);

// make sure that newly oppty has 2 new suggestions
// make sure that newly oppty has 3 new suggestions
expect(opportunity.addSuggestions).to.have.been.calledOnce;
const suggestionsArg = opportunity.addSuggestions.getCall(0).args[0];
expect(suggestionsArg).to.be.an('array').with.lengthOf(2);
expect(suggestionsArg).to.be.an('array').with.lengthOf(3);
}).timeout(5000);

it('creating a new opportunity object fails', async () => {
Expand Down
84 changes: 30 additions & 54 deletions test/fixtures/internal-links-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,58 +12,22 @@

export const internalLinksData = [
{
url: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
views: 100,
all_sources: [
'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
},
{
url: 'https://www.example.com/article/dogs/breeds/choosing-an-german-dog',
views: 5,
all_sources: [
'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8',
},
{
url: 'x',
views: 100,
all_sources: [
'invalid-url',
],
source_count: 1,
top_source: '',
},
{
url: 'https://www.example.com/dogs/the-stages-of-canine-reproduction',
views: 100,
all_sources: [
'android-app://com.google.android.googlequicksearchbox/',
],
source_count: 1,
top_source: 'android-app://com.google.android.googlequicksearchbox/',
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
{
url: 'https://www.example.com/article/reptiles/general/unusual-pets-praying-mantis',
views: 100,
all_sources: [
'https://www.google.com/',
],
source_count: 1,
top_source: 'https://www.google.com/',
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
{
url: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle',
views: 100,
all_sources: [
'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
];

Expand Down Expand Up @@ -93,18 +57,30 @@ export const expectedSuggestions = [
type: 'CONTENT_UPDATE',
rank: 100,
data: {
url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
traffic_domain: 100,
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
},
{
type: 'CONTENT_UPDATE',
rank: 100,
data: {
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02-changed',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
},
{
type: 'CONTENT_UPDATE',
rank: 100,
data: {
url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle-1',
url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 100,
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
},
];

0 comments on commit 14afd79

Please sign in to comment.