Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Broken internal links priority #528

Merged
merged 11 commits into from
Dec 17, 2024
78 changes: 32 additions & 46 deletions src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,56 +18,41 @@ import { noopUrlResolver } from '../common/audit.js';
import { syncSuggestions } from '../utils/data-access.js';

const INTERVAL = 30; // days
const DAILY_PAGEVIEW_THRESHOLD = 100;
const AUDIT_TYPE = 'broken-internal-links';

/**
* Determines if the URL has the same host as the current host.
* @param {*} url
* @param {*} currentHost
* @returns
* Classifies links into priority categories based on views
* High: top 25%, Medium: next 25%, Low: bottom 50%
* @param {Array} links - Array of objects with views property
* @returns {Array} - Links with priority classifications included
*/
function hasSameHost(url, currentHost) {
const host = new URL(url).hostname;
return host === currentHost;
}

/**
* Filter out the 404 links that:
* - have less than 100 views and do not have a URL.
* - do not have any sources from the same domain.
* @param {*} links - all 404 links Data
* @param {*} hostUrl - the host URL of the domain
* @param {*} auditUrl - the URL to run audit against
* @param {*} log - the logger object
* @returns {Array} - Returns an array of 404 links that meet the criteria.
*/

function transform404LinksData(responseData, hostUrl, auditUrl, log) {
return responseData.reduce((result, { url, views, all_sources: allSources }) => {
try {
if (!url || views < DAILY_PAGEVIEW_THRESHOLD) {
return result;
}
const sameDomainSources = allSources.filter(
(source) => source && hasSameHost(source, hostUrl),
);

for (const source of sameDomainSources) {
result.push({
url_to: url,
url_from: source,
traffic_domain: views,
});
}
} catch {
log.error(
`Error occurred for audit type broken-internal-links for url ${auditUrl}, while processing sources for link ${url}`,
);
function calculatePriority(links) {
// Sort links by views in descending order
const sortedLinks = [...links].sort((a, b) => b.views - a.views);

// Calculate indices for the 25% and 50% marks
const quarterIndex = Math.ceil(sortedLinks.length * 0.25);
const halfIndex = Math.ceil(sortedLinks.length * 0.5);

// Map through sorted links and assign priority
return sortedLinks.map((link, index) => {
let priority;

if (index < quarterIndex) {
priority = 'high';
} else if (index < halfIndex) {
priority = 'medium';
} else {
priority = 'low';
}
return result;
}, []);

return {
...link,
priority,
};
});
}

/**
* Perform an audit to check which internal links for domain are broken.
*
Expand All @@ -93,9 +78,10 @@ export async function internalLinksAuditRunner(auditUrl, context, site) {

log.info('broken-internal-links: Options for RUM call: ', JSON.stringify(options));

const all404Links = await rumAPIClient.query('404', options);
const internal404Links = await rumAPIClient.query('404-internal-links', options);
const priorityLinks = calculatePriority(internal404Links);
const auditResult = {
brokenInternalLinks: transform404LinksData(all404Links, finalUrl, auditUrl, log),
brokenInternalLinks: priorityLinks,
fullAuditRef: auditUrl,
finalUrl,
auditContext: {
Expand Down
26 changes: 17 additions & 9 deletions test/audits/internal-links.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,22 @@ import { MockContextBuilder } from '../shared.js';

const AUDIT_RESULT_DATA = [
{
url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
traffic_domain: 100,
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
{
url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle',
url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 100,
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
{
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
];

Expand Down Expand Up @@ -76,7 +84,7 @@ describe('Broken internal links audit', () => {
context,
site,
);
expect(context.rumApiClient.query).calledWith('404', {
expect(context.rumApiClient.query).calledWith('404-internal-links', {
domain: 'www.example.com',
domainkey: 'test-key',
interval: 30,
Expand Down Expand Up @@ -166,10 +174,10 @@ describe('broken-internal-links audit to opportunity conversion', () => {

expect(context.dataAccess.Opportunity.create).to.have.been.calledOnceWith(expectedOpportunity);

// make sure that newly oppty has 2 new suggestions
// make sure that newly oppty has 3 new suggestions
expect(opportunity.addSuggestions).to.have.been.calledOnce;
const suggestionsArg = opportunity.addSuggestions.getCall(0).args[0];
expect(suggestionsArg).to.be.an('array').with.lengthOf(2);
expect(suggestionsArg).to.be.an('array').with.lengthOf(3);
}).timeout(5000);

it('creating a new opportunity object fails', async () => {
Expand Down
84 changes: 30 additions & 54 deletions test/fixtures/internal-links-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,58 +12,22 @@

export const internalLinksData = [
{
url: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
views: 100,
all_sources: [
'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
},
{
url: 'https://www.example.com/article/dogs/breeds/choosing-an-german-dog',
views: 5,
all_sources: [
'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8',
},
{
url: 'x',
views: 100,
all_sources: [
'invalid-url',
],
source_count: 1,
top_source: '',
},
{
url: 'https://www.example.com/dogs/the-stages-of-canine-reproduction',
views: 100,
all_sources: [
'android-app://com.google.android.googlequicksearchbox/',
],
source_count: 1,
top_source: 'android-app://com.google.android.googlequicksearchbox/',
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
{
url: 'https://www.example.com/article/reptiles/general/unusual-pets-praying-mantis',
views: 100,
all_sources: [
'https://www.google.com/',
],
source_count: 1,
top_source: 'https://www.google.com/',
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
{
url: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle',
views: 100,
all_sources: [
'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
];

Expand Down Expand Up @@ -93,18 +57,30 @@ export const expectedSuggestions = [
type: 'CONTENT_UPDATE',
rank: 100,
data: {
url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
traffic_domain: 100,
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
},
{
type: 'CONTENT_UPDATE',
rank: 100,
data: {
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02-changed',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
},
{
type: 'CONTENT_UPDATE',
rank: 100,
data: {
url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle-1',
url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 100,
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
},
];
Loading