Skip to content

Commit

Permalink
Changed event to 404-internal-links from 404 and remove associated logic
Browse files Browse the repository at this point in the history
  • Loading branch information
nitinja committed Dec 13, 2024
1 parent f050c0a commit 5c15c3c
Showing 1 changed file with 32 additions and 47 deletions.
79 changes: 32 additions & 47 deletions src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,57 +18,41 @@ import { noopUrlResolver } from '../common/audit.js';
import { syncSuggestions } from '../utils/data-access.js';

const INTERVAL = 30; // days
const DAILY_PAGEVIEW_THRESHOLD = 100;
const AUDIT_TYPE = 'broken-internal-links';

/**
* Determines if the URL has the same host as the current host.
* @param {*} url
* @param {*} currentHost
* @returns
* Classifies links into priority categories based on views
* High: top 50%, Medium: next 25%, Low: bottom 25%
* @param {Array} links - Array of objects with views property
* @returns {Array} - Links with priority classifications included
*/
function hasSameHost(url, currentHost) {
const host = new URL(url).hostname;
return host === currentHost;
}

/**
* Filter out the 404 links that:
* - have less than 100 views and do not have a URL.
* - do not have any sources from the same domain.
* @param {*} links - all 404 links Data
* @param {*} hostUrl - the host URL of the domain
* @param {*} auditUrl - the URL to run audit against
* @param {*} log - the logger object
* @returns {Array} - Returns an array of 404 links that meet the criteria.
*/

function transform404LinksData(responseData, hostUrl, auditUrl, log) {
return responseData.reduce((result, { url, views, all_sources: allSources }) => {
try {
if (!url || views < DAILY_PAGEVIEW_THRESHOLD) {
return result;
}
const sameDomainSources = allSources.filter(
(source) => source && hasSameHost(source, hostUrl),
);

for (const source of sameDomainSources) {
result.push({
url_to: url,
url_from: source,
traffic_domain: views,
priority: 0,
});
}
} catch {
log.error(
`Error occurred for audit type broken-internal-links for url ${auditUrl}, while processing sources for link ${url}`,
);
function calculatePriority(links) {
// Sort links by views
const sortedLinks = [...links].sort((a, b) => b.views - a.views);

// Calculate total views
const totalViews = sortedLinks.reduce((sum, link) => sum + link.views, 0);

// Map through sorted links and assign priority based on contribution percentage
return sortedLinks.map((link) => {
const contributionPercentage = (link.views / totalViews) * 100;

let priority;
if (contributionPercentage >= 50) {
priority = 'high';
} else if (contributionPercentage >= 25) {
priority = 'medium';
} else {
priority = 'low';
}
return result;
}, []);

return {
...link,
priority,
};
});
}

/**
* Perform an audit to check which internal links for domain are broken.
*
Expand All @@ -94,9 +78,10 @@ export async function internalLinksAuditRunner(auditUrl, context, site) {

log.info('broken-internal-links: Options for RUM call: ', JSON.stringify(options));

const all404Links = await rumAPIClient.query('404', options);
const internal404Links = await rumAPIClient.query('404-internal-links', options);
const priorityLinks = calculatePriority(internal404Links);
const auditResult = {
brokenInternalLinks: transform404LinksData(all404Links, finalUrl, auditUrl, log),
brokenInternalLinks: priorityLinks,
fullAuditRef: auditUrl,
finalUrl,
auditContext: {
Expand Down

0 comments on commit 5c15c3c

Please sign in to comment.