Skip to content

Commit

Permalink
Merge branch 'main' into metatags-broken-backlinks-migration
Browse files Browse the repository at this point in the history
  • Loading branch information
martinst06 authored Dec 18, 2024
2 parents 19726e9 + 59c3dbc commit 7d96c5f
Show file tree
Hide file tree
Showing 9 changed files with 422 additions and 458 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## [1.45.3](https://github.com/adobe/spacecat-audit-worker/compare/v1.45.2...v1.45.3) (2024-12-18)


### Bug Fixes

* **deps:** update adobe fixes ([#537](https://github.com/adobe/spacecat-audit-worker/issues/537)) ([2ae262f](https://github.com/adobe/spacecat-audit-worker/commit/2ae262fb66c448614e0440fe03f7a5d6a76aa04b))

## [1.45.2](https://github.com/adobe/spacecat-audit-worker/compare/v1.45.1...v1.45.2) (2024-12-16)


Expand Down
654 changes: 327 additions & 327 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@adobe/spacecat-audit-worker",
"version": "1.45.2",
"version": "1.45.3",
"description": "SpaceCat Audit Worker",
"main": "src/index.js",
"type": "module",
Expand Down Expand Up @@ -57,9 +57,9 @@
"@adobe/helix-universal": "5.0.8",
"@adobe/helix-universal-logger": "3.0.23",
"@adobe/spacecat-shared-ahrefs-client": "1.5.16",
"@adobe/spacecat-shared-data-access": "1.59.2",
"@adobe/spacecat-shared-data-access": "1.60.1",
"@adobe/spacecat-shared-google-client": "1.3.6",
"@adobe/spacecat-shared-http-utils": "1.7.3",
"@adobe/spacecat-shared-http-utils": "1.8.0",
"@adobe/spacecat-shared-rum-api-client": "2.15.7",
"@adobe/spacecat-shared-rum-api-client-v1": "npm:@adobe/[email protected]",
"@adobe/spacecat-shared-utils": "1.23.8",
Expand Down
78 changes: 32 additions & 46 deletions src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,56 +18,41 @@ import { noopUrlResolver } from '../common/audit.js';
import { syncSuggestions } from '../utils/data-access.js';

const INTERVAL = 30; // days
const DAILY_PAGEVIEW_THRESHOLD = 100;
const AUDIT_TYPE = 'broken-internal-links';

/**
* Determines if the URL has the same host as the current host.
* @param {*} url
* @param {*} currentHost
* @returns
* Classifies links into priority categories based on views
* High: top 25%, Medium: next 25%, Low: bottom 50%
* @param {Array} links - Array of objects with views property
* @returns {Array} - Links with priority classifications included
*/
function hasSameHost(url, currentHost) {
const host = new URL(url).hostname;
return host === currentHost;
}

/**
* Filter out the 404 links that:
* - have less than 100 views and do not have a URL.
* - do not have any sources from the same domain.
* @param {*} links - all 404 links Data
* @param {*} hostUrl - the host URL of the domain
* @param {*} auditUrl - the URL to run audit against
* @param {*} log - the logger object
* @returns {Array} - Returns an array of 404 links that meet the criteria.
*/

function transform404LinksData(responseData, hostUrl, auditUrl, log) {
return responseData.reduce((result, { url, views, all_sources: allSources }) => {
try {
if (!url || views < DAILY_PAGEVIEW_THRESHOLD) {
return result;
}
const sameDomainSources = allSources.filter(
(source) => source && hasSameHost(source, hostUrl),
);

for (const source of sameDomainSources) {
result.push({
url_to: url,
url_from: source,
traffic_domain: views,
});
}
} catch {
log.error(
`Error occurred for audit type broken-internal-links for url ${auditUrl}, while processing sources for link ${url}`,
);
function calculatePriority(links) {
// Sort links by views in descending order
const sortedLinks = [...links].sort((a, b) => b.views - a.views);

// Calculate indices for the 25% and 50% marks
const quarterIndex = Math.ceil(sortedLinks.length * 0.25);
const halfIndex = Math.ceil(sortedLinks.length * 0.5);

// Map through sorted links and assign priority
return sortedLinks.map((link, index) => {
let priority;

if (index < quarterIndex) {
priority = 'high';
} else if (index < halfIndex) {
priority = 'medium';
} else {
priority = 'low';
}
return result;
}, []);

return {
...link,
priority,
};
});
}

/**
* Perform an audit to check which internal links for domain are broken.
*
Expand All @@ -93,9 +78,10 @@ export async function internalLinksAuditRunner(auditUrl, context, site) {

log.info('broken-internal-links: Options for RUM call: ', JSON.stringify(options));

const all404Links = await rumAPIClient.query('404', options);
const internal404Links = await rumAPIClient.query('404-internal-links', options);
const priorityLinks = calculatePriority(internal404Links);
const auditResult = {
brokenInternalLinks: transform404LinksData(all404Links, finalUrl, auditUrl, log),
brokenInternalLinks: priorityLinks,
fullAuditRef: auditUrl,
finalUrl,
auditContext: {
Expand Down
26 changes: 17 additions & 9 deletions test/audits/internal-links.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,22 @@ import { MockContextBuilder } from '../shared.js';

const AUDIT_RESULT_DATA = [
{
url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
traffic_domain: 100,
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
{
url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle',
url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 100,
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
{
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
];

Expand Down Expand Up @@ -76,7 +84,7 @@ describe('Broken internal links audit', () => {
context,
site,
);
expect(context.rumApiClient.query).calledWith('404', {
expect(context.rumApiClient.query).calledWith('404-internal-links', {
domain: 'www.example.com',
domainkey: 'test-key',
interval: 30,
Expand Down Expand Up @@ -166,10 +174,10 @@ describe('broken-internal-links audit to opportunity conversion', () => {

expect(context.dataAccess.Opportunity.create).to.have.been.calledOnceWith(expectedOpportunity);

// make sure that newly oppty has 2 new suggestions
// make sure that the newly created oppty has 3 new suggestions
expect(opportunity.addSuggestions).to.have.been.calledOnce;
const suggestionsArg = opportunity.addSuggestions.getCall(0).args[0];
expect(suggestionsArg).to.be.an('array').with.lengthOf(2);
expect(suggestionsArg).to.be.an('array').with.lengthOf(3);
}).timeout(5000);

it('creating a new opportunity object fails', async () => {
Expand Down
8 changes: 4 additions & 4 deletions test/audits/structured-data.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ import sinon from 'sinon';
import { convertToOpportunity, structuredDataHandler } from '../../src/structured-data/handler.js';
import { MockContextBuilder } from '../shared.js';

import fullUrlInspectionResult from '../fixtures/structured-data/structured-data.json' assert { type: 'json' };
import expectedOppty from '../fixtures/structured-data/oppty.json' assert { type: 'json' };
import auditDataMock from '../fixtures/structured-data/audit.json' assert { type: 'json' };
import suggestions from '../fixtures/structured-data/suggestions.json' assert { type: 'json' };
import fullUrlInspectionResult from '../fixtures/structured-data/structured-data.json' with { type: 'json' };
import expectedOppty from '../fixtures/structured-data/oppty.json' with { type: 'json' };
import auditDataMock from '../fixtures/structured-data/audit.json' with { type: 'json' };
import suggestions from '../fixtures/structured-data/suggestions.json' with { type: 'json' };

use(sinonChai);

Expand Down
15 changes: 0 additions & 15 deletions test/common/audit.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ describe('Audit tests', () => {
await defaultMessageSender(resultMessage, context);

expect(context.sqs.sendMessage).not.to.have.been.calledOnce;
expect(context.sqs.sendMessage).not.to.have.been.calledWith(queueUrl, resultMessage);
});

it('default url resolves gets the base url and follows redirects', async () => {
Expand Down Expand Up @@ -269,14 +268,7 @@ describe('Audit tests', () => {
expect(context.dataAccess.addAudit).to.have.been.calledWith(auditData);

const finalUrl = 'space.cat';
const expectedMessage = {
type: message.type,
url: 'https://space.cat',
auditContext: { someField: 431, finalUrl, fullAuditRef },
auditResult: { metric: 42 },
};
expect(context.sqs.sendMessage).not.to.have.been.calledOnce;
expect(context.sqs.sendMessage).not.to.have.been.calledWith(queueUrl, expectedMessage);
expect(postProcessors[0]).to.have.been.calledWith(finalUrl, auditData);
expect(postProcessors[1]).to.have.been.calledWith(finalUrl, auditData);
expect(postProcessors[2]).to.not.have.been.called;
Expand Down Expand Up @@ -333,14 +325,7 @@ describe('Audit tests', () => {
id: 'some-audit-id',
});

const expectedMessage = {
type: message.type,
url: 'https://space.cat',
auditContext: { finalUrl: 'space.cat', fullAuditRef },
auditResult: { metric: 42 },
};
expect(context.sqs.sendMessage).not.to.have.been.calledOnce;
expect(context.sqs.sendMessage).not.to.have.been.calledWith(queueUrl, expectedMessage);
});

it('wwwUrlResolver calculates audit urls correctly', async () => {
Expand Down
84 changes: 30 additions & 54 deletions test/fixtures/internal-links-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,58 +12,22 @@

export const internalLinksData = [
{
url: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
views: 100,
all_sources: [
'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
},
{
url: 'https://www.example.com/article/dogs/breeds/choosing-an-german-dog',
views: 5,
all_sources: [
'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-8',
},
{
url: 'x',
views: 100,
all_sources: [
'invalid-url',
],
source_count: 1,
top_source: '',
},
{
url: 'https://www.example.com/dogs/the-stages-of-canine-reproduction',
views: 100,
all_sources: [
'android-app://com.google.android.googlequicksearchbox/',
],
source_count: 1,
top_source: 'android-app://com.google.android.googlequicksearchbox/',
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
{
url: 'https://www.example.com/article/reptiles/general/unusual-pets-praying-mantis',
views: 100,
all_sources: [
'https://www.google.com/',
],
source_count: 1,
top_source: 'https://www.google.com/',
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
{
url: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle',
views: 100,
all_sources: [
'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
],
source_count: 1,
top_source: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
];

Expand Down Expand Up @@ -93,18 +57,30 @@ export const expectedSuggestions = [
type: 'CONTENT_UPDATE',
rank: 100,
data: {
url_to: 'https://www.example.com/article/dogs/breeds/choosing-an-irish-setter',
url_from: 'https://www.example.com/article/dogs/just-for-fun/dogs-good-for-men-13-manly-masculine-dog-breeds',
traffic_domain: 100,
traffic_domain: 1800,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a02nf',
priority: 'high',
},
},
{
type: 'CONTENT_UPDATE',
rank: 100,
data: {
traffic_domain: 1200,
url_to: 'https://www.petplace.com/ax02-changed',
url_from: 'https://www.petplace.com/ax02nf',
priority: 'medium',
},
},
{
type: 'CONTENT_UPDATE',
rank: 100,
data: {
url_to: 'https://www.example.com/article/dogs/breeds/choosing-a-miniature-poodle-1',
url_from: 'https://www.example.com/article/dogs/pet-care/when-is-a-dog-considered-senior',
traffic_domain: 100,
traffic_domain: 200,
url_to: 'https://www.petplace.com/a01',
url_from: 'https://www.petplace.com/a01nf',
priority: 'low',
},
},
];
2 changes: 2 additions & 0 deletions test/setup-env.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@ console.log('Forcing HTTP/1.1 for Adobe Fetch');
process.env.HELIX_FETCH_FORCE_HTTP1 = 'true';
process.env.AWS_ACCESS_KEY_ID = 'fake-key-id';
process.env.AWS_SECRET_ACCESS_KEY = 'fake-secret';
process.env.AWS_XRAY_SDK_ENABLED = 'false';
process.env.AWS_XRAY_CONTEXT_MISSING = 'IGNORE_ERROR';

dotenv.config({ override: true });

0 comments on commit 7d96c5f

Please sign in to comment.