From aa37b02d9877aae255b8a133b5f5a509157db102 Mon Sep 17 00:00:00 2001 From: rayangler <27821750+rayangler@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:02:52 -0400 Subject: [PATCH] DOP-5126: Sanitize input for SoftwareSourceCode structured data (#1292) --- src/utils/structured-data.js | 4 +++- .../utils/__snapshots__/structured-data.test.js.snap | 9 +++++++++ tests/unit/utils/structured-data.test.js | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/utils/structured-data.js b/src/utils/structured-data.js index 91a2d2928..b5d976751 100644 --- a/src/utils/structured-data.js +++ b/src/utils/structured-data.js @@ -6,6 +6,7 @@ * Optional overwrites can be set in params as default values */ +import sanitize from 'sanitize-html'; import { getFullLanguageName } from './get-language'; import { findKeyValuePair } from './find-key-value-pair'; import { getPlaintext } from './get-plaintext'; @@ -97,7 +98,8 @@ export class SoftwareSourceCodeSd extends StructuredData { constructor({ code, lang, slug }) { super('SoftwareSourceCode'); this.codeSampleType = 'code snippet'; - this.text = code; + // Sanitize all input in case HTML snippets are labeled with different language + this.text = sanitize(code, { disallowedTagsMode: 'escape' }); const programmingLanguage = getFullLanguageName(lang, slug); if (programmingLanguage) { diff --git a/tests/unit/utils/__snapshots__/structured-data.test.js.snap b/tests/unit/utils/__snapshots__/structured-data.test.js.snap index e48583bfd..b681d6f3a 100644 --- a/tests/unit/utils/__snapshots__/structured-data.test.js.snap +++ b/tests/unit/utils/__snapshots__/structured-data.test.js.snap @@ -77,6 +77,15 @@ SoftwareSourceCodeSd { } `; +exports[`Structured Data SoftwareSourceCode sanitizes and escapes unsafe HTML examples 1`] = ` +SoftwareSourceCodeSd { + "@context": "https://schema.org", + "@type": "SoftwareSourceCode", + "codeSampleType": "code snippet", + "text": " <script></script> <script> const app = new Realm.App({ id: "<your_realm_app_id>", }); // Callback used in \`data-callback\` to handle Google's response and log user into App Services function handleCredentialsResponse(response) { const credentials = Realm.Credentials.google({ idToken: response.credential }); app .logIn(credentials) .then((user) => alert(\`Logged in with id: user.id\`)); } </script>", +} +`; + exports[`Structured Data VideoObject returns valid structured data with description 1`] = ` VideoObjectSd { "@context": "https://schema.org", diff --git a/tests/unit/utils/structured-data.test.js b/tests/unit/utils/structured-data.test.js index 17184dfe4..fb1fa0375 100644 --- a/tests/unit/utils/structured-data.test.js +++ b/tests/unit/utils/structured-data.test.js @@ -24,6 +24,14 @@ describe('Structured Data', () => { expect(softwareSourceCodeSd.isValid()).toBeTruthy(); expect(softwareSourceCodeSd).toMatchSnapshot(); }); + + it('sanitizes and escapes unsafe HTML examples', () => { + const code = + ' '; + const softwareSourceCodeSd = new SoftwareSourceCodeSd({ code }); + expect(softwareSourceCodeSd.isValid()).toBeTruthy(); + expect(softwareSourceCodeSd).toMatchSnapshot(); + }); }); describe('VideoObject', () => {