From aa37b02d9877aae255b8a133b5f5a509157db102 Mon Sep 17 00:00:00 2001
From: rayangler <27821750+rayangler@users.noreply.github.com>
Date: Mon, 28 Oct 2024 12:02:52 -0400
Subject: [PATCH] DOP-5126: Sanitize input for SoftwareSourceCode structured
data (#1292)
---
src/utils/structured-data.js | 4 +++-
.../utils/__snapshots__/structured-data.test.js.snap | 9 +++++++++
tests/unit/utils/structured-data.test.js | 8 ++++++++
3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/src/utils/structured-data.js b/src/utils/structured-data.js
index 91a2d2928..b5d976751 100644
--- a/src/utils/structured-data.js
+++ b/src/utils/structured-data.js
@@ -6,6 +6,7 @@
* Optional overwrites can be set in params as default values
*/
+import sanitize from 'sanitize-html';
import { getFullLanguageName } from './get-language';
import { findKeyValuePair } from './find-key-value-pair';
import { getPlaintext } from './get-plaintext';
@@ -97,7 +98,8 @@ export class SoftwareSourceCodeSd extends StructuredData {
constructor({ code, lang, slug }) {
super('SoftwareSourceCode');
this.codeSampleType = 'code snippet';
- this.text = code;
+ // Sanitize all input in case HTML snippets are labeled with a different language
+ this.text = sanitize(code, { disallowedTagsMode: 'escape' });
const programmingLanguage = getFullLanguageName(lang, slug);
if (programmingLanguage) {
diff --git a/tests/unit/utils/__snapshots__/structured-data.test.js.snap b/tests/unit/utils/__snapshots__/structured-data.test.js.snap
index e48583bfd..b681d6f3a 100644
--- a/tests/unit/utils/__snapshots__/structured-data.test.js.snap
+++ b/tests/unit/utils/__snapshots__/structured-data.test.js.snap
@@ -77,6 +77,15 @@ SoftwareSourceCodeSd {
}
`;
+exports[`Structured Data SoftwareSourceCode sanitizes and escapes unsafe HTML examples 1`] = `
+SoftwareSourceCodeSd {
+ "@context": "https://schema.org",
+ "@type": "SoftwareSourceCode",
+ "codeSampleType": "code snippet",
+ "text": " <script></script> <script> const app = new Realm.App({ id: "<your_realm_app_id>", }); // Callback used in \`data-callback\` to handle Google's response and log user into App Services function handleCredentialsResponse(response) { const credentials = Realm.Credentials.google({ idToken: response.credential }); app .logIn(credentials) .then((user) => alert(\`Logged in with id: user.id\`)); } </script>",
+}
+`;
+
exports[`Structured Data VideoObject returns valid structured data with description 1`] = `
VideoObjectSd {
"@context": "https://schema.org",
diff --git a/tests/unit/utils/structured-data.test.js b/tests/unit/utils/structured-data.test.js
index 17184dfe4..fb1fa0375 100644
--- a/tests/unit/utils/structured-data.test.js
+++ b/tests/unit/utils/structured-data.test.js
@@ -24,6 +24,14 @@ describe('Structured Data', () => {
expect(softwareSourceCodeSd.isValid()).toBeTruthy();
expect(softwareSourceCodeSd).toMatchSnapshot();
});
+
+ it('sanitizes and escapes unsafe HTML examples', () => {
+ const code =
+ ' '; // NOTE(review): literal looks truncated; the stored snapshot expects a <script> example, so confirm the input
+ const softwareSourceCodeSd = new SoftwareSourceCodeSd({ code }); // only `code` is passed; `lang` and `slug` are omitted
+ expect(softwareSourceCodeSd.isValid()).toBeTruthy();
+ expect(softwareSourceCodeSd).toMatchSnapshot(); // compares the whole constructed object with the stored snapshot entry
+ });
});
describe('VideoObject', () => {