Skip to content

Commit

Permalink
fix: URL patterns should not be included in the sitemap (#3380)
Browse files Browse the repository at this point in the history
## Description

URL patterns should not be included in the sitemap. For instance, before
this PR, we included paths like /:slug in the sitemap.

## Steps for reproduction

Open


https://fix-sitemap-slug.staging.webstudio.is/builder/8404e444-093d-4e70-b5c2-03fd89b9eb6d?pageId=EksrLskoGCCreyVN4xp2r


See dynamic page is excluded

## Code Review

- [ ] hi @kof, I need you to do
  - conceptual review (architecture, feature-correctness)
  - detailed review (read every line)
  - test it on preview

## Before requesting a review

- [ ] made a self-review
- [ ] added inline comments where things may be not obvious (the "why",
not "what")

## Before merging

- [ ] tested locally and on preview environment (preview dev login:
5de6)
- [ ] updated [test
cases](https://github.com/webstudio-is/webstudio/blob/main/apps/builder/docs/test-cases.md)
document
- [ ] added tests
- [ ] if any new env variables are added, added them to `.env.example`
and the `builder/env-check.js` if mandatory
  • Loading branch information
istarkov authored May 19, 2024
1 parent 45fdd64 commit f47455b
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 12 deletions.
15 changes: 3 additions & 12 deletions apps/builder/app/builder/shared/url-pattern.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { matchPathnameParams } from "@webstudio-is/sdk";
import { URLPattern } from "urlpattern-polyfill";
export { isPathnamePattern } from "@webstudio-is/sdk";

const baseUrl = "http://url";

Expand All @@ -21,23 +23,12 @@ type Token =
| { type: "fragment"; value: string }
| { type: "param"; name: string; optional: boolean; splat: boolean };

// /:slug -> { name: "slug", modifier: "" }
// /:slug* -> { name: "slug", modifier: "*" }
// /:slug? -> { name: "slug", modifier: "?" }
// /* -> { wildcard: "*" }
const tokenRegex = /:(?<name>\w+)(?<modifier>[?*]?)|(?<wildcard>(?<!:\w+)\*)/;
// use separate regex from matchAll because regex.test is stateful when used with g flag
const tokenRegexGlobal = new RegExp(tokenRegex.source, "g");

export const isPathnamePattern = (pathname: string) =>
tokenRegex.test(pathname);

export const tokenizePathnamePattern = (pathname: string) => {
const tokens: Token[] = [];
let lastCursor = 0;
let lastWildcard = -1;

for (const match of pathname.matchAll(tokenRegexGlobal)) {
for (const match of matchPathnameParams(pathname)) {
const cursor = match.index ?? 0;
if (lastCursor < cursor) {
tokens.push({
Expand Down
1 change: 1 addition & 0 deletions packages/sdk/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ export * from "./expression";
export * from "./forms-generator";
export * from "./resources-generator";
export * from "./page-meta-generator";
export * from "./url-pattern";
2 changes: 2 additions & 0 deletions packages/sdk/src/page-utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { executeExpression } from "./expression";
import type { Folder, Page, Pages } from "./schema/pages";
import { isPathnamePattern } from "./url-pattern";

export const ROOT_FOLDER_ID = "root";

Expand Down Expand Up @@ -85,6 +86,7 @@ export const getStaticSiteMapXml = (pages: Pages, updatedAt: string) => {
.filter(
(page) => executeExpression(page.meta.excludePageFromSearch) !== true
)
.filter((page) => false === isPathnamePattern(page.path))
.map((page) => ({
path: getPagePath(page.id, pages),
lastModified: updatedAt.split("T")[0],
Expand Down
14 changes: 14 additions & 0 deletions packages/sdk/src/url-pattern.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { expect, test } from "@jest/globals";
import { isPathnamePattern } from "./url-pattern";

test("check pathname is pattern", () => {
expect(isPathnamePattern("/:name")).toEqual(true);
expect(isPathnamePattern("/:slug*")).toEqual(true);
expect(isPathnamePattern("/:id?")).toEqual(true);
expect(isPathnamePattern("/*")).toEqual(true);

expect(isPathnamePattern("")).toEqual(false);
expect(isPathnamePattern("/")).toEqual(false);
expect(isPathnamePattern("/blog")).toEqual(false);
expect(isPathnamePattern("/blog/post-name")).toEqual(false);
});
15 changes: 15 additions & 0 deletions packages/sdk/src/url-pattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// /:slug -> { name: "slug", modifier: "" }
// /:slug* -> { name: "slug", modifier: "*" }
// /:slug? -> { name: "slug", modifier: "?" }
// /* -> { wildcard: "*" }
const tokenRegex = /:(?<name>\w+)(?<modifier>[?*]?)|(?<wildcard>(?<!:\w+)\*)/;

export const isPathnamePattern = (pathname: string) =>
tokenRegex.test(pathname);

// use separate regex from matchAll because regex.test is stateful when used with g flag
const tokenRegexGlobal = new RegExp(tokenRegex.source, "g");

export const matchPathnameParams = (pathname: string) => {
return pathname.matchAll(tokenRegexGlobal);
};

0 comments on commit f47455b

Please sign in to comment.