From 970c3c91c04ef9966be196b674242d1c596b2f1a Mon Sep 17 00:00:00 2001
From: hengm3467 <100685635+hengm3467@users.noreply.github.com>
Date: Mon, 18 Nov 2024 09:01:13 +0800
Subject: [PATCH 01/11] tab title change and a topic title change (#56)
---
mint.json | 2 +-
performance/performance-faq.mdx | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mint.json b/mint.json
index 950597b7..3a41726e 100644
--- a/mint.json
+++ b/mint.json
@@ -32,7 +32,7 @@
"style": "roundedRectangle"
},
"primaryTab": {
- "name": "RisingWave"
+ "name": "Guides"
},
"feedback": {
"suggestEdit": true,
diff --git a/performance/performance-faq.mdx b/performance/performance-faq.mdx
index cdfbd2d4..d1db3f0b 100644
--- a/performance/performance-faq.mdx
+++ b/performance/performance-faq.mdx
@@ -1,5 +1,5 @@
---
-title: "Performance-related FAQs"
+title: "FAQs"
description: This topic addresses common queries related to resource allocation and adjustment for both streaming and batch queries. This will assist you in fine-tuning performance and maximizing efficiency.
mode: wide
---
From 503cc1fb9b477b02c7058fd2606d8d7d19a9e4c9 Mon Sep 17 00:00:00 2001
From: IrisWan <150207222+WanYixian@users.noreply.github.com>
Date: Tue, 19 Nov 2024 14:19:08 +0800
Subject: [PATCH 02/11] Fix broken link of monitor statement progress (#58)
* change view into monitor in file name
* sidebar
---
mint.json | 2 +-
...ew-statement-progress.mdx => monitor-statement-progress.mdx} | 0
sql/commands/sql-cancel-jobs.mdx | 2 +-
sql/commands/sql-show-jobs.mdx | 2 +-
sql/system-catalogs/rw-catalog.mdx | 2 +-
5 files changed, 4 insertions(+), 4 deletions(-)
rename operate/{view-statement-progress.mdx => monitor-statement-progress.mdx} (100%)
diff --git a/mint.json b/mint.json
index 3a41726e..215fa548 100644
--- a/mint.json
+++ b/mint.json
@@ -819,7 +819,7 @@
"group": "Operate",
"pages": [
"operate/monitor-risingwave-cluster",
- "operate/view-statement-progress",
+ "operate/monitor-statement-progress",
"operate/alter-streaming",
"operate/view-configure-system-parameters",
"operate/view-configure-runtime-parameters",
diff --git a/operate/view-statement-progress.mdx b/operate/monitor-statement-progress.mdx
similarity index 100%
rename from operate/view-statement-progress.mdx
rename to operate/monitor-statement-progress.mdx
diff --git a/sql/commands/sql-cancel-jobs.mdx b/sql/commands/sql-cancel-jobs.mdx
index f3e9e011..3932b32d 100644
--- a/sql/commands/sql-cancel-jobs.mdx
+++ b/sql/commands/sql-cancel-jobs.mdx
@@ -44,7 +44,7 @@ Id
diff --git a/sql/commands/sql-show-jobs.mdx b/sql/commands/sql-show-jobs.mdx
index 4988b7f7..3b2c499a 100644
--- a/sql/commands/sql-show-jobs.mdx
+++ b/sql/commands/sql-show-jobs.mdx
@@ -34,7 +34,7 @@ SHOW JOBS;
title="Monitor statement progress"
icon="chart-line"
iconType="solid"
- href="/docs/current/view-statement-progress/"
+ href="/docs/current/monitor-statement-progress/"
/>
Date: Tue, 19 Nov 2024 15:28:13 +0800
Subject: [PATCH 03/11] feat: add GA (#59)
---
mint.json | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mint.json b/mint.json
index 215fa548..4102a9cf 100644
--- a/mint.json
+++ b/mint.json
@@ -1013,5 +1013,10 @@
"linkedin": "https://go.risingwave.com/linkedin",
"slack": "https://go.risingwave.com/slack",
"youtube": "https://go.risingwave.com/youtube"
+ },
+ "analytics": {
+ "ga4": {
+ "measurementId": "G-VG98SVDEYE"
+ }
}
}
\ No newline at end of file
From cbfba2700eacac685142a9c90af8eed1d09124a8 Mon Sep 17 00:00:00 2001
From: IrisWan <150207222+WanYixian@users.noreply.github.com>
Date: Tue, 19 Nov 2024 16:22:22 +0800
Subject: [PATCH 04/11] Fix format error in table (#60)
* fix
* Update apache-iceberg.mdx
---
integrations/destinations/apache-iceberg.mdx | 5 ++---
integrations/destinations/delta-lake.mdx | 2 +-
integrations/sources/apache-iceberg.mdx | 2 +-
3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/integrations/destinations/apache-iceberg.mdx b/integrations/destinations/apache-iceberg.mdx
index 1076b427..469bf46a 100644
--- a/integrations/destinations/apache-iceberg.mdx
+++ b/integrations/destinations/apache-iceberg.mdx
@@ -31,7 +31,7 @@ WITH (
| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| type | Required. Allowed values: appendonly and upsert. |
| force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. |
-| s3.endpoint | Optional. Endpoint of the S3\. For MinIO object store backend, it should be http://${MINIO_HOST}:${MINIO_PORT}. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html) |
+| s3.endpoint | Optional. Endpoint of the S3. For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
| s3.region | Optional. The region where the S3 bucket is hosted. Either s3.endpoint or s3.region must be specified. |
| s3.access.key | Required. Access key of the S3 compatible object store. |
| s3.secret.key | Required. Secret key of the S3 compatible object store. |
@@ -42,8 +42,7 @@ WITH (
| warehouse.path | Conditional. The path of the Iceberg warehouse. Currently, only S3-compatible object storage systems, such as AWS S3 and MinIO, are supported. It's required if the catalog.type is not rest. |
| catalog.url | Conditional. The URL of the catalog. It is required when catalog.type is not storage. |
| primary\_key | The primary key for an upsert sink. It is only applicable to the upsert mode. |
-| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. |
-| create\_table\_if\_not\_exists| Optional. When set to `true`, it will automatically create a table for the Iceberg sink.|
+| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10. The behavior of this field also depends on the `sink_decouple` setting: If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10. If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1. If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur. |
## Data type mapping
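
A minimal sketch of how the options documented above typically combine in a `CREATE SINK` statement; the materialized view `mv_orders`, the warehouse path, and the credential values are hypothetical placeholders, not taken from this patch:

```sql
-- Hypothetical Iceberg sink using a MinIO-style s3.endpoint and an
-- explicit commit_checkpoint_interval, as described in the table above.
CREATE SINK iceberg_sink_example
FROM mv_orders
WITH (
    connector = 'iceberg',
    type = 'upsert',
    primary_key = 'order_id',
    catalog.type = 'storage',
    warehouse.path = 's3a://my-iceberg-bucket/warehouse',
    database.name = 'demo_db',
    table.name = 'demo_table',
    s3.endpoint = 'http://minio:9000',          -- MinIO object store endpoint
    s3.access.key = 'my-access-key',
    s3.secret.key = 'my-secret-key',
    commit_checkpoint_interval = '10'           -- commit every 10 checkpoints
);
```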
diff --git a/integrations/destinations/delta-lake.mdx b/integrations/destinations/delta-lake.mdx
index 94099051..4c679419 100644
--- a/integrations/destinations/delta-lake.mdx
+++ b/integrations/destinations/delta-lake.mdx
@@ -32,7 +32,7 @@ WITH (
| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| type | Required. Currently, only append-only is supported. |
| location | Required. The file path that the Delta Lake table is reading data from, as specified when creating the Delta Lake table. For AWS, start with s3:// or s3a://;For GCS, start with gs://; For local files, start with file://. |
-| s3.endpoint | Required. Endpoint of the S3\. For MinIO object store backend, it should be http://${MINIO_HOST}:${MINIO_PORT}. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
+| s3.endpoint | Required. Endpoint of the S3. For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
| s3.access.key | Required. Access key of the S3 compatible object store. |
| s3.secret.key | Required. Secret key of the S3 compatible object store. |
| gcs.service.account | Required for GCS. Specifies the service account JSON file as a string. |
diff --git a/integrations/sources/apache-iceberg.mdx b/integrations/sources/apache-iceberg.mdx
index b351e333..91a7cda4 100644
--- a/integrations/sources/apache-iceberg.mdx
+++ b/integrations/sources/apache-iceberg.mdx
@@ -30,7 +30,7 @@ You don’t need to specify the column name for the Iceberg source, as RisingWav
| Field | Notes |
| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| type | Required. Allowed values: appendonly and upsert. |
-| s3.endpoint | Optional. Endpoint of the S3\. For MinIO object store backend, it should be http://${MINIO_HOST}:${MINIO_PORT}. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
+| s3.endpoint | Optional. Endpoint of the S3. For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
| s3.region | Optional. The region where the S3 bucket is hosted. Either s3.endpoint or s3.region must be specified. |
| s3.access.key | Required. Access key of the S3 compatible object store. |
| s3.secret.key | Required. Secret key of the S3 compatible object store. |
From d70408321aed9df67cee646e5fb6c627c68d7435 Mon Sep 17 00:00:00 2001
From: IrisWan <150207222+WanYixian@users.noreply.github.com>
Date: Tue, 19 Nov 2024 16:38:44 +0800
Subject: [PATCH 05/11] Update the format of callout (#61)
* change view into monitor in file name
* sidebar
* remove redundant callout words
---
client-libraries/ruby.mdx | 8 +-
cloud/connect-to-a-project.mdx | 12 +-
cloud/connection-errors.mdx | 18 +--
cloud/export-metrics.mdx | 6 +-
cloud/manage-payment-methods.mdx | 7 +-
cloud/manage-sources.mdx | 10 +-
cloud/manage-users.mdx | 6 +-
cloud/manage-your-account.mdx | 6 +-
cloud/organization-rbac.mdx | 6 +-
cloud/privatelink-overview.mdx | 6 +-
cloud/project-byoc.mdx | 6 +-
cloud/scale-a-project-manually.mdx | 6 +-
cloud/stop-and-delete-projects.mdx | 2 -
cloud/update-database-version.mdx | 6 +-
delivery/overview.mdx | 9 +-
delivery/risingwave-as-postgres-fdw.mdx | 6 +-
delivery/subscription.mdx | 6 +-
deploy/risingwave-k8s-helm.mdx | 5 +-
deploy/risingwave-kubernetes.mdx | 19 +--
deploy/upgrade-risingwave-k8s.mdx | 6 +-
get-started/quickstart.mdx | 6 -
ingestion/format-and-encode-parameters.mdx | 9 +-
ingestion/generate-test-data.mdx | 56 +++++----
ingestion/modify-source-or-table-schemas.mdx | 18 +--
ingestion/supported-sources-and-formats.mdx | 18 +--
integrations/destinations/amazon-dynamodb.mdx | 6 +-
integrations/destinations/apache-doris.mdx | 7 +-
integrations/destinations/apache-kafka.mdx | 113 ++++++++----------
integrations/destinations/apache-pulsar.mdx | 12 +-
integrations/destinations/aws-kinesis.mdx | 29 ++---
integrations/destinations/clickhouse.mdx | 34 ++----
integrations/destinations/cockroachdb.mdx | 7 +-
integrations/destinations/elasticsearch.mdx | 29 ++---
integrations/destinations/google-pub-sub.mdx | 18 ++-
integrations/destinations/mongodb.mdx | 26 ++--
mint.json | 2 +-
36 files changed, 207 insertions(+), 339 deletions(-)
diff --git a/client-libraries/ruby.mdx b/client-libraries/ruby.mdx
index 06e41513..5504f1fe 100644
--- a/client-libraries/ruby.mdx
+++ b/client-libraries/ruby.mdx
@@ -25,11 +25,11 @@ require 'pg'
conn = PG.connect(host: '127.0.0.1', port: 4566, dbname: 'dev', user: 'root')
```
-:::note
+
The `BasicTypeMapForResults` class isn't supported currently, you need to cast RisingWave types into Ruby types manually.
-:::
+
## Create a source
@@ -57,11 +57,11 @@ EOF
conn.exec(sql) # Execute the query.
```
-:::note
+
All the code examples in this guide include a section for connecting to RisingWave. If you perform multiple actions within one connection session, you do not need to repeat this section.
-:::
+
## Create a materialized view
diff --git a/cloud/connect-to-a-project.mdx b/cloud/connect-to-a-project.mdx
index 3fc0cee3..0b6cd9d1 100644
--- a/cloud/connect-to-a-project.mdx
+++ b/cloud/connect-to-a-project.mdx
@@ -25,11 +25,9 @@ To connect with any local clients, follow the steps below:
* RisingWave Cloud creates a default user for every provisioned project since v1.7.1\. The default user is authenticated with a temporary token under the OAuth 2.0 protocol to ease the burden on developers. For default users, RisingWave Cloud offers the `psql` command and a general `Connection String` for a quick connection.
* Alternatively, you can create a new user, RisingWave Cloud offers `psql`, `Connection String`, `Parameters Only`, `Java`, `Node.js`, `Python`, and `Golang` as connection options.
-
-**NOTE**
-
+
To connect via `psql`, you need to [Install psql](/docs/current/install-psql-without-postgresql/) in your environment. `psql` is a command-line interface for interacting with PostgreSQL databases, including RisingWave.
-
+
3. You may need to set up a CA certificate to enable SSL connections. See the instructions displayed on the portal for more details.
4. Copy the command and run it in a terminal window.
@@ -39,11 +37,9 @@ To connect via `psql`, you need to [Install psql](/docs/current/install-psql-wit
|----------------|---------------|
| | |
-
-**NOTE**
-
+
If you choose `Java`, `Node.js`, `Python`, or `Golang` as the startup mode, replace `` in the command with the password you set when creating a new user.
-
+
## What's next
:@:/` directly.
-
-**NOTE**
-
+
Not all clients support the `options` field. If your client does not support the `options` field, you can use solution 2 or 3.
-
+
### Solution 2: Put the tenant identifier in the host
@@ -31,11 +29,9 @@ You can put the tenant identifier in the host in the format of `:@.:/
-**NOTE**
-
+
Not all clients support SNI routing. If your client does not support SNI routing, you can use solution 1 or 3.
-
+
### Solution 3: Put the tenant identifier in the username[](#solution-3-put-the-tenant-identifier-in-the-username "Direct link to Solution 3: Put the tenant identifier in the username")
@@ -45,11 +41,9 @@ You can also put the tenant identifier in the username in the format of `;:@:/
-**NOTE**
-
+
The server will use `AuthenticationCleartextPassword` response to authenticate the user. Learn more about the protocol in the [PostgreSQL documentation](https://www.postgresql.org/docs/current/protocol-flow.html).
-
+
## The tenant identifier is not specified
diff --git a/cloud/export-metrics.mdx b/cloud/export-metrics.mdx
index 33db2bc6..136dcbca 100644
--- a/cloud/export-metrics.mdx
+++ b/cloud/export-metrics.mdx
@@ -31,11 +31,9 @@ Get the corresponding `CLOUD_HOST` for your region and Cloud provider from the t
Choose one of the following methods to configure monitoring systems.
-
-**NOTE**
-
+
The metrics are formatted according to [Prometheus](https://prometheus.io/docs/concepts/metric%5Ftypes/) standards. If your monitoring collection mode is compatible with the Prometheus format, refer to the Prometheus section below to configure the collection.
-
+
diff --git a/cloud/manage-payment-methods.mdx b/cloud/manage-payment-methods.mdx
index a1afc858..97ebfbc6 100644
--- a/cloud/manage-payment-methods.mdx
+++ b/cloud/manage-payment-methods.mdx
@@ -1,13 +1,12 @@
---
title: "Manage payment methods"
-description: "You can manage your payment methods for your organization in RisingWave Cloud. The saved payment methods will be used to make automatic payments of the monthly bill after each billing period. You can also use the saved payment methods to settle outstanding payments manually if the automatic payment fails."
---
+You can manage your payment methods for your organization in RisingWave Cloud. The saved payment methods will be used to make automatic payments of the monthly bill after each billing period. You can also use the saved payment methods to settle outstanding payments manually if the automatic payment fails. All members of the organization can view and manage payment methods.
+
-**INFO**
+Currently, RisingWave Cloud only supports credit cards as the payment method.
-* Currently, RisingWave Cloud only supports credit cards as the payment method.
-* All members of the organization can view and manage payment methods.
## Add a payment method
diff --git a/cloud/manage-sources.mdx b/cloud/manage-sources.mdx
index e6b276e3..f15d9cf1 100644
--- a/cloud/manage-sources.mdx
+++ b/cloud/manage-sources.mdx
@@ -15,11 +15,11 @@ You can create a source with one of the following methods:
2. Specify the project and click its **Workspace**.
3. Next to **Source** tab, click **\+ Add new**.
4. Select the service you want to connect to.
-
-**NOTE**
+
-More services will be supported in future releases.
-
+ More services will be supported in future releases.
+
+
5. Configure the connector settings, source details, and schema according to the instructions of the guided setup.
6. Check the generated SQL statement and click **Confirm** to create the source in your database.
@@ -32,9 +32,9 @@ Refer to [CREATE SOURCE](/docs/current/sql-create-source/#supported-sources) in
Click on a source to view its details, including the connector settings, schema, throughput, errors, and running status.
-**TIP**
When checking throughput and errors, you can click **Last 30 minutes** on the right side to customize your time range.
+
## Drop a source
diff --git a/cloud/manage-users.mdx b/cloud/manage-users.mdx
index 69054da7..9c25301a 100644
--- a/cloud/manage-users.mdx
+++ b/cloud/manage-users.mdx
@@ -22,11 +22,9 @@ You can invite others to create a RisingWave Cloud account and join your organiz
2. Click **Invite new user**.
3. Enter the email address of the user you want to invite.
-
-**NOTE**
-
+
You cannot invite an existing user (whose email address is already registered on RisingWave Cloud) to join your organization.
-
+
1. Click **Send invite**.
diff --git a/cloud/manage-your-account.mdx b/cloud/manage-your-account.mdx
index 0be789ba..f0c7ea5f 100644
--- a/cloud/manage-your-account.mdx
+++ b/cloud/manage-your-account.mdx
@@ -30,11 +30,9 @@ To delete your RisingWave Cloud account and all its associated data:
3. Check **Yes, I want to delete the account**.
4. Click **Delete** to confirm the decision.
-
-**NOTE**
-
+
If you are the last admin of the organization, deleting your account will also delete the organization.
-
+
## Switch accounts
diff --git a/cloud/organization-rbac.mdx b/cloud/organization-rbac.mdx
index 2153d911..8c9f965a 100644
--- a/cloud/organization-rbac.mdx
+++ b/cloud/organization-rbac.mdx
@@ -41,9 +41,7 @@ Only the OrganizationAdmin has the permission to manage user's RoleBinding.
| Delete or add RoleBinding for a user | Go to **Organization** \> **Role management** \> **Users**, click the corresponding Edit Roles of the specific role. A popup window will appear, allowing you to uncheck the role or select the new ones. Click **Confirm** to save the change. |
| Delete or add RoleBinding for the service account | Go to **Organization** \> **Role management** \> **Users**, click the corresponding Edit Roles of the specific service account. A popup window will appear, allowing you to uncheck the role or select the new ones. Click **Confirm** to save the change. |
-
-**NOTE**
-
+
Every organization needs at least one OrganizationAdmin user. Any attempt to delete the last OrganizationAdmin RoleBinding will fail.
-
+
diff --git a/cloud/privatelink-overview.mdx b/cloud/privatelink-overview.mdx
index acd3c3e3..73db4ec8 100644
--- a/cloud/privatelink-overview.mdx
+++ b/cloud/privatelink-overview.mdx
@@ -12,11 +12,9 @@ RisingWave Cloud utilizes the private connection capability of the underlying Cl
* [GCP Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect)
* [Azure Private Link](https://learn.microsoft.com/en-us/azure/private-link/)
-
-**NOTE**
-
+
Azure Private Link integration is currently in development and will be available soon.
-
+
The diagram below depicts a high-level overview of how PrivateLink service works. All three platforms share the same pattern of network structure so that you can configure them in the same way automatically.
diff --git a/cloud/project-byoc.mdx b/cloud/project-byoc.mdx
index c6473fa2..f549a224 100644
--- a/cloud/project-byoc.mdx
+++ b/cloud/project-byoc.mdx
@@ -78,11 +78,11 @@ Before running the command-line interface to create or delete a BYOC environment
* [Service Account Admin](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountAdmin)
* [Service Account User](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser)
* [Storage Admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin)
-
-**NOTE**
+
These permissions are only required for creating or deleting a BYOC environment. Once the environment is up and running, limited permissions are needed to operate the services.
-
+
+
* **Resources provisioned in BYOC environment**
We will set up the following resources in a BYOC environment:
* 1 VPC: including VPC, its subnets, firewalls, IPs to host all BYOC resources.
diff --git a/cloud/scale-a-project-manually.mdx b/cloud/scale-a-project-manually.mdx
index 24176899..486c21ea 100644
--- a/cloud/scale-a-project-manually.mdx
+++ b/cloud/scale-a-project-manually.mdx
@@ -3,11 +3,9 @@ title: "Scale a project manually"
description: "After creating a project, you have the flexibility to scale its resources and capacity to meet your specific data processing and analysis needs. This can be achieved through two methods: increasing or decreasing the number of worker nodes (horizontal scaling) or adjusting the resource capacity of each node in the project (vertical scaling)."
---
-
-**NOTE**
-
+
You can scale the projects created in the Standard plan and the Advanced plan. The Trial plan has a fixed number of nodes and resources.
-
+
## Scale your project
diff --git a/cloud/stop-and-delete-projects.mdx b/cloud/stop-and-delete-projects.mdx
index 61c0f15d..108a67af 100644
--- a/cloud/stop-and-delete-projects.mdx
+++ b/cloud/stop-and-delete-projects.mdx
@@ -22,8 +22,6 @@ Please ensure that all critical tasks are safely paused before proceeding. You c
If you no longer need a project and its associated data, you can delete it to free up resources.
-**INFO**
-
You must delete all projects before [deleting your account](/cloud/manage-your-account/#delete-your-account).
diff --git a/cloud/update-database-version.mdx b/cloud/update-database-version.mdx
index 710e7e48..5199a3b2 100644
--- a/cloud/update-database-version.mdx
+++ b/cloud/update-database-version.mdx
@@ -17,8 +17,6 @@ Before the upgrade, ensure that all critical data are backed up and all critical
2. Click the rocket icon next to the project you want to update the database version.
3. Wait for the update to complete. This may take a few minutes.
-
-**NOTE**
-
+
You can only update the RisingWave version of a project to a newer version. You cannot downgrade it.
-
+
diff --git a/delivery/overview.mdx b/delivery/overview.mdx
index cd6b1a29..1b8f1738 100644
--- a/delivery/overview.mdx
+++ b/delivery/overview.mdx
@@ -6,7 +6,7 @@ sidebarTitle: Overview
To stream data out of RisingWave, you must create a sink. A sink is an external target that you can send data to. Use the [CREATE SINK](/docs/current/sql-create-sink/) statement to create a sink. You need to specify what data to be exported, the format, and the sink parameters.
-Sinks become visible right after you create them, regardless of the backfilling status. Therefore, it's important to understand that the data in the sinks may not immediately reflect the latest state of their upstream sources due to the latency of the sink, connector, and backfilling process. To determine whether the process is complete and the data in the sink is consistent, refer to [Monitor statement progress](/docs/current/view-statement-progress/).
+Sinks become visible right after you create them, regardless of the backfilling status. Therefore, it's important to understand that the data in the sinks may not immediately reflect the latest state of their upstream sources due to the latency of the sink, connector, and backfilling process. To determine whether the process is complete and the data in the sink is consistent, refer to [Monitor statement progress](/docs/current/monitor-statement-progress/).
Currently, RisingWave supports the following sink connectors:
@@ -121,9 +121,6 @@ WITH (
) FORMAT PLAIN ENCODE PARQUET(force_append_only='true');
```
-
-**NOTE**
-
+
File sink currently supports only append-only mode, so please change the query to `append-only` and specify this explicitly after the `FORMAT ... ENCODE ...` statement.
-
-
+
diff --git a/delivery/risingwave-as-postgres-fdw.mdx b/delivery/risingwave-as-postgres-fdw.mdx
index cd8e5d4a..6736b0a4 100644
--- a/delivery/risingwave-as-postgres-fdw.mdx
+++ b/delivery/risingwave-as-postgres-fdw.mdx
@@ -146,11 +146,9 @@ SELECT * FROM city_population;
seattle | 2
```
-
-**NOTE**
-
+
Currently, write operations to RisingWave through a foreign data wrapper are not supported. The data in the foreign table is read-only.
-
+
## Differences between sinking to Postgres and using FDW in Postgres
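
For context, a minimal sketch of the PostgreSQL-side setup this page describes, using the standard `postgres_fdw` extension; the host, port, and credentials are placeholders:

```sql
-- Run on PostgreSQL: expose RisingWave relations as read-only foreign
-- tables through postgres_fdw.
CREATE EXTENSION IF NOT EXISTS postgres_fdw;

CREATE SERVER risingwave
    FOREIGN DATA WRAPPER postgres_fdw
    OPTIONS (host '127.0.0.1', port '4566', dbname 'dev');

CREATE USER MAPPING FOR CURRENT_USER
    SERVER risingwave
    OPTIONS (user 'root', password '');

-- Import the schema so that city_population is queryable locally.
IMPORT FOREIGN SCHEMA public
    FROM SERVER risingwave INTO public;

SELECT * FROM city_population;   -- reads work; writes are rejected
```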
diff --git a/delivery/subscription.mdx b/delivery/subscription.mdx
index 8902cf9e..d22bc6ca 100644
--- a/delivery/subscription.mdx
+++ b/delivery/subscription.mdx
@@ -77,12 +77,10 @@ If you specify `FULL` instead of the `since_clause`, the subscription cursor sta
### Fetch from cursor
-
-**NOTE**
-
+
FETCH from cursor function is supported in the PSQL simple query mode and extended mode.
+
-
#### Non-blocking data fetch
```sql
diff --git a/deploy/risingwave-k8s-helm.mdx b/deploy/risingwave-k8s-helm.mdx
index 0dc3b758..d4cba9e2 100644
--- a/deploy/risingwave-k8s-helm.mdx
+++ b/deploy/risingwave-k8s-helm.mdx
@@ -47,11 +47,10 @@ Customize your configuration for the RisingWave deployment by editing the [value
* **Customize meta store**: The meta store in RisingWave holds metadata for cluster operations. See [Configuration](https://github.com/risingwavelabs/helm-charts/blob/main/docs/CONFIGURATION.md#customize-meta-store) for all the available options and [Examples](https://github.com/risingwavelabs/helm-charts/tree/main/examples/meta-stores) for detailed usage of meta stores.
* **Customize state store**: The state store in RisingWave serves as a fault-tolerant storage system for preserving system state. See [Configuration](https://github.com/risingwavelabs/helm-charts/blob/main/docs/CONFIGURATION.md#customize-state-store) for all the available options and [Examples](https://github.com/risingwavelabs/helm-charts/tree/main/examples/state-stores) for detailed usage of state stores.
* **Bundled PostgreSQL and MinIO**: If you want to use `PostgreSQL` as the meta store and `MinIO` as the state store, the Helm chart for RisingWave offers the option to bundle them together. This allows for a quick and easy setup of the Helm chart. See [Configuration](https://github.com/risingwavelabs/helm-charts/blob/main/docs/CONFIGURATION.md#bundled-etcdpostgresqlminio-as-stores) for more details. To enable this feature, set `tags.bundle=true`.
-
-**NOTE**
+
Before using the bundled `PostgreSQL` and `MinIO`, and any local stores, ensure that you have implemented the [Dynamic Volume Provisioning](https://kubernetes.io/docs/concepts/storage/dynamic-provisioning/).
-
+
Install the latest RisingWave Helm chart:
diff --git a/deploy/risingwave-kubernetes.mdx b/deploy/risingwave-kubernetes.mdx
index b58fef8a..3f5493a5 100644
--- a/deploy/risingwave-kubernetes.mdx
+++ b/deploy/risingwave-kubernetes.mdx
@@ -19,13 +19,10 @@ Ensure that [Docker](https://docs.docker.com/desktop/) is installed in your envi
## Create a Kubernetes cluster
-**INFO**
-
The steps in this section are intended for creating a Kubernetes cluster in your local environment.
If you are using a managed Kubernetes service such as AKS, GKE, and EKS, refer to the corresponding documentation for instructions.
-**Steps:**
@@ -57,7 +54,6 @@ Before the deployment, ensure that the following requirements are satisfied.
* `kubectl` version ≥ 1.18
* For Linux, set the value of the `sysctl` parameter [net.ipv4.ip\_forward](https://linuxconfig.org/how-to-turn-on-off-ip-forwarding-in-linux) to 1.
-**Steps:**
[Install cert-manager](https://cert-manager.io/docs/installation/) and wait a minute to allow for initialization.
@@ -82,8 +78,6 @@ kubectl apply --server-side -f https://github.com/risingwavelabs/risingwave-oper
You can find the release notes of each version [here](https://github.com/risingwavelabs/risingwave-operator/releases).
-**NOTE**
-
The following errors might occur if `cert-manager` is not fully initialized. Simply wait for another minute and rerun the command above.
```bash
Error from server (InternalError): Internal error occurred: failed calling webhook "webhook.cert-manager.io": failed to call webhook: Post "": dial tcp 10.105.102.32:443: connect: connection refused
@@ -184,8 +178,6 @@ spec:
-**NOTE**
-
The performance of MinIO is closely tied to the disk performance of the node where it is hosted. We have observed that AWS EBS does not perform well in our tests. For optimal performance, we recommend using S3 or a compatible cloud service.
```yaml
@@ -353,14 +345,14 @@ core-site.xml hdfs-site.xml
```
-1. Next, create a ConfigMap, where `hadoop-conf` is the name of ConfigMap:
+2. Next, create a ConfigMap, where `hadoop-conf` is the name of ConfigMap:
```bash
kubectl create configmap hadoop-conf --from-file $HADOOP_HOME/etc/hadoop
```
-1. Then mount the Hadoop configuration files using this ConfigMap:
+3. Then mount the Hadoop configuration files using this ConfigMap:
```yaml
@@ -486,7 +478,6 @@ You can check the status of the RisingWave instance by running the following com
```bash
kubectl get risingwave
-
```
If the instance is running properly, the output should look like this:
@@ -502,8 +493,6 @@ risingwave True postgresql S3 30s
By default, the Operator creates a service for the frontend component, through which you can interact with RisingWave, with the type of `ClusterIP`. But it is not accessible outside Kubernetes. Therefore, you need to create a standalone Pod for PostgreSQL inside Kubernetes.
-**Steps:**
-
```bash
@@ -525,8 +514,6 @@ psql -h risingwave-frontend -p 4567 -d dev -U root
You can connect to RisingWave from Nodes such as EC2 in Kubernetes
-**Steps:**
-
1. In the `risingwave.yaml` file that you use to deploy the RisingWave instance, add a `frontendServiceType` parameter to the configuration of the RisingWave service, and set its value to `NodePort`.
```bash
# ...
@@ -548,8 +535,6 @@ psql -h ${RISINGWAVE_HOST} -p ${RISINGWAVE_PORT} -d dev -U root
If you are using EKS, GCP, or other managed Kubernetes services provided by cloud vendors, you can expose the Service to the public network with a load balancer in the cloud.
-**Steps:**
-
1. In the `risingwave.yaml` file that you use to deploy the RisingWave instance, add a `frontendServiceType` parameter to the configuration of the RisingWave service, and set its value to `LoadBalancer`.
```bash
# ...
diff --git a/deploy/upgrade-risingwave-k8s.mdx b/deploy/upgrade-risingwave-k8s.mdx
index 8b7a195e..b307239c 100644
--- a/deploy/upgrade-risingwave-k8s.mdx
+++ b/deploy/upgrade-risingwave-k8s.mdx
@@ -10,11 +10,9 @@ description: "This topic describes how to upgrade RisingWave in a K8s deployment with t
When upgrading RisingWave, it's important to be aware that there may be breaking changes. If you require technical support during the process of upgrading RisingWave in your production environments, please don't hesitate to reach out to us.
-
-**NOTE**
-
+
Assuming that the Kubernetes namespace is `default`, if your RisingWave cluster is deployed in another namespace, please add the `-n ` argument to the `kubectl` and `helm` commands below. Remember to replace the `` with your own namespace.
-
+
## Upgrade RisingWave with Helm[](#upgrade-risingwave-with-helm "Direct link to Upgrade RisingWave with Helm")
diff --git a/get-started/quickstart.mdx b/get-started/quickstart.mdx
index 93f95c10..1f4d14fb 100644
--- a/get-started/quickstart.mdx
+++ b/get-started/quickstart.mdx
@@ -6,16 +6,10 @@ description: "This guide aims to provide a quick and easy way to get started wit
## Step 1: Start RisingWave
-
-**INFO**
-
The following options start RisingWave in the standalone mode. In this mode, data is stored in the file system and the metadata is stored in the embedded SQLite database. See [About RisingWave standalone mode](#about-risingwave-standalone-mode) for more details.
For extensive testing or single-machine deployment, consider [starting RisingWave via Docker Compose](/docs/current/risingwave-docker-compose/). For production environments, consider [RisingWave Cloud](/docs/current/risingwave-cloud/), our fully managed service, or [deployment on Kubernetes using the Operator](/docs/current/risingwave-kubernetes/) or [Helm Chart](/docs/current/risingwave-k8s-helm/).
-
-
-
### Script installation
Open a terminal and run the following `curl` command.
diff --git a/ingestion/format-and-encode-parameters.mdx b/ingestion/format-and-encode-parameters.mdx
index 1eb89e1b..f87b46de 100644
--- a/ingestion/format-and-encode-parameters.mdx
+++ b/ingestion/format-and-encode-parameters.mdx
@@ -32,9 +32,8 @@ The `ENCODE` parameter represents the data encoding and includes the following o
* `CSV`: Data serialized in CSV format in the message queue, compatible with `FORMAT PLAIN`.
* `Bytes`: Data exists in the message queue in raw bytes format, compatible with `FORMAT PLAIN`.
-
-**NOTE**
+
+We support `FORMAT UPSERT ENCODE PROTOBUF` but DON'T RECOMMEND using it, because this may disrupt the order of upserts. For more details, see the [documentation of Confluent](https://docs.confluent.io/platform/7.6/control-center/topics/schema.html#c3-schemas-best-practices-key-value-pairs).
-* We support `FORMAT UPSERT ENCODE PROTOBUF` but DON'T RECOMMEND using it, because this may disrupt the order of upserts. For more details, see the [documentation of Confluent](https://docs.confluent.io/platform/7.6/control-center/topics/schema.html#c3-schemas-best-practices-key-value-pairs).
-* Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse.
-
+Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse.
+
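
A minimal sketch of where the two kinds of parameters go; the topic, broker address, and schema registry URL are placeholders:

```sql
-- Connector settings belong in the WITH clause; schema and format
-- settings belong in the FORMAT ... ENCODE ( ... ) clause.
CREATE SOURCE avro_source_example
WITH (
    connector = 'kafka',                           -- WITH: connector options
    topic = 'demo_topic',
    properties.bootstrap.server = 'broker:9092'
)
FORMAT PLAIN ENCODE AVRO (
    schema.registry = 'http://registry:8081'       -- FORMAT/ENCODE: schema options
);
```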
diff --git a/ingestion/generate-test-data.mdx b/ingestion/generate-test-data.mdx
index 510fe1de..ab23b481 100644
--- a/ingestion/generate-test-data.mdx
+++ b/ingestion/generate-test-data.mdx
@@ -37,11 +37,11 @@ The sequence load generator can generate numbers, incremented by 1, from the sta
Specify the following fields for every column.
-| column\_parameter | Description | Value | Required? |
-| ----------------- | ------------------------------------------------------ | ---------------------------------------------- | -------------------- |
-| kind | Generator type | Set to sequence. | FalseDefault: random |
-| start | Starting numberMust be smaller than the ending number. | Any number of the column data typeExample: 50 | FalseDefault: 0 |
-| end | Ending numberMust be larger than the starting number. | Any number of the column data typeExample: 100 | FalseDefault: 32767 |
+| column\_parameter | Description | Value | Required? |
+| :---------------- | :---------------- | :-------------------- | :------------------- |
+| kind | Generator type. | Set to `sequence`. | False. Default: `random` |
+| start | Starting number. Must be smaller than the ending number. | Any number of the column data type. Example: `50` | False. Default: `0` |
+| end | Ending number. Must be larger than the starting number. | Any number of the column data type. Example: `100` | False. Default: `32767` |
@@ -49,12 +49,12 @@ The random number generator produces random numbers within a certain range.
Specify the following fields for every column in the source you are creating.
-| column\_parameter | Description | Value | Required? |
-| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- | --------------------------------------------------------------------- |
-| kind | Generator type | Set to random. | FalseDefault: random |
-| min | The minimum number can be generated.Must be smaller than the maximum number. | Any number of the column data typeExample: 50 | FalseDefault: 0 |
-| max | The maximum number can be generated.Must be larger than the minimum number. | Any number of the column data typeExample: 100 | FalseDefault: 32767 |
-| seed | A seed number that initializes the random load generator. The sequence of the generated numbers is determined by the seed value. If given the same seed number, the generator will produce the same sequence of numbers. | A positive integerExample: 3 | FalseIf not specified, a fixed sequence of numbers will be generated. |
+| column\_parameter | Description | Value | Required? |
+| :---------------- | :---------------------- | :----------------- | :--------------------- |
+| kind | Generator type. | Set to `random`. | False. Default: `random` |
+| min | The minimum number that can be generated. Must be smaller than the maximum number. | Any number of the column data type. Example: `50` | False. Default: `0` |
+| max | The maximum number that can be generated. Must be larger than the minimum number. | Any number of the column data type. Example: `100` | False. Default: `32767` |
+| seed | A seed number that initializes the random load generator. The sequence of the generated numbers is determined by the seed value. If given the same seed number, the generator will produce the same sequence of numbers. | A positive integer. Example: `3` | False. If not specified, a fixed sequence of numbers will be generated. |
@@ -65,13 +65,13 @@ The random timestamp and timestamptz generator produces random timestamps and ti
Specify the following fields for every column in the source you are creating.
-| column\_parameter | Description | Value | Required? |
-| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |
-| kind | Generator type | Set to random. | FalseDefault: random |
-| max\_past | Specify the maximum deviation from the baseline timestamp or timestamptz to determine the earliest possible timestamp or timestamptz that can be generated. | An [interval](/docs/current/sql-data-types/)Example: 2h 37min | FalseDefault: 1 day |
-| max\_past\_mode | Specify the baseline timestamp or timestamptz. The range for generated timestamps or timestamptzs is \[base time - max\_past , base time\] | absolute — The base time is set to the execution time of the generator. The base time is fixed for each generation.relative — The base time is the system time obtained each time a new record is generated. | FalseDefault: absolute |
-| basetime | If set, the generator will ignore max\_past\_mode and use the specified time as the base time. | A [date and time string](https://docs.rs/chrono/latest/chrono/struct.DateTime.html#method.parse%5Ffrom%5Frfc3339)Example: 2023-04-01T16:39:57-08:00 | FalseDefault: generator execution time |
-| seed | A seed number that initializes the random load generator. The sequence of the generated timestamps or timestamptzs is determined by the seed value. If given the same seed number, the generator will produce the same sequence of timestamps or timestamptzs. | A positive integerExample: 3 | FalseIf not specified, a fixed sequence of timestamps or timestamptzs will be generated (if the system time is constant). |
+| column\_parameter | Description | Value | Required? |
+| :---------------- | :--------------- | :------------- | :----------------- |
+| kind | Generator type. | Set to `random`. | False. Default: `random` |
+| max\_past | Specify the maximum deviation from the baseline timestamp or timestamptz to determine the earliest possible timestamp or timestamptz that can be generated. | An [interval](/docs/current/sql-data-types/). Example: `2h 37min` | False. Default: `1 day` |
+| max\_past\_mode | Specify the baseline timestamp or timestamptz. The range for generated timestamps or timestamptzs is \[base time - `max_past`, base time\] | `absolute` — The base time is set to the execution time of the generator. The base time is fixed for each generation. `relative` — The base time is the system time obtained each time a new record is generated. | False. Default: `absolute` |
+| basetime | If set, the generator will ignore max\_past\_mode and use the specified time as the base time. | A [date and time string](https://docs.rs/chrono/latest/chrono/struct.DateTime.html#method.parse%5Ffrom%5Frfc3339). Example: `2023-04-01T16:39:57-08:00` | False. Default: generator execution time |
+| seed | A seed number that initializes the random load generator. The sequence of the generated timestamps or timestamptzs is determined by the seed value. If given the same seed number, the generator will produce the same sequence of timestamps or timestamptzs. | A positive integer. Example: `3` | False. If not specified, a fixed sequence of timestamps or timestamptzs will be generated (if the system time is constant). |
@@ -81,11 +81,11 @@ The random varchar generator produces random combination of uppercase and lowerc
Specify the following fields for every column in the source you are creating.
-| column\_parameter | Description | Value | Required? |
-| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------- | ------------------------------------------------------------------------ |
-| kind | Generator type | Set to random. | FalseDefault: random |
-| length | The length of the varchar to be generated. | A positive integerExample: 16 | FalseDefault: 10 |
-| seed | A seed number that initializes the random load generator. The sequence of the generated characters is determined by the seed value. If given the same seed number, the generator will produce the same sequence of characters. | A positive integerExample: 3 | FalseIf not specified, a fixed sequence of characters will be generated. |
+| column\_parameter | Description | Value | Required? |
+| :---------------- | :---------------- | :---------------- | :---------------- |
+| kind | Generator type. | Set to `random`. | False. Default: `random` |
+| length | The length of the varchar to be generated. | A positive integer. Example: `16` | False. Default: `10` |
+| seed | A seed number that initializes the random load generator. The sequence of the generated characters is determined by the seed value. If given the same seed number, the generator will produce the same sequence of characters. | A positive integer. Example: `3` | False. If not specified, a fixed sequence of characters will be generated. |
@@ -107,10 +107,9 @@ WITH (
```
-**INFO**
+You need to configure each nested column in the struct. Select other tabs according to the data type of the nested columns for information on column parameters.
-* You need to configure each nested column in the struct. Select other tabs according to the data type of the nested columns for information on column parameters.
-* When you configure a nested column, use `column.nested_column` to specify it. For example, `v1.v2` and `v1.v3` in the `WITH` clause above.
+When you configure a nested column, use `column.nested_column` to specify it. For example, `v1.v2` and `v1.v3` in the `WITH` clause above.
@@ -132,10 +131,9 @@ WITH (
```
-**INFO**
+You need to specify the number of elements in the array in the `WITH` clause. `fields.c1.length = '3'` in the example above means that `c1` is an array of three elements.
-* You need to specify the number of elements in the array in the `WITH` clause. `fields.c1.length = '3'` in the example above means that `c1` is an array of three elements.
-* When you configure the elements in an array, use `column._` to specify them. For example, `c1._` in the `WITH` clause above.
+When you configure the elements in an array, use `column._` to specify them. For example, `c1._` in the `WITH` clause above.
Select other tabs according to the data type of the array for information on column parameters.
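
A minimal sketch combining the sequence and random varchar generators described in the tables above; the source and column names are arbitrary:

```sql
-- v1 uses the sequence generator, v2 the random varchar generator.
CREATE SOURCE gen_example (v1 INT, v2 VARCHAR)
WITH (
    connector = 'datagen',
    fields.v1.kind = 'sequence',
    fields.v1.start = '1',
    fields.v1.end = '100',
    fields.v2.kind = 'random',
    fields.v2.length = '16',
    fields.v2.seed = '3',
    datagen.rows.per.second = '10'
) FORMAT PLAIN ENCODE JSON;
```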
diff --git a/ingestion/modify-source-or-table-schemas.mdx b/ingestion/modify-source-or-table-schemas.mdx
index fb38be7e..ffcb5831 100644
--- a/ingestion/modify-source-or-table-schemas.mdx
+++ b/ingestion/modify-source-or-table-schemas.mdx
@@ -23,11 +23,9 @@ ALTER TABLE ADD COLUMN ;
For details about these two commands, see [ALTER SOURCE](/docs/current/sql-alter-source/) and [ALTER TABLE](/docs/current/sql-alter-table/).
-
-**NOTE**
-
+
Note that you cannot add a primary key column to a source or table in RisingWave. To modify the primary key of a source or table, you need to recreate the table.
-
+
When you add a column to a source or table, the new column is not automatically picked up in a downstream materialized view.
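
For example (hypothetical table and column names), adding a column is a single statement, but existing downstream materialized views will not automatically include the new column:

```sql
ALTER TABLE web_events ADD COLUMN user_agent VARCHAR;
```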
@@ -93,11 +91,9 @@ ALTER SOURCE src_user FORMAT PLAIN ENCODE PROTOBUF(
);
```
-
-**NOTE**
-
+
Currently, it is not supported to modify the `data_format` and `data_encode`. Furthermore, when refreshing the schema registry of a source, it is not allowed to drop columns or change types.
-
+
In addition, when the [FORMAT and ENCODE options](/docs/current/formats-and-encode-parameters/) are not changed, the `REFRESH SCHEMA` clause of `ALTER SOURCE` can also be used to refresh the schema of a source.
@@ -142,11 +138,9 @@ Refresh schema of table
ALTER TABLE src_user REFRESH SCHEMA;
```
-
-**NOTE**
-
+
If a downstream fragment references a column that is either missing or has undergone a type change in the updated schema, the command will be declined.
-
+
## See also
diff --git a/ingestion/supported-sources-and-formats.mdx b/ingestion/supported-sources-and-formats.mdx
index 2a42ae9e..fbc03ffd 100644
--- a/ingestion/supported-sources-and-formats.mdx
+++ b/ingestion/supported-sources-and-formats.mdx
@@ -6,14 +6,12 @@ title: "Supported sources and formats"
Below is the complete list of connectors supported by RisingWave. Click a connector name to see the SQL syntax, options, and sample statement of connecting RisingWave to the connector.
-
-**NOTE**
-
+
To ingest data in formats marked with "T", you need to create tables (with connector settings). Otherwise, you can create either sources or tables (with connector settings).
-
+
-| Connector | Version | Format |
-| ----------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Connector | Version | Format |
+| :------------ | :------------ | :------------------- |
| [Kafka](/docs/current/ingest-from-kafka/) | 3.1.0 or later versions | [Avro](#avro), [JSON](#json), [protobuf](#protobuf), [Debezium JSON](#debezium-json) (T), [Debezium AVRO](#debezium-avro) (T), [DEBEZIUM\_MONGO\_JSON](#debezium-mongo-json) (T), [Maxwell JSON](#maxwell-json) (T), [Canal JSON](#canal-json) (T), [Upsert JSON](#upsert-json) (T), [Upsert AVRO](#upsert-avro) (T), [Bytes](#bytes) |
| [Redpanda](/docs/current/ingest-from-redpanda/) | Latest | [Avro](#avro), [JSON](#json), [protobuf](#protobuf) |
| [Pulsar](/docs/current/ingest-from-pulsar/) | 2.8.0 or later versions | [Avro](#avro), [JSON](#json), [protobuf](#protobuf), [Debezium JSON](#debezium-json) (T), [Maxwell JSON](#maxwell-json) (T), [Canal JSON](#canal-json) (T) |
@@ -26,11 +24,9 @@ To ingest data in formats marked with "T", you need to create tables (with conne
| [Google Pub/Sub](/docs/current/ingest-from-google-pubsub/) | [Avro](#avro), [JSON](#json), [protobuf](#protobuf), [Debezium JSON](#debezium-json) (T), [Maxwell JSON](#maxwell-json) (T), [Canal JSON](#canal-json) (T) | |
| [Google Cloud Storage](/docs/current/ingest-from-gcs/) | [JSON](#json) | |
-
-**NOTE**
-
+
When a source is created, RisingWave does not ingest data immediately. RisingWave starts to process data when a materialized view is created based on the source.
-
+
## Supported formats
@@ -211,8 +207,6 @@ For data in protobuf format, you must specify a message (fully qualified by pack
Optionally, you can define a `schema.registry.name.strategy` if `schema.registry` is set. Accepted options include `topic_name_strategy`, `record_name_strategy`, and `topic_record_name_strategy`. For additional details on name strategy, see [Subject name strategy](https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#subject-name-strategy).
-**INFO**
-
For protobuf data, you cannot specify the schema in the `schema_definition` section of a `CREATE SOURCE` or `CREATE TABLE` statement.
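
A minimal sketch of a protobuf source; the message name, topic, and registry URL are placeholders. No `schema_definition` section appears in the statement because the schema is fetched from the registry:

```sql
CREATE SOURCE pb_source_example
WITH (
    connector = 'kafka',
    topic = 'pb_topic',
    properties.bootstrap.server = 'broker:9092'
)
FORMAT PLAIN ENCODE PROTOBUF (
    message = 'package.MessageName',              -- fully qualified by package path
    schema.registry = 'http://registry:8081'
);
```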
diff --git a/integrations/destinations/amazon-dynamodb.mdx b/integrations/destinations/amazon-dynamodb.mdx
index 708908a8..e83dd65b 100644
--- a/integrations/destinations/amazon-dynamodb.mdx
+++ b/integrations/destinations/amazon-dynamodb.mdx
@@ -89,8 +89,6 @@ This makes sure that the data structure in RisingWave aligns with the key defini
| array | list (L) |
| JSONB | string (S) |
-
-**NOTE**
-
+
The `struct` datatype in RisingWave will map to `map (M)` in DynamoDB in a recursive way. Refer to [source code](https://github.com/risingwavelabs/risingwave/blob/88bb14aa6eb481f1dc0e92ee190bafad089d2afd/src/connector/src/sink/dynamodb.rs#L386) for details.
-
+
diff --git a/integrations/destinations/apache-doris.mdx b/integrations/destinations/apache-doris.mdx
index fe001bf5..c7bdfb6f 100644
--- a/integrations/destinations/apache-doris.mdx
+++ b/integrations/destinations/apache-doris.mdx
@@ -100,9 +100,6 @@ In regards to `decimal` types, RisingWave will round to the nearest decimal plac
| JSONB | JSONB |
| BIGINT | SERIAL |
-
-**NOTE**
-
+
Before v1.9, when inserting data into an Apache Doris sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to the behavior. If a decimal value is out of bounds or represents "inf", "-inf", or "nan", we will insert null values.
-
-
+
diff --git a/integrations/destinations/apache-kafka.mdx b/integrations/destinations/apache-kafka.mdx
index 5d264690..5db529fd 100644
--- a/integrations/destinations/apache-kafka.mdx
+++ b/integrations/destinations/apache-kafka.mdx
@@ -22,32 +22,30 @@ FORMAT data_format ENCODE data_encode [ (
;
```
-
-**NOTE**
-
+
Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/).
-
+
## Basic parameters
All `WITH` options are required unless explicitly mentioned as optional.
-| Parameter or clause | Description |
-| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| sink\_name | Name of the sink to be created. |
+| Parameter or clause | Description |
+| :-------------------------- | :------------- |
+| sink\_name | Name of the sink to be created. |
| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
-| connector | Sink connector type must be 'kafka' for Kafka sink. |
-| properties.bootstrap.server | Address of the Kafka broker. Format: ‘ip:port’. If there are multiple brokers, separate them with commas. |
-| topic | Address of the Kafka topic. One sink can only correspond to one topic. |
+| connector | Sink connector type must be `kafka` for Kafka sink. |
+| properties.bootstrap.server | Address of the Kafka broker. Format: `ip:port`. If there are multiple brokers, separate them with commas. |
+| topic | Address of the Kafka topic. One sink can only correspond to one topic. |
| primary\_key | Conditional. The primary keys of the sink. Use ',' to delimit the primary key columns. This field is optional if creating a PLAIN sink, but required if creating a DEBEZIUM or UPSERT sink. |
## Additional Kafka parameters
When creating a Kafka sink in RisingWave, you can specify the following Kafka-specific parameters. To set the parameter, add the RisingWave equivalent of the Kafka parameter as a `WITH` option. For additional details on these parameters, see the [Configuration properties](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
-| Kafka parameter name | RisingWave parameter name | Type |
-| ------------------------------------- | ------------------------------------------------ | ------ |
+| Kafka parameter name | RisingWave parameter name | Type |
+| :----------------------- | :------------------------------ | :----- |
| allow.auto.create.topics | properties.allow.auto.create.topics | bool |
| batch.num.messages | properties.batch.num.messages | int |
| batch.size | properties.batch.size | int |
@@ -66,36 +64,33 @@ When creating a Kafka sink in RisingWave, you can specify the following Kafka-sp
| receive.message.max.bytes | properties.receive.message.max.bytes | int |
| ssl.endpoint.identification.algorithm | properties.ssl.endpoint.identification.algorithm | str |
-
-**NOTE**
-* Set `properties.ssl.endpoint.identification.algorithm` to `none` to bypass the verification of CA certificates and resolve SSL handshake failure. This parameter can be set to either `https` or `none`. By default, it is `https`.
-* Starting with version 2.0, the default value for `properties.message.timeout.ms` has changed from 5 seconds to **5 minutes**, aligning with the default setting in the [official Kafka library](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
-
+Set `properties.ssl.endpoint.identification.algorithm` to `none` to bypass the verification of CA certificates and resolve SSL handshake failure. This parameter can be set to either `https` or `none`. By default, it is `https`.
+
+Starting with version 2.0, the default value for `properties.message.timeout.ms` has changed from 5 seconds to **5 minutes**, aligning with the default setting in the [official Kafka library](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
-## FORMAT and ENCODE options
-
-**NOTE**
+## FORMAT and ENCODE options
-These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause
+
+These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause.
+
-
-| Field | Notes |
-| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
-| data\_encode | Data encode. Allowed encodes: JSON: Supports PLAIN JSON, UPSERT JSON and DEBEZIUM JSON sinks. AVRO: Supports UPSERT AVRO and PLAIN AVRO sinks. PROTOBUF: Supports PLAIN PROTOBUF and UPSERT PROTOBUF sinks. For UPSERT PROTOBUF sinks, you must specify key encode text, while it remains optional for other format/encode combinations. |
-| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. |
-| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. \- If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. \- When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. |
-| schemas.enable | Only configurable for upsert JSON sinks. By default, this value is false for upsert JSON sinks and true for debezium JSON sinks. If true, RisingWave will sink the data with the schema to the Kafka sink. Note that this is not referring to a schema registry containing a JSON schema, but rather schema formats defined using [Kafka Connect](https://www.confluent.io/blog/kafka-connect-deep-dive-converters-serialization-explained/#json-schemas). |
-| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). |
+| Field | Notes |
+| :------------------------ | :-------------------------- |
+| data\_format | Data format. Allowed formats: <br/> `PLAIN`: Output data with insert operations. <br/> `DEBEZIUM`: Output change data capture (CDC) log in Debezium format. <br/> `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case. <br/> To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
+| data\_encode | Data encode. Allowed encodes: <br/> `JSON`: Supports `PLAIN JSON`, `UPSERT JSON` and `DEBEZIUM JSON` sinks. <br/> `AVRO`: Supports `UPSERT AVRO` and `PLAIN AVRO` sinks. <br/> `PROTOBUF`: Supports `PLAIN PROTOBUF` and `UPSERT PROTOBUF` sinks. <br/> For `UPSERT PROTOBUF` sinks, you must specify `key encode text`, while it remains optional for other format/encode combinations. |
+| force\_append\_only | If true, forces the sink to be `PLAIN` (also known as append-only), even if it cannot be. |
+| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. <br/> If omitted, the output format of timestamptz is `2023-11-11T18:30:09.453000Z` which includes the UTC suffix `Z`. <br/> When `utc_without_suffix` is specified, the format is changed to `2023-11-11 18:30:09.453000`. |
+| schemas.enable | Only configurable for upsert JSON sinks. By default, this value is false for upsert JSON sinks and true for debezium JSON sinks. If true, RisingWave will sink the data with the schema to the Kafka sink. This is not referring to a schema registry containing a JSON schema, but rather schema formats defined using [Kafka Connect](https://www.confluent.io/blog/kafka-connect-deep-dive-converters-serialization-explained/#json-schemas). |
+| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
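To make the distinction from the `WITH` clause concrete, here is a hedged sketch of a sink whose format/encode option is set after `ENCODE`, while connector settings stay in `WITH`; the object names and addresses are placeholders.

```sql
-- Illustrative sketch: timestamptz.handling.mode goes in the FORMAT ... ENCODE ... ( ) clause,
-- not in WITH. Names and addresses are placeholders.
CREATE SINK sink_users FROM mv_users
WITH (
   connector = 'kafka',
   properties.bootstrap.server = 'broker1:9092',
   topic = 'users_topic',
   primary_key = 'user_id'
)
FORMAT UPSERT ENCODE JSON (
   timestamptz.handling.mode = 'utc_without_suffix'
);
```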
### Avro specific parameters
When creating an Avro sink, the following options can be used following `FORMAT UPSERT ENCODE AVRO` or `FORMAT PLAIN ENCODE AVRO`.
| Field | Notes |
-| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
+| :---------------------------- | :----------------------------------------------------------------------------------------------------------------------------- |
| schema.registry | Required. The address of the schema registry. |
| schema.registry.username | Optional. The user name used to access the schema registry. |
| schema.registry.password | Optional. The password associated with the user name. |
@@ -105,7 +100,7 @@ When creating an Avro sink, the following options can be used following `FORMAT
Syntax:
-```js
+```sql
FORMAT [ UPSERT | PLAIN ]
ENCODE AVRO (
schema.registry = 'schema_registry_url',
@@ -123,8 +118,8 @@ For data type mapping, the serial type is supported. We map the serial type to t
When creating an append-only Protobuf sink, the following options can be used following `FORMAT PLAIN ENCODE PROTOBUF` or `FORMAT UPSERT ENCODE PROTOBUF`.
-| Field | Notes |
-| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| Field | Notes |
+| :---------------------------- | :----------------------- |
| message | Required. Package qualified message name of the main Message in the schema definition. |
| schema.location | Required if schema.registry is not specified. Only one of schema.location or schema.registry can be defined. The location of the schema, in file://, http://, or https:// format. |
| schema.registry | Required if schema.location is not specified. Only one of schema.location or schema.registry can be defined. The address of the schema registry. |
@@ -132,11 +127,9 @@ When creating an append-only Protobuf sink, the following options can be used fo
| schema.registry.password | Optional. The password associated with the user name. |
| schema.registry.name.strategy | Optional. Accepted options include topic\_name\_strategy (default), record\_name\_strategy, and topic\_record\_name\_strategy. |
-
-**NOTE**
-
+
The `file://` format is not recommended for production use. If it is used, the schema file must be accessible to both meta and compute nodes.
-
+
Syntax:
@@ -245,7 +238,7 @@ If your Kafka sink service is located in a different VPC from RisingWave, use AW
To create a Kafka sink with a PrivateLink connection, in the WITH section of your `CREATE SINK` statement, specify the following parameters.
| Parameter | Notes |
-| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| privatelink.targets | The PrivateLink targets that correspond to the Kafka brokers. The targets should be in JSON format. Note that each target listed corresponds to each broker specified in the properties.bootstrap.server field. If the order is incorrect, there will be connectivity issues. |
| privatelink.endpoint | The DNS name of the VPC endpoint. If you're using RisingWave Cloud, you can find the auto-generated endpoint after you created a connection. See details in [Create a VPC connection](/cloud/create-a-connection/#whats-next). |
| connection.name | The name of the connection, which comes from the connection created using the [CREATE CONNECTION](/docs/current/sql-create-connection/) statement. Omit this parameter if you have provisioned a VPC endpoint using privatelink.endpoint (recommended). |
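A hedged sketch of how these PrivateLink parameters fit into `CREATE SINK`; the broker addresses, ports, and endpoint DNS name are placeholders, and the target list must follow the order of the brokers in `properties.bootstrap.server`.

```sql
-- Illustrative sketch only; replace the placeholders with your broker and endpoint details.
CREATE SINK sink_privatelink FROM mv_events
WITH (
   connector = 'kafka',
   properties.bootstrap.server = 'broker1.example.com:9092,broker2.example.com:9092',
   topic = 'events_topic',
   privatelink.targets = '[{"port": 9092}, {"port": 9093}]',
   privatelink.endpoint = 'vpce-example.vpce-svc-example.us-east-1.vpce.amazonaws.com'
)
FORMAT PLAIN ENCODE JSON;
```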
@@ -291,18 +284,16 @@ You need to specify encryption and authentication parameters in the WITH section
To sink data encrypted with SSL without SASL authentication, specify these parameters in the WITH section of your `CREATE SINK` statement.
| Parameter | Notes |
-| ----------------------------------- | ----------- |
+| :---------------------------------- | :---------- |
| properties.security.protocol | Set to SSL. |
| properties.ssl.ca.location | |
| properties.ssl.certificate.location | |
| properties.ssl.key.location | |
| properties.ssl.key.password | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the names in that list do not include the `properties.` prefix used by RisingWave, so drop the prefix when looking a parameter up.
-
+
Here is an example of creating a sink encrypted with SSL without using SASL authentication.
@@ -323,18 +314,16 @@ FORMAT PLAIN ENCODE JSON;
| Parameter | Notes |
-| ---------------------------- | ---------------------------------------------------------------------------------------------- |
+| :--------------------------- | :--------------------------------------------------------------------------------------------- |
| properties.security.protocol | For SASL/PLAIN without SSL, set to SASL\_PLAINTEXT. For SASL/PLAIN with SSL, set to SASL\_SSL. |
| properties.sasl.mechanism | Set to PLAIN. |
| properties.sasl.username | |
| properties.sasl.password | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the names in that list do not include the `properties.` prefix used by RisingWave, so drop the prefix when looking a parameter up.
+
-
For SASL/PLAIN with SSL, you need to include these SSL parameters:
* `properties.ssl.ca.location`
@@ -380,17 +369,15 @@ FORMAT PLAIN ENCODE JSON;
| Parameter | Notes |
-| ---------------------------- | ---------------------------------------------------------------------------------------------- |
+| :--------------------------- | :--------------------------------------------------------------------------------------------- |
| properties.security.protocol | For SASL/SCRAM without SSL, set to SASL\_PLAINTEXT. For SASL/SCRAM with SSL, set to SASL\_SSL. |
| properties.sasl.mechanism | Set to SCRAM-SHA-256 or SCRAM-SHA-512 depending on the encryption method used. |
| properties.sasl.username | |
| properties.sasl.password | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the names in that list do not include the `properties.` prefix used by RisingWave, so drop the prefix when looking a parameter up.
-
+
For SASL/SCRAM with SSL, you also need to include these SSL parameters:
@@ -418,7 +405,7 @@ FORMAT PLAIN ENCODE JSON;
| Parameter | Notes |
-| ------------------------------------------------ | ---------------------------------------------------------------------------------- |
+| :----------------------------------------------- | :--------------------------------------------------------------------------------- |
| properties.security.protocol | Set to SASL\_PLAINTEXT, as RisingWave does not support using SASL/GSSAPI with SSL. |
| properties.sasl.mechanism | Set to GSSAPI. |
| properties.sasl.kerberos.service.name | |
@@ -427,11 +414,9 @@ FORMAT PLAIN ENCODE JSON;
| properties.sasl.kerberos.kinit.cmd | |
| properties.sasl.kerberos.min.time.before.relogin | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the names in that list do not include the `properties.` prefix used by RisingWave, so drop the prefix when looking a parameter up.
-
+
Here is an example of creating a sink authenticated with SASL/GSSAPI without SSL encryption.
@@ -460,16 +445,14 @@ The implementation of SASL/OAUTHBEARER in RisingWave validates only [unsecured c
| Parameter | Notes |
-| ---------------------------------- | ---------------------------------------------------------------------------------------------------------- |
+| :--------------------------------- | :--------------------------------------------------------------------------------------------------------- |
| properties.security.protocol | For SASL/OAUTHBEARER without SSL, set to SASL\_PLAINTEXT. For SASL/OAUTHBEARER with SSL, set to SASL\_SSL. |
| properties.sasl.mechanism | Set to OAUTHBEARER. |
| properties.sasl.oauthbearer.config | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the names in that list do not include the `properties.` prefix used by RisingWave, so drop the prefix when looking a parameter up. Also, due to the limitation of the SASL/OAUTHBEARER implementation, you only need to specify one OAUTHBEARER parameter: `properties.sasl.oauthbearer.config`. Other OAUTHBEARER parameters are not applicable.
-
+
For SASL/OAUTHBEARER with SSL, you also need to include these SSL parameters:
@@ -499,7 +482,7 @@ WITH (
## Data type mapping - RisingWave and Debezium JSON
| RisingWave Data Type | Schema Type in JSON | Schema Name in JSON |
-| ---------------------- | ------------------- | --------------------------------------- |
+| :--------------------- | :------------------ | :-------------------------------------- |
| boolean | boolean | n/a |
| smallint | int16 | n/a |
| integer | int32 | n/a |
diff --git a/integrations/destinations/apache-pulsar.mdx b/integrations/destinations/apache-pulsar.mdx
index 7b7f32ab..161417d6 100644
--- a/integrations/destinations/apache-pulsar.mdx
+++ b/integrations/destinations/apache-pulsar.mdx
@@ -36,8 +36,8 @@ FORMAT data_format ENCODE data_encode [ (
## Parameters
-| Parameter Names | Description |
-| ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| Parameter names | Description |
+| :---------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| topic | Required. The address of the Pulsar topic. One source can only correspond to one topic. |
| service.url | Required. The address of the Pulsar service. |
| auth.token | Optional. A token for auth. If both auth.token and oauth are set, only oauth authorization is considered. |
@@ -53,14 +53,12 @@ FORMAT data_format ENCODE data_encode [ (
## FORMAT and ENCODE options
-
-**NOTE**
-
+
These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause.
-
+
| Field | Notes |
-| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
| data\_encode | Data encode. Supported encode: JSON. |
| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. |
diff --git a/integrations/destinations/aws-kinesis.mdx b/integrations/destinations/aws-kinesis.mdx
index 521343a7..f50bb4f9 100644
--- a/integrations/destinations/aws-kinesis.mdx
+++ b/integrations/destinations/aws-kinesis.mdx
@@ -28,8 +28,8 @@ FORMAT data_format ENCODE data_encode [ (
## Basic parameters
-| Field | Notes |
-| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Field | Notes |
+| :---------------------------------- | :--------------------- |
| stream | Required. Name of the stream. |
| aws.region | Required. AWS service region. For example, US East (N. Virginia). |
| endpoint | Optional. URL of the entry point for the AWS Kinesis service. |
@@ -40,27 +40,22 @@ FORMAT data_format ENCODE data_encode [ (
| aws.credentials.role.external\_id | Optional. The [external id](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) used to authorize access to third-party resources. |
| primary\_key | Required. The primary keys of the sink. Use ',' to delimit the primary key columns. |
-
-**NOTE**
-
In the Kinesis sink, we use the [PutRecords](https://docs.aws.amazon.com/kinesis/latest/APIReference/API%5FPutRecords.html) API to send multiple records in batches to achieve higher throughput. Due to the limitations of Kinesis, records might be out of order when using this API. Nevertheless, the current implementation of the Kinesis sink guarantees at-least-once delivery and eventual consistency.
-
-## FORMAT and ENCODE options
-
-**NOTE**
+## FORMAT and ENCODE options
+
These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause.
-
+
-| Field | Notes |
-| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
-| data\_encode | Data encode. Supported encode: JSON. |
-| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. |
-| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. \- If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. \- When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. |
-| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). |
+| Field | Notes |
+| :---------------------------------- | :--------------------- |
+| data\_format | Data format. Allowed formats: <br/> `PLAIN`: Output data with insert operations. <br/> `DEBEZIUM`: Output change data capture (CDC) log in Debezium format. <br/> `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case. <br/> To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
+| data\_encode | Data encode. Supported encode: `JSON`. |
+| force\_append\_only | If `true`, forces the sink to be `PLAIN` (also known as `append-only`), even if it cannot be. |
+| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. <br/> If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. <br/> When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. |
+| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
## Examples
diff --git a/integrations/destinations/clickhouse.mdx b/integrations/destinations/clickhouse.mdx
index 1ba3a2bd..d72fa9d4 100644
--- a/integrations/destinations/clickhouse.mdx
+++ b/integrations/destinations/clickhouse.mdx
@@ -11,11 +11,9 @@ ClickHouse is a high-performance, column-oriented SQL database management system
* Ensure you already have a ClickHouse table that you can sink data to. For additional guidance on creating a table and setting up ClickHouse, refer to this [quick start guide](https://clickhouse.com/docs/en/getting-started/quick-start).
* Ensure you have an upstream materialized view or source that you can sink data from.
-
-**NOTE**
-
+
We highly recommend using the deduplication engine, like ReplacingMergeTree, in ClickHouse. This is because it addresses the potential problem of duplicate writes in ClickHouse during RisingWave recovery when primary keys can be duplicated.
-
+
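As a sketch of the recommendation above, a ClickHouse target table might use ReplacingMergeTree keyed on the sink's primary key so that rows replayed during RisingWave recovery are deduplicated; the table and column names below are hypothetical.

```sql
-- Illustrative ClickHouse DDL sketch; names are placeholders.
CREATE TABLE demo_events
(
    id    Int32,
    name  String,
    ver   UInt32
)
ENGINE = ReplacingMergeTree(ver)
ORDER BY id;
```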
## Syntax
@@ -30,17 +28,17 @@ WITH (
## Parameters
-| Parameter Names | Description |
-| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, see the [Overview](/docs/current/data-delivery/) on when to define the primary key and Upsert sinks on limitations. |
+| Parameter Names | Description |
+| :--------------------------- | :------------------- |
+| type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, see the [Overview](/docs/current/data-delivery/) on when to define the primary key and [Upsert sinks](#upsert-sinks) on limitations. |
| primary\_key | Optional. A string of a list of column names, separated by commas, that specifies the primary key of the ClickHouse sink. |
| clickhouse.url | Required. Address of the ClickHouse server that you want to sink data to. Format: `http://ip:port`. The default port is 8123. |
| clickhouse.user | Required. User name for accessing the ClickHouse server. |
| clickhouse.password | Required. Password for accessing the ClickHouse server. |
| clickhouse.database | Required. Name of the ClickHouse database that you want to sink data to. |
| clickhouse.table | Required. Name of the ClickHouse table that you want to sink data to. |
-| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. |
-| clickhouse.delete.column | Optional. Add this parameter when using ClickHouse's ReplacingMergeTree and setting up the delete column. You can run an upsert sink using the ReplacingMergeTree engine. |
+| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10. The behavior of this field also depends on the `sink_decouple` setting: <br/> If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10. <br/> If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1. <br/> If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur. |
+| clickhouse.delete.column | Optional. When running an upsert sink with ClickHouse's ReplacingMergeTree engine, you can specify the delete column with this parameter. |
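A hedged sketch of how these parameters combine in a `CREATE SINK` statement; the connection values and object names are placeholders, and `force_append_only` is shown only for the case where the upstream view is not append-only.

```sql
-- Illustrative sketch only; replace placeholders with your ClickHouse connection details.
CREATE SINK sink_clickhouse FROM mv_events
WITH (
    connector = 'clickhouse',
    type = 'append-only',
    force_append_only = 'true',
    clickhouse.url = 'http://clickhouse-host:8123',
    clickhouse.user = 'default',
    clickhouse.password = '',
    clickhouse.database = 'default',
    clickhouse.table = 'demo_events',
    commit_checkpoint_interval = '10'
);
```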
### Upsert sinks
@@ -170,7 +168,7 @@ WITH (
## Data type mapping
| RisingWave Data Type | ClickHouse Data Type |
-| ---------------------- | --------------------------------------------------------------------------------------------- |
+| :--------------------- | :------------------------------ |
| boolean | Bool |
| smallint | Int16 or UInt16 |
| integer | Int32 or UInt32 |
@@ -182,28 +180,20 @@ WITH (
| bytea | Not supported |
| date | Date32 |
| time without time zone | Not supported |
-| timestamp | Not supported. You need to convert timestamp to timestamptz within RisingWave before sinking. |
+| timestamp | Not supported. Please convert timestamp to timestamptz within RisingWave before sinking. |
| timestamptz | DateTime64 |
| interval | Not supported |
| struct | Nested |
| array | Array |
| JSONB | Not supported |
-
-**NOTE**
-
-In ClickHouse, the `Nested` data type doe sn't support multiple levels of nesting. Therefore, when sinking RisingWave's `struct` data to ClickHouse, you need to flatten or restructure the nested data to align with ClickHouse's requirement.
-
-
-**NOTE**
+In ClickHouse, the `Nested` data type doesn't support multiple levels of nesting. Therefore, when sinking RisingWave's `struct` data to ClickHouse, you need to flatten or restructure the nested data to align with ClickHouse's requirement.
Before v1.9, when inserting data into a ClickHouse sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to this behavior. If the ClickHouse column is nullable, we will insert null values in such cases. If the column is not nullable, we will insert `0` instead.
-
-
Please be aware that the range of specific values varies among ClickHouse types and RisingWave types. Refer to the table below for detailed information.
-| ClickHouse type | RisingWave type | ClickHouse range | RisingWave range |
-| --------------- | --------------- | --------------------------------------------------- | ------------------------------------------ |
+| ClickHouse type | RisingWave type | ClickHouse range | RisingWave range |
+| :-------------- | :-------------- | :------------------- | :---------------------- |
| Date32 | DATE | 1900-01-01 to 2299-12-31 | 0001-01-01 to 9999-12-31 |
| DateTime64 | TIMESTAMPTZ | 1900-01-01 00:00:00 to 2299-12-31 23:59:59.99999999 | 0001-01-01 00:00:00 to 9999-12-31 23:59:59 |
diff --git a/integrations/destinations/cockroachdb.mdx b/integrations/destinations/cockroachdb.mdx
index 11c7576c..bca9161e 100644
--- a/integrations/destinations/cockroachdb.mdx
+++ b/integrations/destinations/cockroachdb.mdx
@@ -51,12 +51,9 @@ WITH (
| interval | INTERVAL |
| JSONB | JSONB |
| array | ARRAY |
-| struct | unsupported |
-
-
-**NOTE**
+| struct | Unsupported |
Only one-dimensional arrays in RisingWave can be sunk to CockroachDB.
For the array type, only the `smallint`, `integer`, `bigint`, `real`, `double precision`, and `varchar` element types are currently supported.
-
+
diff --git a/integrations/destinations/elasticsearch.mdx b/integrations/destinations/elasticsearch.mdx
index 4e119638..ec80a035 100644
--- a/integrations/destinations/elasticsearch.mdx
+++ b/integrations/destinations/elasticsearch.mdx
@@ -1,10 +1,10 @@
---
title: "Sink data from RisingWave to Elasticsearch"
sidebarTitle: Elasticsearch
-description: You can deliver the data that has been ingested and transformed in RisingWave to Elasticsearch to serve searches or analytics.
+description: This guide describes how to sink data from RisingWave to Elasticsearch using the Elasticsearch sink connector in RisingWave.
---
-This guide describes how to sink data from RisingWave to Elasticsearch using the Elasticsearch sink connector in RisingWave.
+You can deliver the data that has been ingested and transformed in RisingWave to Elasticsearch to serve searches or analytics.
[Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine capable of addressing a growing number of use cases. It centrally stores your data for lightning-fast search, fine‑tuned relevancy, and powerful analytics that scale with ease.
@@ -14,18 +14,14 @@ The Elasticsearch sink connecter in RisingWave will perform index operations via
* 5mb of updates
* 5 seconds since the last flush (assuming new actions are queued)
+The Elasticsearch sink connector in RisingWave provides at-least-once delivery semantics. Events may be redelivered in case of failures.
+
**PUBLIC PREVIEW**
This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](/product-lifecycle/#features-in-the-public-preview-stage).
-
-**NOTE**
-
-The Elasticsearch sink connector in RisingWave provides at-least-once delivery semantics. Events may be redelivered in case of failures.
-
-
## Prerequisites
* Ensure the Elasticsearch cluster (version 7.x or 8.x) is accessible from RisingWave.
@@ -51,8 +47,8 @@ WITH (
## Parameters
-| Parameter | Description |
-| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Parameter | Description |
+| :------------------- | :---------------- |
| sink\_name | Name of the sink to be created. |
| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
@@ -69,17 +65,12 @@ WITH (
| password | Optional. Password for accessing the Elasticsearch endpoint. It must be used with username. |
| delimiter | Optional. Delimiter for Elasticsearch ID when the sink's primary key has multiple columns. |
-
-**NOTE**
-
For versions below 8.x, there was once a `type` parameter. In Elasticsearch 6.x, users could set the type directly; starting from 7.x, setting it is no longer recommended and its default value is unified to `_doc`; in 8.x, the type has been removed entirely. See [Elasticsearch's official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/removal-of-types.html) for more details.
Therefore, if you are using Elasticsearch 7.x, RisingWave sets it to the officially recommended value `_doc`. If you are using Elasticsearch 8.x, the parameter no longer exists, so no setting is required.
-
-
-### Notes about primary keys and Elasticsearch IDs
+## Primary keys and Elasticsearch IDs
The Elasticsearch sink defaults to the `upsert` sink type. It does not support the `append-only` sink type.
@@ -92,7 +83,7 @@ If you don't want to customize your Elasticsearch ID, RisingWave will use the fi
Elasticsearch uses a mechanism called [dynamic field mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html) to dynamically create fields and determine their types automatically. It treats all integer types as long and all floating-point types as float. To ensure data types in RisingWave are mapped to the data types in Elasticsearch correctly, we recommend that you specify the mapping via [index templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html) or [dynamic templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-templates.html) before creating the sink.
| RisingWave Data Type | ElasticSearch Field Type |
-| --------------------------- | --------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------------- | :------------------------ |
| boolean | boolean |
| smallint | long |
| integer | long |
@@ -111,11 +102,7 @@ ElasticSearch uses a mechanism called [dynamic field mapping](https://www.elasti
| array | array |
| JSONB | object (RisingWave's Elasticsearch sink will send JSONB as a JSON string, and Elasticsearch will convert it into an object) |
-
-**NOTE**
-
Elasticsearch doesn't require users to explicitly `CREATE TABLE`. Instead, it infers the schema on-the-fly based on the first record ingested. For example, if a record contains a jsonb `{v1: 100}`, v1 will be inferred as a long type. However, if the next record is `{v1: "abc"}`, the ingestion will fail because `"abc"` is inferred as a string and the two types are incompatible.
Be aware of this behavior, or you may end up with fewer records in Elasticsearch than expected. For monitoring, you can check Grafana, which provides a panel for all sink write errors.
-
diff --git a/integrations/destinations/google-pub-sub.mdx b/integrations/destinations/google-pub-sub.mdx
index 42db5f36..5f5258cc 100644
--- a/integrations/destinations/google-pub-sub.mdx
+++ b/integrations/destinations/google-pub-sub.mdx
@@ -23,7 +23,7 @@ FORMAT data_format ENCODE data_encode [ (
## Basic parameter
| Parameter | Description |
-| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| pubsub.project\_id | Required. The Pub/Sub Project ID. |
| pubsub.topic | Required. The Pub/Sub topic to publish messages. |
| pubsub.endpoint | Required. The Pub/Sub endpoint URL. |
@@ -32,18 +32,16 @@ FORMAT data_format ENCODE data_encode [ (
## FORMAT and ENCODE option
-
-**NOTE**
-
+
These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause.
-
+
| Field | Note |
-| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| data\_format | Data format. Allowed format: PLAIN. |
-| data\_encode | Data encode. Supported encode: JSON. |
-| force\_append\_only | Required by default and must be true, which forces the sink to be PLAIN (also known as append-only). |
-| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). |
+| :------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| data\_format | Data format. Allowed format: `PLAIN`. |
+| data\_encode | Data encode. Supported encode: `JSON`. |
+| force\_append\_only | Required by default and must be `true`, which forces the sink to be `PLAIN` (also known as append-only). |
+| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
## Example
You can test the function locally before deploying it. See the guide on how to [Test locally with the Pub/Sub emulator](https://cloud.google.com/functions/docs/local-development).
diff --git a/integrations/destinations/mongodb.mdx b/integrations/destinations/mongodb.mdx
index 0eca7929..c0671d29 100644
--- a/integrations/destinations/mongodb.mdx
+++ b/integrations/destinations/mongodb.mdx
@@ -22,19 +22,19 @@ WITH (
## Parameters
-| **Parameter Name** | **Description** |
-| -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Parameter Name | Description |
+| :------------------------------- | :----------- |
| mongodb.url | The URL of MongoDB. |
-| type | Defines the type of the sink. Options include append-only or upsert. |
-| collection.name | The collection name where data should be written to or read from. For sinks, the format is db\_name.collection\_name. Data can also be written to dynamic collections; see collection.name.field below for more information. |
-| collection.name.field | Optional. The dynamic collection name where data should be sunk to. If specified, the field value will be used as the collection name. The collection name format is the same as collection.name. If the field value is null or an empty string, then the collection.name will be used as a fallback destination. |
-| collection.name.field.drop | Optional. Controls whether the field value of collection.name.field should be dropped when sinking. Set this option to true to avoid the duplicate values of collection.name.field being written to the result collection. |
+| type | Defines the type of the sink. Options include `append-only` or `upsert`. |
+| collection.name | The collection name where data should be written to or read from. For sinks, the format is `db_name.collection_name`. Data can also be written to dynamic collections; see `collection.name.field` below for more information. |
+| collection.name.field | Optional. The dynamic collection name where data should be sunk to. <br/> If specified, the field value will be used as the collection name. The collection name format is the same as `collection.name`. <br/> If the field value is null or an empty string, then the `collection.name` will be used as a fallback destination. |
+| collection.name.field.drop | Optional. Controls whether the field value of `collection.name.field` should be dropped when sinking. Set this option to `true` to avoid the duplicate values of `collection.name.field` being written to the result collection. |
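To illustrate the dynamic collection routing described above, here is a hedged sketch; the view name, URL, and the routing column `target_coll` are hypothetical, and the column's values are expected in `db_name.collection_name` format.

```sql
-- Illustrative sketch only; names, URL, and the routing column are placeholders.
-- Assumes mv_events is an append-only materialized view.
CREATE SINK sink_mongodb FROM mv_events
WITH (
    connector = 'mongodb',
    type = 'append-only',
    mongodb.url = 'mongodb://localhost:27017/?replicaSet=rs0',
    collection.name = 'demo.events',
    collection.name.field = 'target_coll',
    collection.name.field.drop = 'true'
);
```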
## Data type mapping
-| **MongoDB Type** | **RisingWave Type** |
-| ---------------- | --------------------------- |
+| MongoDB Type | RisingWave Type |
+| :--------------- | :-------------------------- |
| Boolean | BOOLEAN |
| 32-bit integer | SMALLINT |
| 32-bit integer | INTEGER |
@@ -92,14 +92,14 @@ WITH (
Assuming the schema of `t2` is:
| name | type | pk |
-| ----- | ---- | -- |
+| :---- | :--- | :- |
| id | int | ✔ |
| value | text | |
Given the record:
| id | value |
-| -- | ------------------- |
+| :- | :------------------ |
| 1 | 'example of record' |
The record written to MongoDB will be:
@@ -108,11 +108,9 @@ The record written to MongoDB will be:
{ "_id": 1, "id": 1, "value": "example of record" }
```
-
-**NOTE**
-
+
No redundant `id` field will exist if the primary key of `t2` is `_id`.
-
+
```sql compound key
CREATE TABLE t3(
diff --git a/mint.json b/mint.json
index 4102a9cf..3f192204 100644
--- a/mint.json
+++ b/mint.json
@@ -138,7 +138,7 @@
{"source": "/docs/current/ingest-from-s3", "destination": "/integrations/sources/s3"},
{"source": "/docs/current/ingest-from-azure-blob", "destination": "/integrations/sources/azure-blob"},
{"source": "/docs/current/ingest-from-gcs", "destination": "/integrations/sources/google-cloud-storage"},
- {"source": "/docs/current/ingest-from-datagen", "destination": "/integrations/sources/datagen"},
+ {"source": "/docs/current/ingest-from-datagen", "destination": "/ingestion/generate-test-data"},
{"source": "/docs/current/confluent-kafka-source", "destination": "/integrations/sources/confluent-cloud"},
{"source": "/docs/current/connector-amazon-msk", "destination": "/integrations/sources/amazon-msk"},
{"source": "/docs/current/ingest-from-automq-kafka", "destination": "/integrations/sources/automq-kafka"},
From e587df4548a4f8f21b21e08464959b61c0422f16 Mon Sep 17 00:00:00 2001
From: Ad-Bean
Date: Tue, 19 Nov 2024 15:46:24 -0500
Subject: [PATCH 06/11] feat(ci): typos for spell check (#64)
* ci: typos
* fix: correct typo in description field
* feat: add spellcheck configuration and custom wordlist
* feat: add new words to custom wordlist
* fix: normalize capitalization of 'Customizations' to 'customizations' in wordlist
---
.github/workflows/spellcheck.yml | 56 ++++++++
.spellcheck.yml | 23 ++++
.wordlist.txt | 229 +++++++++++++++++++++++++++++++
cloud/check-spending-details.mdx | 2 +-
typos.toml | 19 +++
5 files changed, 328 insertions(+), 1 deletion(-)
create mode 100644 .github/workflows/spellcheck.yml
create mode 100644 .spellcheck.yml
create mode 100644 .wordlist.txt
create mode 100644 typos.toml
diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 00000000..0fcdfdf5
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,56 @@
+name: spellcheck
+on:
+ pull_request:
+
+jobs:
+ run:
+ name: Spell Check with Typos
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: typos-action
+ id: typos-output
+ uses: crate-ci/typos@v1.27.3
+ with:
+ config: ./typos.toml
+
+ - name: Find Comment
+ if: ${{ failure() }}
+ uses: peter-evans/find-comment@v3
+ id: find-comment
+ with:
+ issue-number: ${{ github.event.pull_request.number }}
+ comment-author: "github-actions[bot]"
+ body-includes: The CI check for spelling has failed
+
+ - name: Create comment on PR if typos fail
+ if: ${{ failure() && steps.find-comment.outputs.comment-id == '' }}
+ uses: peter-evans/create-or-update-comment@v4
+ with:
+ issue-number: ${{ github.event.pull_request.number }}
+ body: |
+ ### CI Check Failed
+ The CI check for spelling has failed. Please review the errors and correct any spelling mistakes.
+
+ For more errors and details, you can check the [CI Log](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) or you can install [typos](https://github.com/crate-ci/typos?tab=readme-ov-file#install) and run `typos` locally to check for and fix spelling issues.
+
+ - name: Update comment if typos fail
+ if: ${{ failure() && steps.find-comment.outputs.comment-id != '' }}
+ uses: peter-evans/create-or-update-comment@v4
+ with:
+ comment-id: ${{ steps.find-comment.outputs.comment-id }}
+ issue-number: ${{ github.event.pull_request.number }}
+ body: |
+ ### CI Check Failed
+ The CI check for spelling has failed. Please review the errors and correct any spelling mistakes.
+
+ For more errors and details, you can check the [CI Log](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) or you can install [typos](https://github.com/crate-ci/typos?tab=readme-ov-file#install) and run `typos` locally to check for and fix spelling issues.
+ edit-mode: replace
+
+ - name: Check Spelling
+ uses: rojopolis/spellcheck-github-actions@0.35.0
+ with:
+ config_path: .spellcheck.yml
+ task_name: Markdown
diff --git a/.spellcheck.yml b/.spellcheck.yml
new file mode 100644
index 00000000..35738429
--- /dev/null
+++ b/.spellcheck.yml
@@ -0,0 +1,23 @@
+matrix:
+ - name: Markdown
+    aspell:
+ ignore-case: true
+ lang: en
+ dictionary:
+ wordlists:
+ - .wordlist.txt
+ output: wordlist.dic
+ encoding: utf-8
+ pipeline:
+ - pyspelling.filters.markdown:
+ markdown_extensions:
+ - pymdownx.superfences
+ - pyspelling.filters.html:
+ comments: false
+ ignores:
+ - code
+ - pre
+ sources:
+ - "*.mdx"
+ - "*.md"
+ default_encoding: utf-8
diff --git a/.wordlist.txt b/.wordlist.txt
new file mode 100644
index 00000000..a9d39701
--- /dev/null
+++ b/.wordlist.txt
@@ -0,0 +1,229 @@
+https
+github
+risingwavelabs
+RisingWave
+Redpanda
+Kinesis
+Astra
+Debezium
+debezium
+JSON
+struct
+varchar
+TabItem
+RailroadDiagram
+rr
+SSL
+SASL
+OAUTHBEARER
+Docusaurus
+docusaurus
+Postgres
+postgres
+datagen
+Grafana
+Etcd
+MinIO
+CMake
+OpenSSL
+psql
+RiseDev
+Tmux
+Kubernetes
+frontend
+NodePort
+kubectl
+uptime
+Avro
+Protobuf
+Prebuilt
+Observability
+CSV
+DML
+Alluxio
+Superset
+DBeaver
+Jupyter
+Metabase
+Clickhouse
+CockroachDB
+DataStax
+Pinot
+TiDB
+Hudi
+Trino
+Airbyte
+Fivetran
+Hightouch
+dbt
+ELT
+ETL
+DataStax
+StreamNative
+integrations
+macOS
+quickstart
+substring
+substrings
+gz
+dev
+CTEs
+namespace
+deserialization
+scalability
+changelog
+failover
+risingwave
+sql
+js
+rw
+pgwire
+json
+mv
+mysql
+Redash
+JDBC
+Redash
+analytics
+Flink
+JVM
+APIs
+stateful
+runtime
+disaggregated
+PrivateLink
+VPCs
+VPC
+DataSet
+FlinkSQL
+LSM
+natively
+ad-hoc
+hoc
+RocksDB
+checkpointing
+checkpointed
+UDF
+APIs
+DAGs
+acyclic
+MapReduce
+dataflow
+pipelined
+RisingWave's
+Redash
+TiCDC
+upsert
+JSONB
+boolean
+Citus
+CLI
+Chandy
+OpenDAL
+WebHDFS
+ChatGPT
+clickstream
+cryptocurrency
+dataset
+HDFS
+flink
+Flink's
+Homebrew
+IoT
+Lamport
+microservice
+microservices
+multibyte
+protobuf
+Protobuf
+timestamptz
+timestamptzs
+unary
+zstd
+http
+pre
+toc
+latencies
+thoroughputs
+VPC
+bigint
+bytea
+TopN
+UDFs
+avro
+kafka
+Paimon
+TPC
+Greenplum
+updateable
+ClickHouse
+JetStream
+MSK
+msk
+NATS
+ScyllaDB
+OOM
+DataGrip
+PgAdmin
+clickhouse
+Supabase
+BigQuery
+transactional
+OLAP
+ksqlDB
+backfilling
+GraphQL
+src
+img
+jpg
+StarRocks
+starrocks
+md
+Bytebase
+GCS
+gcs
+faq
+OLTP
+Napa
+superset
+Napa
+www
+DDL
+backfill
+backfills
+MVs
+Nats
+Psycopg
+Datadog
+Hasura
+Liquibase
+EMQX
+HiveMQ
+MQTT
+RabbitMQ
+Standalone's
+localhost
+prometheus
+datasources
+OpenSearch
+codebase
+Databricks
+SDKs
+RWUs
+roadmap
+terraform
+Serverless
+WASM
+schemas
+risingwavecloud
+Rockset
+personalization
+DefaultButton
+LightButton
+VoteNotify
+SharedMergeTree
+JWT
+TOML
+mintlify
+customizations
+repo
\ No newline at end of file
diff --git a/cloud/check-spending-details.mdx b/cloud/check-spending-details.mdx
index 68e78c52..3dc0a89e 100644
--- a/cloud/check-spending-details.mdx
+++ b/cloud/check-spending-details.mdx
@@ -1,6 +1,6 @@
---
title: "Check spending details"
-descriptin: You can view the usage and the corresponding charges for each project during the ongoing billing period. You can also download a PDF version of the invoice for your records.
+description: You can view the usage and the corresponding charges for each project during the ongoing billing period. You can also download a PDF version of the invoice for your records.
---
diff --git a/typos.toml b/typos.toml
new file mode 100644
index 00000000..8add8b6b
--- /dev/null
+++ b/typos.toml
@@ -0,0 +1,19 @@
+[default]
+extend-ignore-identifiers-re = [
+ # base64
+ "\\b[0-9A-Za-z+/]{64}(=|==)?\\b",
+ # ingest/ingest-from-datagen.md
+ "\\b[0-9A-Za-z]{16}\\b",
+]
+
+[default.extend-identifiers]
+# sql/functions-operators/sql-function-string.md
+1b69b4ba630f34e = "1b69b4ba630f34e"
+
+[default.extend-words]
+Iy = "Iy"
+YTO = "YTO"
+# Azure Kubernetes Service
+AKS = "AKS"
+# schema.history.internal.skip.unparseable.ddl
+unparseable="unparseable"
\ No newline at end of file
From 99818e3c8c609fe812e92f2f31da1097f3c09ff2 Mon Sep 17 00:00:00 2001
From: hengm3467 <100685635+hengm3467@users.noreply.github.com>
Date: Wed, 20 Nov 2024 13:17:22 +0800
Subject: [PATCH 07/11] Update README.md
Signed-off-by: hengm3467 <100685635+hengm3467@users.noreply.github.com>
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index bd4f57f5..2e300633 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# Note
-This repository is now published to: https://risingwavelabs.mintlify.app/docs/current/intro/introduction, and will be switched to our documentation domain once all testing and customizations are completed.
+This repository contains the latest RisingWave documentation. [The old repository] (https://github.com/risingwavelabs/risingwave-docs) now hosts the archived documentation up to v2.0 of RisingWave.
From f0bc5df0af81e01fd773549c53a98f7b38dc47b5 Mon Sep 17 00:00:00 2001
From: hengm3467 <100685635+hengm3467@users.noreply.github.com>
Date: Wed, 20 Nov 2024 13:17:36 +0800
Subject: [PATCH 08/11] Update README.md
Signed-off-by: hengm3467 <100685635+hengm3467@users.noreply.github.com>
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 2e300633..3831d258 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# Note
-This repository contains the latest RisingWave documentation. [The old repository] (https://github.com/risingwavelabs/risingwave-docs) now hosts the archived documentation up to v2.0 of RisingWave.
+This repository contains the latest RisingWave documentation. [The old repository](https://github.com/risingwavelabs/risingwave-docs) now hosts the archived documentation up to v2.0 of RisingWave.
From aad98e0be980ae617536f903445c0c98415228e5 Mon Sep 17 00:00:00 2001
From: xxchan
Date: Wed, 20 Nov 2024 13:34:02 +0800
Subject: [PATCH 09/11] enhance rate limit doc (#62)
* enhance rate limit doc
Signed-off-by: xxchan
* Update sql/commands/sql-alter-source.mdx
Co-authored-by: Eric Fu
Signed-off-by: xxchan
---------
Signed-off-by: xxchan
Co-authored-by: Eric Fu
---
operate/view-configure-runtime-parameters.mdx | 4 ++--
sql/commands/sql-alter-materialized-view.mdx | 9 ++++++---
sql/commands/sql-alter-source.mdx | 10 +++++++++-
sql/commands/sql-alter-table.mdx | 10 +++++-----
4 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/operate/view-configure-runtime-parameters.mdx b/operate/view-configure-runtime-parameters.mdx
index c2b9ab37..ecb93796 100644
--- a/operate/view-configure-runtime-parameters.mdx
+++ b/operate/view-configure-runtime-parameters.mdx
@@ -67,8 +67,8 @@ Below is the detailed information about the parameters you may see after using t
| cdc\_source\_wait\_streaming\_start\_timeout | 30 | For limiting the startup time of a shareable CDC streaming source when the source is being created. Unit: seconds. |
| row\_security | true/false | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-ROW-SECURITY) for details. Unused in RisingWave; supported only for compatibility. |
| standard\_conforming\_strings | on | See [here](https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-STANDARD-CONFORMING-STRINGS) for details. |
-| source\_rate\_limit | default/ A positive integer / 0 | Set the maximum number of records per second per source, for each parallelism. The source here refers to an upstream source. This parameter is applied to tables and tables with sources. The value can be default, 0, or a positive integer. SET SOURCE\_RATE\_LIMIT TO 0 will pause the source read for sources. SET SOURCE\_RATE\_LIMIT TO DEFAULT will disable the rate limit within the session, but it will not change the rate limits of existing DDLs.Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the streaming\_parallelism runtime parameter in this table. Additionally, we support altering rate limits in [sources](/docs/current/sql-alter-source/#set-source-rate-limit) and [tables that have source](/docs/current/sql-alter-table/#set-source-rate-limit). |
-| backfill\_rate\_limit | default/ A positive integer / 0 | Set the maximum number of records per second per parallelism for the backfill process of materialized views, sinks, and indexes. This parameter throttles the snapshot read stream for backfill. The value can be default, 0, or a positive integer. SET BACKFILL\_RATE\_LIMIT TO 0 will pause the snapshot read stream for backfill. SET BACKFILL\_RATE\_LIMIT TO default will disable the backfill rate limit within the session, but it will not change the backfill rate limit of existing DDLs. To obtain the parallelism value for a streaming job, you can refer to the streaming\_parallelism runtime parameter in this table. Additionally, we support altering backfill rate limits in [materialized views](/docs/current/sql-alter-materialized-view/#set-backfill%5Frate%5Flimit) and [CDC tables](/docs/current/sql-alter-table/#set-backfill%5Frate%5Flimit). |
+| source\_rate\_limit | default/positive integer/0 | Set the maximum number of records per second per source, for each parallelism. This parameter is applied when creating new sources and tables with sources. <br/> The value can be default, 0, or a positive integer. SET SOURCE\_RATE\_LIMIT TO 0 will pause the source read for sources. SET SOURCE\_RATE\_LIMIT TO DEFAULT will remove the rate limit. <br/> Setting this variable will only affect new DDLs within the session, but not change the rate limits of existing jobs. Use `ALTER` to change the rate limits in existing [sources](/sql/commands/sql-alter-source/#set-source-rate-limit) and [tables that have source](/sql/commands/sql-alter-table/#set-source-rate-limit). <br/> Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the streaming\_parallelism runtime parameter in this table. |
+| backfill\_rate\_limit | default/positive integer/0 | Set the maximum number of records per second per parallelism for the backfill process of materialized views, sinks, and indexes. This parameter is applied when creating new jobs, and throttles the backfill from upstream materialized views and sources. <br/> The value can be default, 0, or a positive integer. SET BACKFILL\_RATE\_LIMIT TO 0 will pause the backfill. SET BACKFILL\_RATE\_LIMIT TO default will remove the backfill rate limit. <br/> Setting this variable will only affect new DDLs within the session, but not change the rate limits of existing jobs. Use `ALTER` to change the backfill rate limits in existing [materialized views](/sql/commands/sql-alter-materialized-view/#set-backfill%5Frate%5Flimit) and [CDC tables](/sql/commands/sql-alter-table/#set-backfill%5Frate%5Flimit). <br/> Note that the total throughput of a streaming job is determined by multiplying the parallelism with the throttle rate. To obtain the parallelism value for a streaming job, you can refer to the streaming\_parallelism runtime parameter in this table. |
| rw\_streaming\_over\_window\_cache\_policy | full | Cache policy for partition cache in streaming over window. Can be "full", "recent", "recent\_first\_n" or "recent\_last\_n". |
| background\_ddl | true/false | Run DDL statements in background. |
| server\_encoding | UTF8 | Show the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time. |
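For a quick illustration of the two rate-limit variables described above, here is a minimal sketch of setting them at the session level (the values are placeholders; only DDLs issued afterwards in the same session are affected):

```sql
-- Throttle each source parallelism for new DDLs in this session to 1000 records/s
SET SOURCE_RATE_LIMIT TO 1000;

-- Pause the backfill of streaming jobs created in this session
SET BACKFILL_RATE_LIMIT TO 0;

-- Remove the session-level limits for subsequent DDLs;
-- existing jobs keep the limits they were created with
SET SOURCE_RATE_LIMIT TO DEFAULT;
SET BACKFILL_RATE_LIMIT TO DEFAULT;
```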
diff --git a/sql/commands/sql-alter-materialized-view.mdx b/sql/commands/sql-alter-materialized-view.mdx
index 894682cb..099ba5c1 100644
--- a/sql/commands/sql-alter-materialized-view.mdx
+++ b/sql/commands/sql-alter-materialized-view.mdx
@@ -35,7 +35,6 @@ ALTER MATERIALIZED VIEW materialized_view_name
```
-- Change the owner of the materialized view named "materialized_view1" to user "user1"
ALTER MATERIALIZED VIEW materialized_view1 OWNER TO user1;
-
```
### `SET SCHEMA`
@@ -96,7 +95,8 @@ ALTER MATERIALIZED VIEW mv_name
SET BACKFILL_RATE_LIMIT { TO | = } { default | rate_limit_number };
```
-Use this statement to modify the backfill rate limit of a materialized view being created. For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
+This statement controls the rate limit of a newly created materialized view's backfill process from upstream materialized views and sources.
+For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
```sql Examples
-- Pause the backfill
@@ -107,5 +107,8 @@ ALTER MATERIALIZED VIEW mv1 SET BACKFILL_RATE_LIMIT=1;
-- Disable the backfill
ALTER MATERIALIZED VIEW mv1 SET BACKFILL_RATE_LIMIT=DEFAULT;
-
```
+
+
+To modify the rate limit of the sources used in the materialized view, please refer to [SET SOURCE_RATE_LIMIT](/sql/commands/sql-alter-source#set-source-rate-limit).
+
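To make the distinction between the two limits concrete, a hedged sketch for a materialized view `mv1` built on a Kafka source `kafka_source` (both object names are placeholders):

```sql
-- Throttle the backfill of the materialized view itself
ALTER MATERIALIZED VIEW mv1 SET BACKFILL_RATE_LIMIT = 200;

-- Throttle ongoing ingestion of the upstream source instead
ALTER SOURCE kafka_source SET SOURCE_RATE_LIMIT = 500;
```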
diff --git a/sql/commands/sql-alter-source.mdx b/sql/commands/sql-alter-source.mdx
index 77905251..fcd6f1fd 100644
--- a/sql/commands/sql-alter-source.mdx
+++ b/sql/commands/sql-alter-source.mdx
@@ -173,7 +173,15 @@ ALTER SOURCE source_name
SET SOURCE_RATE_LIMIT { TO | = } { default | rate_limit_number };
```
-Use this statement to modify the rate limit of a source. For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
+Use this statement to modify the rate limit of a source. For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
+
+
+For a newly created materialized view on a source with historical data, e.g., a Kafka source, the materialized
+view will backfill from the source. The backfill process is not affected by the `SOURCE_RATE_LIMIT`
+of the source.
+
+To modify the rate limit of the backfilling process, please refer to [SET BACKFILL_RATE_LIMIT](/sql/commands/sql-alter-materialized-view#set-backfill-rate-limit).
+
```sql Example
-- Alter the rate limit of a source to default
diff --git a/sql/commands/sql-alter-table.mdx b/sql/commands/sql-alter-table.mdx
index ed5198b6..1360bb8b 100644
--- a/sql/commands/sql-alter-table.mdx
+++ b/sql/commands/sql-alter-table.mdx
@@ -203,8 +203,6 @@ ALTER TABLE t_user REFRESH SCHEMA;
```
-**NOTE**
-
If a downstream fragment references a column that is either missing or has undergone a type change in the updated schema, the command will be declined.
@@ -215,7 +213,8 @@ ALTER TABLE table_name
SET SOURCE_RATE_LIMIT { TO | = } { default | rate_limit_number };
```
-Use this statement to modify the rate limit of tables that have a source. For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
+For tables with a connector, this statement controls the rate limit of the associated source.
+For the specific value of `SOURCE_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
```sql Example
-- Create a table with source
@@ -238,13 +237,14 @@ ALTER TABLE kafka_source SET source_rate_limit TO default;
ALTER TABLE kafka_source SET source_rate_limit TO 1000;
```
-### `SET BACKFILL_RATE_LIMIT`[]
+### `SET BACKFILL_RATE_LIMIT`
```sql
ALTER TABLE table_name
SET BACKFILL_RATE_LIMIT { TO | = } { default | rate_limit_number };
```
-Use this statement to modify the backfill rate limit of a CDC table being created from a CDC source. For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/docs/current/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
+For a CDC table created from a CDC source, this statement controls the rate limit of the backfill from the CDC database.
+For the specific value of `BACKFILL_RATE_LIMIT`, refer to [How to view runtime parameters](/operate/view-configure-runtime-parameters/#how-to-view-runtime-parameters).
```sql Examples
-- Pause the backfill
From 60ca1dad02b448c119b0ef5ba3bf67bd3e62cbf5 Mon Sep 17 00:00:00 2001
From: IrisWan <150207222+WanYixian@users.noreply.github.com>
Date: Wed, 20 Nov 2024 14:01:44 +0800
Subject: [PATCH 10/11] Update callout syntax (#63)
* update callout first batch
* second batch
---------
Signed-off-by: IrisWan <150207222+WanYixian@users.noreply.github.com>
---
client-libraries/overview.mdx | 2 +-
cloud/choose-a-project-plan.mdx | 2 +-
cloud/connection-errors.mdx | 2 +-
cloud/export-metrics.mdx | 2 +-
cloud/manage-sources.mdx | 4 +-
cloud/organization-rbac.mdx | 4 +-
cloud/pricing.mdx | 2 +-
cloud/review-and-pay-invoices.mdx | 2 +-
cloud/service-account.mdx | 2 +-
cloud/sso.mdx | 2 +-
delivery/risingwave-as-postgres-fdw.mdx | 2 +-
deploy/node-specific-configurations.mdx | 6 +-
deploy/risingwave-kubernetes.mdx | 14 +--
deploy/upgrade-risingwave-k8s.mdx | 2 +-
faq/risingwave-flink-comparison.mdx | 2 +-
ingestion/generate-test-data.mdx | 2 +-
integrations/destinations/amazon-dynamodb.mdx | 4 +-
integrations/destinations/apache-doris.mdx | 10 +-
integrations/destinations/apache-iceberg.mdx | 8 +-
integrations/destinations/apache-kafka.mdx | 7 +-
integrations/destinations/apache-pulsar.mdx | 10 +-
integrations/destinations/aws-kinesis.mdx | 4 +-
integrations/destinations/azure-blob.mdx | 2 +-
integrations/destinations/bigquery.mdx | 14 +--
.../destinations/cassandra-or-scylladb.mdx | 8 +-
integrations/destinations/cockroachdb.mdx | 2 +-
integrations/destinations/delta-lake.mdx | 6 +-
integrations/destinations/elasticsearch.mdx | 2 +-
.../destinations/google-cloud-storage.mdx | 2 +-
integrations/destinations/google-pub-sub.mdx | 2 +-
integrations/destinations/mqtt.mdx | 12 +--
integrations/destinations/mysql.mdx | 20 ++--
.../destinations/nats-and-nats-jetstream.mdx | 22 ++---
integrations/destinations/opensearch.mdx | 17 ++--
integrations/destinations/postgresql.mdx | 17 ++--
integrations/destinations/redis.mdx | 27 +++---
integrations/destinations/snowflake.mdx | 14 ++-
integrations/destinations/sql-server.mdx | 4 +-
integrations/destinations/starrocks.mdx | 22 ++---
integrations/destinations/tidb.mdx | 2 +-
integrations/other/dbt.mdx | 2 +-
integrations/sources/amazon-msk.mdx | 5 +-
integrations/sources/apache-iceberg.mdx | 20 ++--
integrations/sources/automq-kafka.mdx | 12 +--
integrations/sources/azure-blob.mdx | 6 +-
integrations/sources/citus-cdc.mdx | 2 +-
integrations/sources/google-cloud-storage.mdx | 14 +--
integrations/sources/google-pub-sub.mdx | 8 +-
integrations/sources/hivemq.mdx | 5 -
integrations/sources/kafka.mdx | 93 +++++++------------
integrations/sources/kinesis.mdx | 10 +-
integrations/sources/mongodb-cdc.mdx | 4 +-
integrations/sources/mqtt.mdx | 12 +--
integrations/sources/mysql-cdc.mdx | 4 +-
integrations/sources/nats-jetstream.mdx | 12 +--
integrations/sources/postgresql-cdc.mdx | 25 ++---
integrations/sources/pulsar.mdx | 27 +++---
integrations/sources/s3.mdx | 22 ++---
integrations/sources/sql-server-cdc.mdx | 7 +-
.../visualization/beekeeper-studio.mdx | 6 +-
integrations/visualization/grafana.mdx | 6 +-
operate/access-control.mdx | 2 +-
operate/cluster-limit.mdx | 2 +-
operate/dedicated-compute-node.mdx | 6 +-
...anage-a-large-number-of-streaming-jobs.mdx | 2 +-
operate/manage-secrets.mdx | 6 +-
operate/meta-backup.mdx | 16 +---
operate/secure-connections-with-ssl-tls.mdx | 6 +-
operate/view-configure-system-parameters.mdx | 12 +--
.../maintain-wide-table-with-table-sinks.mdx | 5 +-
processing/sql/joins.mdx | 18 ++--
processing/sql/temporal-filters.mdx | 8 +-
processing/sql/time-windows.mdx | 6 +-
processing/sql/top-n-by-group.mdx | 10 +-
processing/time-travel-queries.mdx | 12 +--
processing/watermarks.mdx | 10 +-
sql/commands/sql-alter-connection.mdx | 2 +-
sql/commands/sql-alter-database.mdx | 4 +-
sql/commands/sql-alter-function.mdx | 2 +-
sql/commands/sql-alter-index.mdx | 2 +-
sql/commands/sql-alter-materialized-view.mdx | 8 +-
sql/commands/sql-alter-schema.mdx | 4 +-
sql/commands/sql-alter-sink.mdx | 8 +-
sql/commands/sql-alter-source.mdx | 20 ++--
sql/commands/sql-alter-table.mdx | 28 +++---
sql/commands/sql-alter-user.mdx | 8 +-
sql/commands/sql-alter-view.mdx | 6 +-
sql/commands/sql-comment-on.mdx | 2 +-
sql/commands/sql-create-aggregate.mdx | 2 +-
sql/commands/sql-create-connection.mdx | 8 +-
sql/commands/sql-create-database.mdx | 8 +-
sql/commands/sql-create-function.mdx | 2 +-
sql/commands/sql-create-index.mdx | 2 +-
sql/commands/sql-create-mv.mdx | 14 +--
sql/commands/sql-create-schema.mdx | 8 +-
sql/commands/sql-create-secret.mdx | 2 +-
sql/commands/sql-create-sink-into.mdx | 14 +--
sql/commands/sql-create-sink.mdx | 14 +--
sql/commands/sql-create-source.mdx | 14 +--
sql/commands/sql-create-table.mdx | 20 ++--
sql/commands/sql-create-user.mdx | 14 +--
sql/commands/sql-create-view.mdx | 8 +-
sql/commands/sql-delete.mdx | 4 +-
sql/commands/sql-describe.mdx | 4 +-
sql/commands/sql-discard.mdx | 2 +-
sql/commands/sql-drop-aggregate.mdx | 2 +-
sql/commands/sql-drop-connection.mdx | 2 +-
sql/commands/sql-drop-database.mdx | 4 +-
sql/commands/sql-drop-function.mdx | 2 +-
sql/commands/sql-drop-index.mdx | 2 +-
sql/commands/sql-drop-mv.mdx | 2 +-
sql/commands/sql-drop-schema.mdx | 2 +-
sql/commands/sql-drop-secret.mdx | 2 +-
sql/commands/sql-drop-sink.mdx | 2 +-
sql/commands/sql-drop-source.mdx | 2 +-
sql/commands/sql-drop-table.mdx | 2 +-
sql/commands/sql-drop-user.mdx | 2 +-
sql/commands/sql-drop-view.mdx | 2 +-
sql/commands/sql-explain.mdx | 10 +-
sql/commands/sql-grant.mdx | 2 +-
sql/commands/sql-insert.mdx | 4 +-
sql/commands/sql-revoke.mdx | 2 +-
sql/commands/sql-select.mdx | 2 +-
sql/commands/sql-set-time-zone.mdx | 2 +-
sql/commands/sql-set.mdx | 2 +-
sql/commands/sql-show-columns.mdx | 2 +-
sql/commands/sql-show-connections.mdx | 2 +-
sql/commands/sql-show-create-index.mdx | 2 +-
sql/commands/sql-show-create-mv.mdx | 2 +-
sql/commands/sql-show-create-sink.mdx | 2 +-
sql/commands/sql-show-create-source.mdx | 2 +-
sql/commands/sql-show-create-table.mdx | 2 +-
sql/commands/sql-show-create-view.mdx | 2 +-
sql/commands/sql-show-databases.mdx | 2 +-
sql/commands/sql-show-functions.mdx | 2 +-
sql/commands/sql-show-indexes.mdx | 2 +-
sql/commands/sql-show-internal-tables.mdx | 2 +-
sql/commands/sql-show-jobs.mdx | 2 +-
sql/commands/sql-show-mv.mdx | 2 +-
sql/commands/sql-show-processlist.mdx | 4 +-
sql/commands/sql-show-schemas.mdx | 2 +-
sql/commands/sql-show-sources.mdx | 2 +-
sql/commands/sql-show-tables.mdx | 2 +-
sql/commands/sql-show-views.mdx | 2 +-
sql/commands/sql-update.mdx | 4 +-
sql/data-types/casting.mdx | 8 +-
sql/data-types/overview.mdx | 8 +-
sql/data-types/rw-int256.mdx | 6 +-
sql/data-types/supported-protobuf-types.mdx | 2 +-
sql/functions/aggregate.mdx | 18 ++--
sql/functions/comparison.mdx | 4 +-
sql/functions/conditional.mdx | 4 +-
sql/functions/cryptographic.mdx | 6 +-
sql/functions/datetime.mdx | 14 ++-
sql/functions/json.mdx | 12 +--
sql/functions/logical.mdx | 4 +-
sql/functions/mathematical.mdx | 8 +-
sql/functions/string.mdx | 22 ++---
sql/functions/sys-admin.mdx | 6 +-
sql/functions/window-functions.mdx | 6 +-
sql/psql-commands.mdx | 2 +-
sql/query-syntax/generated-columns.mdx | 6 +-
sql/query-syntax/group-by-clause.mdx | 2 +-
sql/query-syntax/literals.mdx | 2 +-
sql/query-syntax/set-operations.mdx | 20 ++--
sql/query-syntax/value-exp.mdx | 20 ++--
sql/system-catalogs/information-schema.mdx | 20 ++--
sql/system-catalogs/pg-catalog.mdx | 8 +-
sql/system-catalogs/rw-catalog.mdx | 2 +-
sql/udfs/embedded-python-udfs.mdx | 8 +-
sql/udfs/sql-udfs.mdx | 14 +--
sql/udfs/use-udfs-in-java.mdx | 10 +-
sql/udfs/use-udfs-in-javascript.mdx | 2 +-
sql/udfs/use-udfs-in-python.mdx | 12 +--
sql/udfs/use-udfs-in-rust.mdx | 2 +-
troubleshoot/troubleshoot-high-latency.mdx | 2 +-
troubleshoot/troubleshoot-oom.mdx | 2 +-
troubleshoot/troubleshoot-source-sink.mdx | 2 +-
178 files changed, 545 insertions(+), 762 deletions(-)
diff --git a/client-libraries/overview.mdx b/client-libraries/overview.mdx
index e36aa54b..1ca9c8ef 100644
--- a/client-libraries/overview.mdx
+++ b/client-libraries/overview.mdx
@@ -13,7 +13,7 @@ Here is an overview of the available options. We provide detailed example guides
This table will be continuously updated to ensure compatibility.
| Language | Driver | Latest tested version |
-| ---------- | ------------------------------------------------------------------------------------------------------------ | --------------------- |
+| :--------- | :----------------------------------------------------------------------------------------------------------- | :-------------------- |
| C | [libpq](https://www.postgresql.org/docs/current/libpq.html) | |
| C# (.NET) | [Npgsql](https://www.npgsql.org/) | 8.0.2 |
| Go | [pgx](https://pkg.go.dev/github.com/jackc/pgx/v5). See the [example guide](/client-libraries/go). | v5.4.3 |
diff --git a/cloud/choose-a-project-plan.mdx b/cloud/choose-a-project-plan.mdx
index b236c751..d0ad11ef 100644
--- a/cloud/choose-a-project-plan.mdx
+++ b/cloud/choose-a-project-plan.mdx
@@ -8,7 +8,7 @@ Each project type is associated with the corresponding features, capabilities, r
Currently, RisingWave Cloud offers three types of projects: **Trial**, **Standard**, and **Advanced**. The table below describes a high-level comparison of features and restrictions across three project types.
| Service type | Trial | Standard | Advanced |
-| ----------------- | --------------------------------------------- | ---------------------------------------------------------------- | ----------------------------------------------- |
+| :---------------- | :-------------------------------------------- | :--------------------------------------------------------------- | :---------------------------------------------- |
| Deployment type | Multi-tenancy deployment, single-node project | Multi-tenancy deployment, multi-node project | Multi-tenancy deployment, multi-node project |
| Description | Standalone deployment with 2-RWU resources. | Deployed on shared Kubernetes service with customized resources. | Customized project deployment based on requests |
| Pricing | Free | Pay-as-you-go | Customized, contact sales |
diff --git a/cloud/connection-errors.mdx b/cloud/connection-errors.mdx
index 834e6f51..74b45038 100644
--- a/cloud/connection-errors.mdx
+++ b/cloud/connection-errors.mdx
@@ -52,7 +52,7 @@ This error occurs when your Postgres client doesn't support Server Name Indicati
RisingWave Cloud has tested the following drivers for SNI support:
| Driver | Language | SNI support | Note |
-| ---------- | -------- | ----------- | -------------------------------------------- |
+| :--------- | :------- | :---------- | :------------------------------------------- |
| psycopg2 | Python | | Requires the 'sslmode': 'require' option |
| asyncpg | Python | | Requires the 'sslmode': 'verify-full' option |
| SQLAlchemy | Python | | Requires the 'sslmode': 'require' option |
diff --git a/cloud/export-metrics.mdx b/cloud/export-metrics.mdx
index 136dcbca..51fa11bd 100644
--- a/cloud/export-metrics.mdx
+++ b/cloud/export-metrics.mdx
@@ -18,7 +18,7 @@ Generate the API key ID and API key secret in the Cloud Portal. See [Generate an
Get the corresponding `CLOUD_HOST` for your region and Cloud provider from the table below:
| Region/CloudProvider | CLOUD\_HOST |
-| -------------------- | ------------------------------------ |
+| :------------------- | :----------------------------------- |
| useast2/aws | canary-useast2-mgmt.risingwave.cloud |
| us-east-1/aws | prod-aws-usea1-mgmt.risingwave.cloud |
| us-west-2/aws | prod-aws-uswe2-mgmt.risingwave.cloud |
diff --git a/cloud/manage-sources.mdx b/cloud/manage-sources.mdx
index f15d9cf1..6a117a4e 100644
--- a/cloud/manage-sources.mdx
+++ b/cloud/manage-sources.mdx
@@ -15,11 +15,11 @@ You can create a source with one of the following methods:
2. Specify the project and click its **Workspace**.
3. Next to **Source** tab, click **\+ Add new**.
4. Select the service you want to connect to.
-
+
More services will be supported in future releases.
-
+
5. Configure the connector settings, source details, and schema according to the instructions of the guided setup.
6. Check the generated SQL statement and click **Confirm** to create the source in your database.
diff --git a/cloud/organization-rbac.mdx b/cloud/organization-rbac.mdx
index 8c9f965a..63946482 100644
--- a/cloud/organization-rbac.mdx
+++ b/cloud/organization-rbac.mdx
@@ -18,7 +18,7 @@ Below are permissions and limitations for roles to ensure that each service acco
To grant a role to your account, go to [Organization](https://www.risingwave.cloud/project/organization/service-account/) \> Role management, then click **Edit roles** on the right side.
| Role | Permissions | Limitations |
-| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :----------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| OrganizationAdmin  | Full control over tenants and related resources. Management of service accounts, users, invitations, and RoleBinding. Access to all billing resources. | Cannot modify their own admin RoleBinding. |
| OrganizationMember | View access to all tenants. View service accounts, users, and invitations. | No permissions for tenant-related operations (create, update, delete). No permissions for service accounts, users, or invitations operations (create, update, delete). No access to billing resources. |
| BillingManager | Full access to all billing resources. | No access to any other operations outside of billing. |
@@ -35,7 +35,7 @@ Only the OrganizationAdmin has the permission to manage user's RoleBinding.
### Scenario
| User scenarios | Description |
-| ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Invite a user to the organization | Currently, you can only invite a new user as an OrganizationMember. If you want to grant more permissions to the target user, please go to **Organization** \> **Role management** \> **Users** to modify after the user accepts the invitation. |
| Create a service account in the organization | The service account RoleBinding is used for authorization when accessing Cloud APIs using the service account's API keys. By default, the service account is assigned the read-only OrganizationMember role. If you need to assign more permissions to the service account, please go to **Organization** \> **Role management** \> **Service Accounts** to add other roles. |
| Delete or add RoleBinding for a user | Go to **Organization** \> **Role management** \> **Users**, click the corresponding Edit Roles of the specific role. A popup window will appear, allowing you to uncheck the role or select the new ones. Click **Confirm** to save the change. |
diff --git a/cloud/pricing.mdx b/cloud/pricing.mdx
index 514f5e5a..07b96b9b 100644
--- a/cloud/pricing.mdx
+++ b/cloud/pricing.mdx
@@ -8,7 +8,7 @@ description: RisingWave Cloud offers a flexible pricing model based on your usag
RisingWave Cloud charges the cost of each project individually. The pricing model of each project varies depending on its plan.
| Plan | Pricing model | Pricing precision |
-| -------------------------- | ------------- | ----------------- |
+| :------------------------- | :------------ | :---------------- |
| [Trial](#trial-plan) | Free | / |
| [Standard](#standard-plan) | Pay-as-you-go | 30-second basis |
| [Advanced](#advanced-plan) | Contact sales | Contact sales |
diff --git a/cloud/review-and-pay-invoices.mdx b/cloud/review-and-pay-invoices.mdx
index 65d8b880..53021da0 100644
--- a/cloud/review-and-pay-invoices.mdx
+++ b/cloud/review-and-pay-invoices.mdx
@@ -40,7 +40,7 @@ The automatic payment process could fail if your payment methods are invalid or
To pay an invoice manually, click the **Pay Now** button on the invoice you want to pay and follow the instructions on the screen.
-**CAUTION**
+
If you don't settle the outstanding payments within 10 days of the "Overdue" period, all services that you’re currently using will be stopped and you won't be able to create any new projects. You have to contact the support team to resume the services.
diff --git a/cloud/service-account.mdx b/cloud/service-account.mdx
index 9b17023f..a9891d01 100644
--- a/cloud/service-account.mdx
+++ b/cloud/service-account.mdx
@@ -41,7 +41,7 @@ You can generate multiple API keys for a service account. Each API key will have
3. A new API key will be generated. Please save the secret along with the ID to a safe place.
-**CAUTION**
+
The secret will be shown only once. After you close the dialog, you won't be able to retrieve it. If you lose the secret, you have to generate a new API key.
diff --git a/cloud/sso.mdx b/cloud/sso.mdx
index abd40c34..daccf33b 100644
--- a/cloud/sso.mdx
+++ b/cloud/sso.mdx
@@ -22,7 +22,7 @@ During the setup, provide placeholder values for the following fields:
Configure the properties below on the IdP platform:
| Property | Description |
-| -------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **IdP Single Sign-On URL** | URL of the receiver of the SAML AuthNRequest. Use a placeholder value initially. You'll get the actual value from your IdP after providing it with the Atlas metadata. |
| **IdP Signature Certificate** | PEM-encoded public key certificate of the IdP. You can obtain this value from your IdP. You can either upload the certificate from your computer or paste the contents into a text box. |
| **Request Binding** | SAML Authentication Request Protocol binding used to send the AuthNRequest. It can be either **HTTP POST** or **HTTP REDIRECT**. |
diff --git a/delivery/risingwave-as-postgres-fdw.mdx b/delivery/risingwave-as-postgres-fdw.mdx
index 6736b0a4..5b2ff019 100644
--- a/delivery/risingwave-as-postgres-fdw.mdx
+++ b/delivery/risingwave-as-postgres-fdw.mdx
@@ -155,7 +155,7 @@ Currently, write operations to RisingWave through a foreign data wrapper are not
There are two main methods to interact between RisingWave and PostgreSQL: sinking data to PostgreSQL and utilizing a foreign data wrapper of PostgreSQL to access data in RisingWave. The table below provides a summary of the differences between these two methods. Your choice between these methods will depend on your specific requirements, data architecture, and performance considerations.
| Aspect | Sinking to PostgreSQL | Using PostgreSQL FDW to access data |
-| -------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------ |
+| :------------------------- | :------------------------------------------------------ | :----------------------------------------------------------------- |
| Data Access | Data is physically stored in PostgreSQL | Data is physically stored in RisingWave |
| Performance | Potential latency for RisingWave to write to PostgreSQL | Potential latency when reading data from RisingWave |
| Message Delivery Guarantee | At-least-once while sinking into PostgreSQL tables | Exactly-once for MVs and the data is not moved |
diff --git a/deploy/node-specific-configurations.mdx b/deploy/node-specific-configurations.mdx
index d91b2321..16c46e97 100644
--- a/deploy/node-specific-configurations.mdx
+++ b/deploy/node-specific-configurations.mdx
@@ -87,7 +87,7 @@ unsafe_enable_strict_consistency = true
RisingWave now supports the following configurations:
| Configuration | Default | Description |
-| ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :---------------------------------- | :------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| unsafe\_enable\_strict\_consistency | true | Control the strictness of stream consistency. When set to false, data inconsistency like double-insertion or double-deletion with the same primary keys will be tolerated. |
### Storage configurations
@@ -129,7 +129,7 @@ The configuration of the file cache and the block cache refilling is separated i
Below is an example of the data file cache configuration for your reference. Please be aware that the data file cache configuration and the meta file cache configuration share the same options.
| Configuration | Default | Description |
-| ----------------------- | ------- | ----------------------------------------------------------------------------------------------------------------- |
+| :---------------------- | :------ | :---------------------------------------------------------------------------------------------------------------- |
| dir | "" | The directory for the file cache. If left empty, the file cache will be disabled. |
| capacity\_mb | 1024 | The file cache capacity in MB. |
| file\_capacity\_mb | 64 | The capacity for each cache file in MB. |
@@ -147,7 +147,7 @@ RisingWave uses a recent filter to decide whether to fill a block or unit. The r
Below is an example of the cache refill configuration for your reference.
| Configuration | Default | Description |
-| ------------------------------------ | ------- | ------------------------------------------------------------------------------------- |
+| :----------------------------------- | :------ | :------------------------------------------------------------------------------------ |
| data\_refill\_levels | \[\] | Only blocks in the given levels will be refilled. |
| timeout\_ms | 6000 | The metadata update will be delayed at most timeout\_ms to wait for refilling. |
| concurrency | 10 | Block refilling concurrency (by unit level). |
diff --git a/deploy/risingwave-kubernetes.mdx b/deploy/risingwave-kubernetes.mdx
index 3f5493a5..be451032 100644
--- a/deploy/risingwave-kubernetes.mdx
+++ b/deploy/risingwave-kubernetes.mdx
@@ -18,10 +18,10 @@ Ensure that [Docker](https://docs.docker.com/desktop/) is installed in your envi
## Create a Kubernetes cluster
-
+
The steps in this section are intended for creating a Kubernetes cluster in your local environment.
If you are using a managed Kubernetes service such as AKS, GKE, and EKS, refer to the corresponding documentation for instructions.
-
+
@@ -72,17 +72,17 @@ kubectl apply --server-side -f https://github.com/risingwavelabs/risingwave-oper
```
**Compatibility table**
| Operator | RisingWave | Kubernetes |
-| -------- | ---------- | ---------- |
+| :------- | :--------- | :--------- |
| v0.4.0 | v0.18.0+ | v1.21+ |
| v0.3.6 | v0.18.0+ | v1.21+ |
You can find the release notes of each version [here](https://github.com/risingwavelabs/risingwave-operator/releases).
-
+
The following errors might occur if `cert-manager` is not fully initialized. Simply wait for another minute and rerun the command above.
```bash
Error from server (InternalError): Internal error occurred: failed calling webhook "webhook.cert-manager.io": failed to call webhook: Post "": dial tcp 10.105.102.32:443: connect: connection refused
```
-
+
_**Optional:**_ Check if the Pods are running.
@@ -177,9 +177,9 @@ spec:
-
+
The performance of MinIO is closely tied to the disk performance of the node where it is hosted. We have observed that AWS EBS does not perform well in our tests. For optimal performance, we recommend using S3 or a compatible cloud service.
-
+
```yaml
spec:
stateStore:
diff --git a/deploy/upgrade-risingwave-k8s.mdx b/deploy/upgrade-risingwave-k8s.mdx
index b307239c..5b3a3bc4 100644
--- a/deploy/upgrade-risingwave-k8s.mdx
+++ b/deploy/upgrade-risingwave-k8s.mdx
@@ -5,7 +5,7 @@ description: "This topic describes upgrade RisingWave in a K8s deployment with t
---
-**CAUTION**
+
When upgrading RisingWave, it's important to be aware that there may be breaking changes. If you require technical support during the process of upgrading RisingWave in your production environments, please don't hesitate to reach out to us.
diff --git a/faq/risingwave-flink-comparison.mdx b/faq/risingwave-flink-comparison.mdx
index e9ff72cd..4b207455 100644
--- a/faq/risingwave-flink-comparison.mdx
+++ b/faq/risingwave-flink-comparison.mdx
@@ -9,7 +9,7 @@ We periodically update this article to keep up with the rapidly evolving landsca
## Summary
| Apache Flink | RisingWave | |
-| -------------------------------- | --------------------------------------------------------------------- | -------------------------------------------------------------------------- |
+| :------------------------------- | :-------------------------------------------------------------------- | :------------------------------------------------------------------------- |
| Version | 1.17 | Latest version |
| License | Apache License 2.0 | Apache License 2.0 |
| System category | Stream processing framework | Streaming database |
diff --git a/ingestion/generate-test-data.mdx b/ingestion/generate-test-data.mdx
index ab23b481..5c8b70be 100644
--- a/ingestion/generate-test-data.mdx
+++ b/ingestion/generate-test-data.mdx
@@ -23,7 +23,7 @@ WITH (
The following table shows the data types that can be generated for each load generator type.
| Generator \\ Data | Number | Timestamp | Timestamptz | Varchar | Struct | Array |
-| ----------------- | ------ | --------- | ----------- | ------- | ------ | ----- |
+| :---------------- | :----- | :-------- | :---------- | :------ | :----- | :---- |
| **Sequence** | | | | | | |
| **Random** | | | | | | |
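As a rough sketch of how the two generator kinds combine in one table (column names and option values are placeholders, and the `fields.<column>.*` option names are assumed from the datagen connector rather than taken from this patch):

```sql
CREATE TABLE t_datagen (v1 int, v2 double precision)
WITH (
    connector = 'datagen',
    -- sequence generator: deterministic, monotonically increasing values
    fields.v1.kind = 'sequence',
    fields.v1.start = '1',
    fields.v1.end = '1000',
    -- random generator: values drawn between min and max
    fields.v2.kind = 'random',
    fields.v2.min = '-10',
    fields.v2.max = '10',
    datagen.rows.per.second = '10'
) FORMAT PLAIN ENCODE JSON;
```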
diff --git a/integrations/destinations/amazon-dynamodb.mdx b/integrations/destinations/amazon-dynamodb.mdx
index e83dd65b..811f9f93 100644
--- a/integrations/destinations/amazon-dynamodb.mdx
+++ b/integrations/destinations/amazon-dynamodb.mdx
@@ -36,7 +36,7 @@ FORMAT data_format ENCODE data_encode [ (
## Parameters
| Field | Note |
-| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :---------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| table | Required. Name of the DynamoDB table where you want to write the data. |
| primary\_key | Required. A pair of columns representing the partition key and sort key of DynamoDB, e.g., key1,key2, separated by comma. |
| aws.region | Required. AWS region where your DynamoDB table is hosted. |
@@ -69,7 +69,7 @@ This makes sure that the data structure in RisingWave aligns with the key defini
## Data type mapping
| RisingWave Data Type | DynamoDB Data Type |
-| --------------------------- | ------------------ |
+| :-------------------------- | :----------------- |
| boolean | Bool |
| smallint | number (N) |
| integer | number (N) |
diff --git a/integrations/destinations/apache-doris.mdx b/integrations/destinations/apache-doris.mdx
index c7bdfb6f..92d2116f 100644
--- a/integrations/destinations/apache-doris.mdx
+++ b/integrations/destinations/apache-doris.mdx
@@ -24,7 +24,7 @@ WITH (
## Parameters
| Parameter Names | Description |
-| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, the table you are sinking to needs to have a UNIQUE KEY. |
| doris.url | Required. The connection port for the frontend of Doris. This is not the MySQL connection port. |
| doris.username | Required. The user name of the Doris user. |
@@ -33,7 +33,7 @@ WITH (
| doris.table | Required. The Doris table you want to sink data to. |
| doris.partial\_column | Optional. Defaults to false. If true, you can perform partial updates on the columns of a table, see the [Partial update](https://doris.apache.org/docs/2.0/data-operate/update/update-of-unique-model/#partial-update) in the Doris documentation for more details. |
| force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. |
-| primary\_key | Optional. The primary keys of the sink. Use ',' to delimit the primary key columns. |
+| primary\_key | Optional. The primary keys of the sink. Use `,` to delimit the primary key columns. |
## Examples
@@ -80,7 +80,7 @@ The following table shows the corresponding data types between RisingWave and Do
In regards to `decimal` types, RisingWave will round to the nearest decimal place to ensure that its precision matches that of Doris. Ensure that the length of decimal types being imported into Doris does not exceed Doris's decimal length. Otherwise, it will fail to import.
| Doris type | RisingWave type |
-| --------------- | --------------------------- |
+| :-------------- | :-------------------------- |
| BOOLEAN | BOOLEAN |
| SMALLINT | SMALLINT |
| INT | INTEGER |
@@ -100,6 +100,4 @@ In regards to `decimal` types, RisingWave will round to the nearest decimal plac
| JSONB | JSONB |
| BIGINT | SERIAL |
-
-Before v1.9, when inserting data into an Apache Doris sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to the behavior. If a decimal value is out of bounds or represents "inf", "-inf", or "nan", we will insert null values.
-
+If a decimal value is out of bounds or represents `inf`, `-inf`, or `nan`, RisingWave will insert null values.
\ No newline at end of file
diff --git a/integrations/destinations/apache-iceberg.mdx b/integrations/destinations/apache-iceberg.mdx
index 469bf46a..81839525 100644
--- a/integrations/destinations/apache-iceberg.mdx
+++ b/integrations/destinations/apache-iceberg.mdx
@@ -28,7 +28,7 @@ WITH (
## Parameters
| Parameter Names | Description |
-| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| type | Required. Allowed values: appendonly and upsert. |
| force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. |
| s3.endpoint | Optional. Endpoint of the S3. For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`. For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
@@ -49,7 +49,7 @@ WITH (
RisingWave converts risingwave data types from/to Iceberg according to the following data type mapping table:
| RisingWave Type | Iceberg Type |
-| --------------- | ------------ |
+| :-------------- | :----------- |
| boolean | boolean |
| int | integer |
| bigint | long |
@@ -167,9 +167,11 @@ with (
### Glue catalog
-Premium Edition Feature
+
+**PREMIUM EDITION FEATURE**
This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com).
+
RisingWave supports the Glue catalog. You should use AWS S3 if you use the Glue catalog. Below is example code for using this catalog:
diff --git a/integrations/destinations/apache-kafka.mdx b/integrations/destinations/apache-kafka.mdx
index 5db529fd..6057407c 100644
--- a/integrations/destinations/apache-kafka.mdx
+++ b/integrations/destinations/apache-kafka.mdx
@@ -33,12 +33,12 @@ All `WITH` options are required unless explicitly mentioned as optional.
| Parameter or clause | Description |
| :-------------------------- | :------------- |
| sink\_name | Name of the sink to be created. |
-| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
+| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
| connector | Sink connector type must be `kafka` for Kafka sink. |
| properties.bootstrap.server | Address of the Kafka broker. Format: `ip:port`. If there are multiple brokers, separate them with commas. |
| topic | Address of the Kafka topic. One sink can only correspond to one topic. |
-| primary\_key | Conditional. The primary keys of the sink. Use ',' to delimit the primary key columns. This field is optional if creating a PLAIN sink, but required if creating a DEBEZIUM or UPSERT sink. |
+| primary\_key | Conditional. The primary keys of the sink. Use `,` to delimit the primary key columns. This field is optional if creating a `PLAIN` sink, but required if creating a `DEBEZIUM` or `UPSERT` sink. |
## Additional Kafka parameters
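Putting the required parameters together, a minimal Kafka sink sketch (the broker addresses, topic, and object names are placeholders; `FORMAT UPSERT` is chosen here only to show why `primary_key` becomes required):

```sql
CREATE SINK kafka_sink
FROM mv_orders
WITH (
    connector = 'kafka',
    properties.bootstrap.server = 'broker1:9092,broker2:9092',
    topic = 'orders_topic',
    -- required because this sink uses FORMAT UPSERT
    primary_key = 'order_id'
) FORMAT UPSERT ENCODE JSON;
```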
@@ -121,7 +121,7 @@ When creating an append-only Protobuf sink, the following options can be used fo
| Field | Notes |
| :---------------------------- | :----------------------- |
| message | Required. Package qualified message name of the main Message in the schema definition. |
-| schema.location | Required if schema.registry is not specified. Only one of schema.location or schema.registry can be defined. The schema location. This can be in either file://, http://, https:// format. |
+| schema.location | Required if schema.registry is not specified. Only one of schema.location or schema.registry can be defined. The schema location. This can be in either `file://`, `http://`, `https://` format. |
| schema.registry | Required if schema.location is not specified. Only one of schema.location or schema.registry can be defined. The address of the schema registry. |
| schema.registry.username | Optional. The user name used to access the schema registry. |
| schema.registry.password | Optional. The password associated with the user name. |
@@ -439,7 +439,6 @@ WITH (
-**CAUTION**
The implementation of SASL/OAUTHBEARER in RisingWave validates only [unsecured client side tokens](https://docs.confluent.io/platform/current/kafka/authentication%5Fsasl/authentication%5Fsasl%5Foauth.html#unsecured-client-side-token-creation-options-for-sasl-oauthbearer), and does not support OpenID Connect (OIDC) authentication. Therefore, it should not be used in production environments.
diff --git a/integrations/destinations/apache-pulsar.mdx b/integrations/destinations/apache-pulsar.mdx
index 161417d6..9c58ec36 100644
--- a/integrations/destinations/apache-pulsar.mdx
+++ b/integrations/destinations/apache-pulsar.mdx
@@ -42,14 +42,14 @@ FORMAT data_format ENCODE data_encode [ (
| service.url | Required. The address of the Pulsar service. |
| auth.token | Optional. A token for auth. If both auth.token and oauth are set, only oauth authorization is considered. |
| oauth.issuer.url | Optional. The issuer URL for OAuth2\. This field must be filled if other oauth fields are specified. |
-| oauth.credentials.url | Optional. The path for credential files, which starts with file://. This field must be filled if other oauth fields are specified. |
+| oauth.credentials.url | Optional. The path for credential files, which starts with `file://`. This field must be filled if other oauth fields are specified. |
| oauth.audience | Optional. The audience for OAuth2\. This field must be filled if other oauth fields are specified. |
| oauth.scope | Optional. The scope for OAuth2. |
| aws.credentials.access\_key\_id | Optional. The AWS access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. |
| aws.credentials.secret\_access\_key | Optional. The AWS secret access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. |
| max\_retry\_num | Optional. The maximum number of times to retry sending a batch to Pulsar. This allows retrying in case of transient errors. The default value is 3. |
| retry\_interval | Optional. The time in milliseconds to wait after a failure before retrying to send a batch. The default value is 100ms. |
-| primary\_key | Optional. The primary keys of the sink. Use ',' to delimit the primary key columns. Primary keys are optional when creating a PLAIN sink but required for UPSERT and DEBEZIUM sinks. |
+| primary\_key | Optional. The primary keys of the sink. Use `,` to delimit the primary key columns. Primary keys are optional when creating a PLAIN sink but required for UPSERT and DEBEZIUM sinks. |
## FORMAT and ENCODE options
@@ -59,11 +59,11 @@ These options should be set in `FORMAT data_format ENCODE data_encode (key = 'va
| Field | Notes |
| :------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. DEBEZIUM: Output change data capture (CDC) log in Debezium format. UPSERT: Output data as a changelog stream. primary\_key must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
+| data\_format | Data format. Allowed formats: `PLAIN`: Output data with insert operations. `DEBEZIUM`: Output change data capture (CDC) log in Debezium format. `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
| data\_encode | Data encode. Supported encode: JSON. |
| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. |
-| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. \- If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. \- When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. |
-| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). |
+| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. If omitted, the output format of timestamptz is `2023-11-11T18:30:09.453000Z` which includes the UTC suffix `Z`. When `utc_without_suffix` is specified, the format is changed to `2023-11-11 18:30:09.453000`. |
+| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
## Example
diff --git a/integrations/destinations/aws-kinesis.mdx b/integrations/destinations/aws-kinesis.mdx
index f50bb4f9..6d5eacb1 100644
--- a/integrations/destinations/aws-kinesis.mdx
+++ b/integrations/destinations/aws-kinesis.mdx
@@ -38,7 +38,7 @@ FORMAT data_format ENCODE data_encode [ (
| aws.credentials.session\_token | Optional. The session token associated with the temporary security credentials. |
| aws.credentials.role.arn | Optional. The Amazon Resource Name (ARN) of the role to assume. |
| aws.credentials.role.external\_id | Optional. The [external id](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) used to authorize access to third-party resources. |
-| primary\_key | Required. The primary keys of the sink. Use ',' to delimit the primary key columns. |
+| primary\_key | Required. The primary keys of the sink. Use `,` to delimit the primary key columns. |
In the Kinesis sink, we use [PutRecords](https://docs.aws.amazon.com/kinesis/latest/APIReference/API%5FPutRecords.html) API to send multiple records in batches to achieve higher throughput. Due to the limitations of Kinesis, records might be out of order when using this API. Nevertheless, the current implementation of the Kinesis sink guarantees at-least-once delivery and eventual consistency.
@@ -54,7 +54,7 @@ These options should be set in `FORMAT data_format ENCODE data_encode (key = 'va
| data\_format | Data format. Allowed formats: `PLAIN`: Output data with insert operations. `DEBEZIUM`: Output change data capture (CDC) log in Debezium format. `UPSERT`: Output data as a changelog stream. `primary_key` must be specified in this case. To learn about when to define the primary key if creating an UPSERT sink, see the [Overview](/docs/current/data-delivery/). |
| data\_encode | Data encode. Supported encode: `JSON`. |
| force\_append\_only | If `true`, forces the sink to be `PLAIN` (also known as `append-only`), even if it cannot be. |
-| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. If omitted, the output format of timestamptz is 2023-11-11T18:30:09.453000Z which includes the UTC suffix Z. When utc\_without\_suffix is specified, the format is changed to 2023-11-11 18:30:09.453000. |
+| timestamptz.handling.mode | Controls the timestamptz output format. This parameter specifically applies to append-only or upsert sinks using JSON encoding. If omitted, the output format of timestamptz is `2023-11-11T18:30:09.453000Z` which includes the UTC suffix `Z`. When `utc_without_suffix` is specified, the format is changed to `2023-11-11 18:30:09.453000`. |
| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
## Examples
diff --git a/integrations/destinations/azure-blob.mdx b/integrations/destinations/azure-blob.mdx
index 56d5d8f3..440620f9 100644
--- a/integrations/destinations/azure-blob.mdx
+++ b/integrations/destinations/azure-blob.mdx
@@ -28,7 +28,7 @@ WITH (
## Parameters
| Parameter names | Description |
-| -------------------------------- | ------------------------------------------------------------------------------ |
+| :------------------------------- | :----------------------------------------------------------------------------- |
| azblob.container\_name | Required. The name of the Azure Blob Storage container. |
| azblob.path | Required. The directory where the sink file is located. |
| azblob.credentials.account\_name | Optional. The Azure Storage account name for authentication. |
diff --git a/integrations/destinations/bigquery.mdx b/integrations/destinations/bigquery.mdx
index 12321794..73d88f6e 100644
--- a/integrations/destinations/bigquery.mdx
+++ b/integrations/destinations/bigquery.mdx
@@ -41,14 +41,14 @@ WITH (
## Parameters
| Parameter Names | Description |
-| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :---------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _sink\_name_ | Name of the sink to be created. |
-| _sink\_from_ | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or _select\_query_ query must be specified. |
-| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a _sink\_from_ clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
-| type | Required. Data format. Allowed formats: append-only: Output data with insert operations.upsert: For this type, you need to set corresponding permissions and primary keys based on the [Document of BigQuery](https://cloud.google.com/bigquery/docs/change-data-capture). |
+| _sink\_from_ | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or `select_query` query must be specified. |
+| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a `sink_from` clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
+| type | Required. Data format. Allowed formats: `append-only`: Output data with insert operations. `upsert`: For this type, you need to set corresponding permissions and primary keys based on the [Document of BigQuery](https://cloud.google.com/bigquery/docs/change-data-capture). |
| force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. |
-| bigquery.local.path | Optional. The file path leading to the JSON key file located in your local server. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. Either bigquery.local.path or bigquery.s3.path must be specified. |
-| bigquery.s3.path | Optional. The file path leading to the JSON key file located in S3\. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. At least one of bigquery.local.path or bigquery.s3.path must be specified. |
+| bigquery.local.path | Optional. The file path leading to the JSON key file located in your local server. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. Either `bigquery.local.path` or `bigquery.s3.path` must be specified. |
+| bigquery.s3.path | Optional. The file path leading to the JSON key file located in S3\. Details can be found in [Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) under your Google Cloud account. At least one of `bigquery.local.path` or `bigquery.s3.path` must be specified. |
| bigquery.project | Required. The BigQuery project ID. |
| bigquery.dataset | Required. The BigQuery dataset ID. |
| bigquery.table | Required. The BigQuery table you want to sink to. |
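For orientation, a hedged sketch of an append-only BigQuery sink using a local key file (the project, dataset, table, and file path are placeholders):

```sql
CREATE SINK bq_sink
FROM mv_events
WITH (
    connector = 'bigquery',
    type = 'append-only',
    -- only needed if the upstream query is not already append-only
    force_append_only = 'true',
    bigquery.local.path = '/path/to/bq-key.json',
    bigquery.project = 'my_project',
    bigquery.dataset = 'my_dataset',
    bigquery.table = 'events'
);
```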
@@ -98,7 +98,7 @@ WITH (
## Data type mapping
| RisingWave Data Type | BigQuery Data Type |
-| --------------------------- | ------------------ |
+| :-------------------------- | :----------------- |
| boolean | bool |
| smallint | int64 |
| integer | int64 |
diff --git a/integrations/destinations/cassandra-or-scylladb.mdx b/integrations/destinations/cassandra-or-scylladb.mdx
index 2f9d9da3..b3506b7e 100644
--- a/integrations/destinations/cassandra-or-scylladb.mdx
+++ b/integrations/destinations/cassandra-or-scylladb.mdx
@@ -39,10 +39,10 @@ Once the sink is created, data changes will be streamed to the specified table.
## Parameters
| Parameter Names | Description |
-| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :----------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| _sink\_name_ | Name of the sink to be created. |
-| _sink\_from_ | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or _select\_query_ query must be specified. |
-| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a _sink\_from_ clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
+| _sink\_from_ | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or `select_query` query must be specified. |
+| AS _select\_query_ | A SELECT query that specifies the data to be output to the sink. Either this query or a `sink_from` clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
| type | Required. Specify if the sink should be upsert or append-only. If creating an upsert sink, you must specify a primary key. |
| primary\_key | Optional. A string of a list of column names, separated by commas, that specifies the primary key of the Cassandra sink. |
| force\_append\_only | If true, forces the sink to be append-only, even if it cannot be. |
@@ -58,7 +58,7 @@ Once the sink is created, data changes will be streamed to the specified table.
## Data type mapping - RisingWave and Cassandra
| RisingWave Data Type | Cassandra Data Type |
-| --------------------------- | --------------------------------------------------------------------------------------- |
+| :-------------------------- | :-------------------------------------------------------------------------------------- |
| boolean | boolean |
| smallint | smallint |
| integer | int |
diff --git a/integrations/destinations/cockroachdb.mdx b/integrations/destinations/cockroachdb.mdx
index bca9161e..5574d413 100644
--- a/integrations/destinations/cockroachdb.mdx
+++ b/integrations/destinations/cockroachdb.mdx
@@ -34,7 +34,7 @@ WITH (
## Data type mapping
| RisingWave Data Type | CockroachDB Data Type |
-| --------------------------- | --------------------- |
+| :-------------------------- | :-------------------- |
| boolean | BOOL |
| smallint | INT2 |
| integer | INT4 |
diff --git a/integrations/destinations/delta-lake.mdx b/integrations/destinations/delta-lake.mdx
index 4c679419..e2242816 100644
--- a/integrations/destinations/delta-lake.mdx
+++ b/integrations/destinations/delta-lake.mdx
@@ -29,14 +29,14 @@ WITH (
## Parameters
| Parameter Names | Description |
-| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| type | Required. Currently, only append-only is supported. |
-| location | Required. The file path that the Delta Lake table is reading data from, as specified when creating the Delta Lake table. For AWS, start with s3:// or s3a://;For GCS, start with gs://; For local files, start with file://. |
+| location | Required. The file path that the Delta Lake table is reading data from, as specified when creating the Delta Lake table. For AWS, start with `s3://` or `s3a://`; for GCS, start with `gs://`; for local files, start with `file://`. |
| s3.endpoint | Required. Endpoint of the S3. <br/> For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`. <br/> For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
| s3.access.key | Required. Access key of the S3 compatible object store. |
| s3.secret.key | Required. Secret key of the S3 compatible object store. |
| gcs.service.account | Required for GCS. Specifies the service account JSON file as a string. |
-| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. |
+| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10. The behavior of this field also depends on the `sink_decouple` setting: <br/> If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10. <br/> If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1. <br/> If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur. |
## Example
diff --git a/integrations/destinations/elasticsearch.mdx b/integrations/destinations/elasticsearch.mdx
index ec80a035..a44be2cf 100644
--- a/integrations/destinations/elasticsearch.mdx
+++ b/integrations/destinations/elasticsearch.mdx
@@ -50,7 +50,7 @@ WITH (
| Parameter | Description |
| :------------------- | :---------------- |
| sink\_name | Name of the sink to be created. |
-| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
+| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
| primary\_key | Optional. The primary keys of the sink. If the primary key has multiple columns, set a delimiter in the delimiter parameter below to join them. |
| index | Required if `index_column` is not set. Name of the Elasticsearch index that you want to write data to. |
diff --git a/integrations/destinations/google-cloud-storage.mdx b/integrations/destinations/google-cloud-storage.mdx
index 2c2ee318..cba12043 100644
--- a/integrations/destinations/google-cloud-storage.mdx
+++ b/integrations/destinations/google-cloud-storage.mdx
@@ -23,7 +23,7 @@ WITH (
|-|-|
| connector | Required. Support the GCS connector only.|
| gcs.bucket_name | Required. The name of the bucket where the sink data is stored in. |
-| gcs.credential | Required. Base64-encoded credential key obtained from the GCS service account key JSON file. To get this JSON file, refer to the [guides of GCS documentation](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console). To encode it in base64, run the following command: cat ~/Downloads/rwc-byoc-test-464bdd851bce.json | base64 -b 0 | pbcopy, and then paste the output as the value for this parameter. If this field is not specified, ADC (application default credentials) will be used.|
+| gcs.credential | Required. Base64-encoded credential key obtained from the GCS service account key JSON file. To get this JSON file, refer to the [guides of GCS documentation](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console). <br/> To encode it in base64, run the following command: `cat ~/Downloads/rwc-byoc-test-464bdd851bce.json \| base64 -b 0 \| pbcopy`, and then paste the output as the value for this parameter. <br/> If this field is not specified, ADC (application default credentials) will be used. |
| gcs.service_account| Optional. The service account of the GCS sink. If `gcs.credential` or ADC is not specified, the credentials will be derived from the service account.|
| gcs.path | Required. The directory where the sink file is located. |
| type | Required. Defines the type of the sink. Options include `append-only` or `upsert`. |
diff --git a/integrations/destinations/google-pub-sub.mdx b/integrations/destinations/google-pub-sub.mdx
index 5f5258cc..9b0ff62e 100644
--- a/integrations/destinations/google-pub-sub.mdx
+++ b/integrations/destinations/google-pub-sub.mdx
@@ -41,7 +41,7 @@ These options should be set in `FORMAT data_format ENCODE data_encode (key = 'va
| data\_format | Data format. Allowed format: `PLAIN`. |
| data\_encode | Data encode. Supported encode: `JSON`. |
| force\_append\_only | Required by default and must be `true`, which forces the sink to be `PLAIN` (also known as append-only). |
-| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data\_encode ( ... )`. |
+| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
## Example
You can test the function locally before you deploying it. See guide on how to [Test locally with the Pub/Sub emulator](https://cloud.google.com/functions/docs/local-development).
diff --git a/integrations/destinations/mqtt.mdx b/integrations/destinations/mqtt.mdx
index 5e32c23e..8035dffa 100644
--- a/integrations/destinations/mqtt.mdx
+++ b/integrations/destinations/mqtt.mdx
@@ -59,16 +59,16 @@ After the sink is created, you will continuously consume the data in the MQTT to
### Parameters
| Field | Notes |
-| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| url | Required. The URL of the broker to connect to, e.g., tcp://localhost. Must be prefixed with tcp://, mqtt://, ssl://, or mqtts:// to denote the protocol. mqtts:// and ssl:// use native certificates if no CA is specified. |
+| :----------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| url | Required. The URL of the broker to connect to, e.g., `tcp://localhost`. Must be prefixed with `tcp://`, `mqtt://`, `ssl://`, or `mqtts://` to denote the protocol. `mqtts://` and `ssl://` use native certificates if no CA is specified. |
| qos | Optional. The quality of service for publishing messages. Defaults to at\_most\_once. Options include at\_most\_once, at\_least\_once, or exactly\_once. |
| username | Optional. Username for the MQTT broker. |
| password | Optional. Password for the MQTT broker. |
| client\_prefix | Optional. Prefix for the MQTT client ID. Defaults to "risingwave". |
-| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects. If true, the broker clears all client states upon disconnect; if false, the broker retains the client state and resumes pending operations upon reconnection. |
+| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects. <br/> If true, the broker clears all client states upon disconnect; <br/> if false, the broker retains the client state and resumes pending operations upon reconnection. |
| inflight\_messages | Optional. Maximum number of inflight messages. Defaults to 100. |
-| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use fs:// prefix for file paths. |
-| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use fs:// prefix for file paths. |
-| topic | Required. The topic name to subscribe or publish to. Can include wildcard topics, e.g., /topic/#. |
+| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use `fs://` prefix for file paths. |
+| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use `fs://` prefix for file paths. |
+| topic | Required. The topic name to subscribe or publish to. Can include wildcard topics, e.g., `/topic/#`. |
| retain | Optional. Whether the message should be retained by the broker. |
| r#type | Required. Type identifier. |
diff --git a/integrations/destinations/mysql.mdx b/integrations/destinations/mysql.mdx
index 36be65e9..a15d9aa6 100644
--- a/integrations/destinations/mysql.mdx
+++ b/integrations/destinations/mysql.mdx
@@ -1,14 +1,14 @@
---
title: "Sink data from RisingWave to MySQL with the JDBC connector"
sidebarTitle: MySQL
-description: This guide will introduce how to sink data from RisingWave to JDBC-available databases using the JDBC sink connector. MySQL is a commonly used RDS with a JDBC driver and it is available as a cloud database through AWS for easy setup and maintenance. We will show you how to configure MySQL and RisingWave to create a MySQL sink. The configurations for RisingWave when connecting to any JDBC-available database will be the same.
+description: This guide describes how to sink data from RisingWave to JDBC-available databases using the JDBC sink connector.
---
-
-**NOTE**
+MySQL is a commonly used relational database with a JDBC driver, and it is available as a cloud database through AWS for easy setup and maintenance. We will show you how to configure MySQL and RisingWave to create a MySQL sink. The configuration for RisingWave is the same when connecting to any JDBC-available database.
+
The supported MySQL versions are 5.7 and 8.0.x
-
+
## Set up a MySQL database
@@ -118,15 +118,15 @@ WITH (
All `WITH` options are required.
| Parameter or clause | Description |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------- |
| sink\_name | Name of the sink to be created. |
-| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
-| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified.See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
-| connector | Sink connector type must be 'jdbc' for MySQL sink. |
+| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
+| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
+| connector | Sink connector type must be `jdbc` for MySQL sink. |
| jdbc.url | The JDBC URL of the destination database necessary for the driver to recognize and connect to the database. |
| jdbc.query.timeout | Specifies the timeout for the operations to downstream. If not set, the default is 10 minutes. |
| table.name | The table in the destination database you want to sink to. |
-| type | Data format. Allowed formats: append-only: Output data with insert operations. upsert: Output data as a changelog stream. |
+| type | Data format. Allowed formats: <br/> `append-only`: Output data with insert operations. <br/> `upsert`: Output data as a changelog stream. |
| primary\_key | Required if type is upsert. The primary key of the downstream table. |
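As a quick illustration of how these `WITH` options combine, here is a hedged sketch of an append-only MySQL sink. The materialized view name, JDBC URL, credentials, and table name are placeholders, not values taken from this guide.

```sql
-- Hypothetical sketch: append-only JDBC sink to MySQL.
-- The JDBC URL, credentials, and object names are placeholders.
CREATE SINK mysql_sink
FROM orders_mv
WITH (
    connector = 'jdbc',
    jdbc.url = 'jdbc:mysql://127.0.0.1:3306/mydb?user=myuser&password=secret&connectionTimeZone=UTC',
    table.name = 'orders',
    type = 'append-only'
);
```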
## Sink data from RisingWave to MySQL
@@ -184,6 +184,6 @@ For the MySQL data type mapping table, see the [Data type mapping table](/docs/c
Additional notes regarding sinking data to MySQL:
-* Note that array data types in RisingWave when sinked to MySQL will be converted to a string. Only one-dimensional arrays can be sinked to MySQL. For instance, `ARRAY['Value 1', 'Value 2']` when sinked to MySQL will be converted to the string `Value 1, Value 2`.
+* When sinked to MySQL, array data types in RisingWave will be converted to a string. Only one-dimensional arrays can be sinked to MySQL. For instance, `ARRAY['Value 1', 'Value 2']` will be converted to the string `Value 1, Value 2` when sinked to MySQL.
* For array type, we only support `smallint`, `integer`, `bigint`, `real`, `double precision`, and `varchar` type now.
* It's better to set `connectionTimeZone=UTC` in `jdbc.url` to get the correct `timestamptz` type data. For more details, see [MySQL's documentation](https://dev.mysql.com/doc/connector-j/en/connector-j-usagenotes-known-issues-limitations.html).
diff --git a/integrations/destinations/nats-and-nats-jetstream.mdx b/integrations/destinations/nats-and-nats-jetstream.mdx
index c3c56923..eb6c398a 100644
--- a/integrations/destinations/nats-and-nats-jetstream.mdx
+++ b/integrations/destinations/nats-and-nats-jetstream.mdx
@@ -45,14 +45,9 @@ WITH (
After the sink is created, RisingWave will continuously sink data to the NATS subject in append-only mode.
-
-**NOTE**
-
+
The NATS sink connector in RisingWave provides at-least-once delivery semantics. Events may be redelivered in case of failures.
-
-
-
-**NOTE**
+
According to the [NATS documentation](https://docs.nats.io/running-a-nats-service/nats%5Fadmin/jetstream%5Fadmin/naming), stream names must adhere to subject naming rules as well as be friendly to the file system. Here are the recommended guidelines for stream names:
@@ -62,15 +57,14 @@ According to the [NATS documentation](https://docs.nats.io/running-a-nats-servic
* Keep the name length limited to 32 characters as the JetStream storage directories include the account, stream name, and consumer name.
* Avoid using reserved file names like `NUL` or `LPT1`.
* Be cautious of case sensitivity in file systems. To prevent collisions, ensure that stream or account names do not clash due to case differences. For example, `Foo` and `foo` would collide on Windows or macOS systems.
-
-### Parameters
+## Parameters
| Field | Notes |
-| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| server\_url | Required. URLs of the NATS server, in the format of _address_:_port_. If multiple addresses are specified, use commas to separate them. |
+| :-------------------- | :--------------------------- |
+| server\_url | Required. URLs of the NATS server, in the format of `address:port`. If multiple addresses are specified, use commas to separate them. |
| subject | Required. NATS subject that you want to sink data to. |
-| connect\_mode | Required. Authentication mode for the connection. Allowed values: plain: No authentication; user\_and\_password: Use user name and password for authentication. For this option, username and password must be specified; credential: Use JSON Web Token (JWT) and NKeys for authentication. For this option, jwt and nkey must be specified. |
+| connect\_mode | Required. Authentication mode for the connection. Allowed values: <br/> `plain`: No authentication; <br/> `user_and_password`: Use user name and password for authentication. For this option, `username` and `password` must be specified; <br/> `credential`: Use JSON Web Token (JWT) and NKeys for authentication. For this option, `jwt` and `nkey` must be specified. |
| jwt and nkey | JWT and NKEY for authentication. For details, see [JWT](https://docs.nats.io/running-a-nats-service/configuration/securing%5Fnats/auth%5Fintro/jwt) and [NKeys](https://docs.nats.io/running-a-nats-service/configuration/securing%5Fnats/auth%5Fintro/nkey%5Fauth). |
-| username and password | Conditional. The client user name and password. Required when connect\_mode is user\_and\_password. |
-| type | Required. Sink data type. Its value should be append-only. |
+| username and password | Conditional. The client user name and password. Required when `connect_mode` is `user_and_password`. |
+| type | Required. Sink data type. Its value should be `append-only`. |
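Putting these fields together, here is a hedged sketch of a NATS sink using plain authentication. The connector name `nats`, the server address, and the subject are assumptions/placeholders rather than values shown in this hunk.

```sql
-- Hypothetical sketch: append-only sink to a NATS subject with no authentication.
-- Server address and subject are placeholders.
CREATE SINK nats_sink
FROM events_mv
WITH (
    connector = 'nats',
    server_url = 'nats-server:4222',
    subject = 'events.sink',
    connect_mode = 'plain',
    type = 'append-only'
);
```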
diff --git a/integrations/destinations/opensearch.mdx b/integrations/destinations/opensearch.mdx
index 6a2a0048..9802eb67 100644
--- a/integrations/destinations/opensearch.mdx
+++ b/integrations/destinations/opensearch.mdx
@@ -6,9 +6,11 @@ description: This guide describes how to sink data from RisingWave to OpenSearch
OpenSearch is the flexible, scalable, open-source way to build solutions for data-intensive applications. For more information about OpenSearch, see [OpenSearch official website](https://opensearch.org/).
-Premium Edition Feature
+
+**PREMIUM EDITION FEATURE**
This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com).
+
## Prerequisites
@@ -35,14 +37,14 @@ WITH (
## Parameters
-| Parameter | Description |
-| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Parameter | Description |
+| :--------------- | :-------------------- |
| sink\_name | Name of the sink to be created. |
-| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
+| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
| primary\_key | Optional. The primary keys of the sink. If the primary key has multiple columns, set a delimiter in the delimiter parameter below to join them. |
| index | Required if index\_column is not set. Name of the OpenSearch index that you want to write data to. |
-| index\_column | This parameter enables you to create a sink that writes to multiple indexes dynamically. The sink decides which index to write to based on a column. It is mutually exclusive with the parameter index. Only one of them **can and must** be set. When index is set, the write index of OpenSearch is index. When index\_column is set, the index of OpenSearch is the value of this column, which must be the string type. Since OpenSearch sink defaults to the first column as the key, it is not recommended to place this column as the first column. |
+| index\_column | This parameter enables you to create a sink that writes to multiple indexes dynamically. The sink decides which index to write to based on a column. It is mutually exclusive with the parameter `index`. Only one of them **can and must** be set. <br/> When `index` is set, the write index of OpenSearch is the value of `index`. <br/> When `index_column` is set, the index of OpenSearch is the value of this column, which must be of the string type. <br/> Since the OpenSearch sink defaults to the first column as the key, it is not recommended to place this column as the first column. |
| url | Required. URL of the OpenSearch REST API endpoint. |
| username | Optional. opensearch user name for accessing the OpenSearch endpoint. It must be used with password. |
| password | Optional. Password for accessing the OpenSearch endpoint. It must be used with username. |
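To illustrate how the parameters above fit together, here is a hedged sketch of an OpenSearch sink that writes to a single index. The connector name `opensearch`, the endpoint, and the credentials are assumptions/placeholders, not values taken from this page.

```sql
-- Hypothetical sketch: sink a materialized view into one OpenSearch index.
-- Endpoint, credentials, and object names are placeholders.
CREATE SINK opensearch_sink
FROM user_metrics_mv
WITH (
    connector = 'opensearch',
    url = 'http://opensearch:9200',
    index = 'user_metrics',
    username = 'admin',
    password = 'secret',
    primary_key = 'user_id'
);
```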
@@ -61,7 +63,7 @@ If you don't want to customize your OpenSearch ID, RisingWave will use the first
OpenSearch uses a mechanism called [dynamic field mapping](https://opensearch.org/docs/latest/field-types/#dynamic-mapping) to dynamically create fields and determine their types automatically. It treats all integer types as long and all floating-point types as float. To ensure data types in RisingWave are mapped to the data types in OpenSearch correctly, we recommend that you specify the mapping via [index templates](https://opensearch.org/docs/latest/im-plugin/index-templates/).
| RisingWave Data Type | OpenSearch Field Type |
-| --------------------------- | --------------------------------------------------------------------------------------------------------------------- |
+| :-------------------------- | :-------------------------------------------------------------------------------------------------------------------- |
| boolean | boolean |
| smallint | long |
| integer | long |
@@ -80,10 +82,7 @@ OpenSearch uses a mechanism called [dynamic field mapping](https://opensearch.or
| array | array |
| JSONB | object (RisingWave's OpenSearch sink will send JSONB as a JSON string, and OpenSearch will convert it into an object) |
-
-**NOTE**
OpenSearch doesn't require users to explicitly `CREATE TABLE`. Instead, it infers the schema on-the-fly based on the first record ingested. For example, if a record contains a jsonb `{v1: 100}`, v1 will be inferred as a long type. However, if the next record is `{v1: "abc"}`, the ingestion will fail because `"abc"` is inferred as a string and the two types are incompatible.
-
This behavior may lead to missing records. For monitoring, see Grafana, where there is a panel for all sink write errors.
diff --git a/integrations/destinations/postgresql.mdx b/integrations/destinations/postgresql.mdx
index c0b4c4bc..fd655579 100644
--- a/integrations/destinations/postgresql.mdx
+++ b/integrations/destinations/postgresql.mdx
@@ -1,9 +1,10 @@
---
title: "Sink data from RisingWave to PostgreSQL"
sidebarTitle: PostgreSQL
-description: This guide will show you how to sink data from RisingWave to PostgreSQL using the JDBC connector. The sink parameters are similar to those for other JDBC-available databases, such as MySQL. However, we will cover the configurations specific to PostgreSQL and how to verify that data is successfully sunk.
---
+This guide will show you how to sink data from RisingWave to PostgreSQL using the JDBC connector. The sink parameters are similar to those for other JDBC-available databases, such as MySQL. However, we will cover the configurations specific to PostgreSQL and how to verify that data is successfully sunk.
+
You can test out this process on your own device by using the `postgres-sink` demo in the [integration\_test directory](https://github.com/risingwavelabs/risingwave/tree/main/integration%5Ftests) of the RisingWave repository.
## Set up a PostgreSQL database
@@ -38,15 +39,11 @@ For more login options, refer to the [RDS connection guide](https://docs.aws.ama
To install PostgreSQL locally, see their [download options](https://www.postgresql.org/download/).
-
-**NOTE**
-
If you are using the demo version, connect to PostgreSQL with the following command. Ensure that all other programs are disconnected from port 5432.
```bash
psql postgresql://myuser:123456@127.0.0.1:5432/mydb
```
-
Ensure that the Postgres user is granted the following privileges on the used table with the following SQL query.
@@ -96,16 +93,16 @@ WITH (
All `WITH` options are required unless noted.
| Parameter or clause | Description |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :-------------- |
| sink\_name | Name of the sink to be created. |
-| sink\_from | A clause that specifies the direct source from which data will be output. _sink\_from_ can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
-| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified.See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
-| connector | Sink connector type must be 'jdbc' for PostgresQL sink. |
+| sink\_from | A clause that specifies the direct source from which data will be output. `sink_from` can be a materialized view or a table. Either this clause or a SELECT query must be specified. |
+| AS select\_query | A SELECT query that specifies the data to be output to the sink. Either this query or a FROM clause must be specified. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
+| connector | Sink connector type must be `jdbc` for PostgreSQL sink. |
| jdbc.url | The JDBC URL of the destination database necessary for the driver to recognize and connect to the database. |
| jdbc.query.timeout | Specifies the timeout for the operations to downstream. If not set, the default is 10 minutes. |
| table.name | The table in the destination database you want to sink to. |
| schema.name | Optional. The schema in the destination database you want to sink to. The default value is public. |
-| type | Sink data type. Supported types: append-only: Sink data as INSERT operations. upsert: Sink data as UPDATE, INSERT and DELETE operations. |
+| type | Sink data type. Supported types:
`append-only`: Sink data as INSERT operations.
`upsert`: Sink data as UPDATE, INSERT and DELETE operations.
|
| primary\_key | Required if type is upsert. The primary key of the sink, which should match the primary key of the downstream table. |
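For contrast with the append-only case, here is a hedged sketch of an upsert PostgreSQL sink. The JDBC URL reuses the demo credentials mentioned earlier in this guide; the schema, table, and key column are placeholders.

```sql
-- Hypothetical sketch: upsert JDBC sink to PostgreSQL.
-- The JDBC URL, credentials, and object names are placeholders.
CREATE SINK pg_sink
FROM orders_mv
WITH (
    connector = 'jdbc',
    jdbc.url = 'jdbc:postgresql://127.0.0.1:5432/mydb?user=myuser&password=123456',
    schema.name = 'public',
    table.name = 'orders',
    type = 'upsert',
    primary_key = 'order_id'
);
```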
## Sink data from RisingWave to PostgreSQL
diff --git a/integrations/destinations/redis.mdx b/integrations/destinations/redis.mdx
index 6155e3ec..adf54994 100644
--- a/integrations/destinations/redis.mdx
+++ b/integrations/destinations/redis.mdx
@@ -32,27 +32,26 @@ FORMAT data_format ENCODE data_encode [ (
## Parameters
-| Parameter Names | Description |
-| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| redis.url | Required. Choose either the Redis cluster address or a non-cluster Redis address. If the address is a cluster address, it should be in the form of a JSON array, like redis.url= '\["redis://redis-server:6379/"\]'. If the address is a non-cluster address, it should be in the form of a string, like redis.url= 'redis://redis-server:6379/'. |
-| primary\_key | Required. The primary keys of the sink. If necessary, use ',' to delimit the primary key columns. |
+| Name | Description |
+| :-------------- | :------------------ |
+| redis.url | Required. Choose either the Redis cluster address or a non-cluster Redis address. <br/> If the address is a cluster address, it should be in the form of a JSON array, like `redis.url= '["redis://redis-server:6379/"]'`. <br/> If the address is a non-cluster address, it should be in the form of a string, like `redis.url= 'redis://redis-server:6379/'`. |
+| primary\_key | Required. The primary keys of the sink. If necessary, use `,` to delimit the primary key columns. |
## FORMAT and ENCODE options
-
-**NOTE**
+
These options should be set in `FORMAT data_format ENCODE data_encode (key = 'value')`, instead of the `WITH` clause
-
+
| Field | Notes |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| data\_format | Data format. Allowed formats: PLAIN: Output data with insert operations. UPSERT: Output data as a changelog stream. |
-| data\_encode | Data encoding. Supported encodings: JSON:date: number of days since the Common Era (CE).interval: P\Y\M\DT\H\M\S format string.time without time zone: number of milliseconds past the last midnight.timestamp: number of milliseconds since the Epoch.TEMPLATE: converts data to the string specified by key\_format/value\_format. |
-| force\_append\_only | If true, forces the sink to be PLAIN (also known as append-only), even if it cannot be. |
-| key\_format | Required if data\_encode is TEMPLATE. Specify the format for the key as a string. |
-| value\_format | Required if data\_encode is TEMPLATE. Specify the format for the value as a string. |
-| key\_encode | Optional. When specified, the key encode can only be TEXT, and the primary key should be one and only one of the following types: varchar, bool, smallint, int, and bigint; When absent, both key and value will use the same setting of ENCODE data\_encode ( ... ). |
+| :------------------ | :-------------------------------------------------- |
+| data\_format | Data format. Allowed formats: <br/> `PLAIN`: Output data with insert operations. <br/> `UPSERT`: Output data as a changelog stream. |
+| data\_encode | Data encoding. Supported encodings: <br/> `JSON`: <br/> `date`: number of days since the Common Era (CE). <br/> `interval`: an ISO 8601 duration string in the `PnYnMnDTnHnMnS` format. <br/> `time without time zone`: number of milliseconds past the last midnight. <br/> `timestamp`: number of milliseconds since the Epoch. <br/> `TEMPLATE`: converts data to the string specified by `key_format`/`value_format`. |
+| force\_append\_only | If true, forces the sink to be `PLAIN` (also known as append-only), even if it cannot be. |
+| key\_format | Required if `data_encode` is `TEMPLATE`. Specify the format for the key as a string. |
+| value\_format | Required if `data_encode` is `TEMPLATE`. Specify the format for the value as a string. |
+| key\_encode | Optional. <br/> When specified, the key encode can only be `TEXT`, and the primary key should be one and only one of the following types: `varchar`, `bool`, `smallint`, `int`, and `bigint`; <br/> When absent, both key and value will use the same setting of `ENCODE data_encode ( ... )`. |
## Example
diff --git a/integrations/destinations/snowflake.mdx b/integrations/destinations/snowflake.mdx
index 651555ea..3b0e6fd6 100644
--- a/integrations/destinations/snowflake.mdx
+++ b/integrations/destinations/snowflake.mdx
@@ -27,11 +27,9 @@ This feature is in the public preview stage, meaning it's nearing the final prod
* Ensure the S3 user account has `WRITE` permission.
* Ensure that Snowflake and S3 are set up in the same manner as described in the [Automating Snowpipe for Amazon S3](https://docs.snowflake.com/en/user-guide/data-load-snowpipe-auto-s3), as RisingWave is only responsible for writing data to S3.
-
-**NOTE**
-
+
RisingWave will not be responsible for deleting data already imported by S3\. You can manually set the lifecycle configuration of your S3 bucket to clear out unnecessary data. See [Lifecycle configuration](https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-set-lifecycle-configuration-intro.html) and [Delete staged files](https://docs.snowflake.com/en/user-guide/data-load-snowpipe-manage#deleting-staged-files-after-snowpipe-loads-the-datafor) for more details.
-
+
## Syntax
Use the following syntax to create a sink in RisingWave:
@@ -50,12 +48,12 @@ WITH (
All parameters are required unless specified otherwise.
| Parameter | Description |
-| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :------------- |
| s3.bucket\_name | The S3 bucket where intermediate sink files will be stored. |
-| s3.path | Optional. The S3 path to be specified. If specified, the actual file location would be \://\/\. Otherwise, it would be \://\. |
+| s3.path | Optional. The S3 path to be specified. <br/> If specified, sink files are written under this path within the bucket. <br/> If not, they are written directly under the bucket. |
| s3.credentials.access | S3 access credentials. |
| s3.credentials.secret | S3 secret credentials. |
-| s3.region\_name | The S3 region, e.g., us-east-2. |
+| s3.region\_name | The S3 region, e.g., `us-east-2`. |
| force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. |
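As a rough illustration of the parameters above, here is a hedged sketch of a Snowflake sink that stages files in S3. The connector name `snowflake` and all bucket, path, and credential values are placeholders; depending on your RisingWave version, an explicit `type = 'append-only'` setting may also be expected.

```sql
-- Hypothetical sketch: Snowflake sink staging intermediate files in S3.
-- Bucket, path, credentials, and region are placeholders.
CREATE SINK snowflake_sink
FROM sales_mv
WITH (
    connector = 'snowflake',
    type = 'append-only',
    s3.bucket_name = 'example-bucket',
    s3.path = 'snowflake-sink/',
    s3.credentials.access = 'EXAMPLE_ACCESS_KEY',
    s3.credentials.secret = 'EXAMPLE_SECRET_KEY',
    s3.region_name = 'us-east-2'
);
```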
## Data type mapping
@@ -63,7 +61,7 @@ All parameters are required unless specified otherwise.
The following table shows the corresponding data types between RisingWave and Snowflake. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/).
| RisingWave type | Snowflake type |
-| --------------- | ----------------------------------------------------------------- |
+| :-------------- | :---------------------------------------------------------------- |
| SMALLINT | SMALLINT |
| INTEGER | INTEGER |
| BIGINT | BIGINT |
diff --git a/integrations/destinations/sql-server.mdx b/integrations/destinations/sql-server.mdx
index 43c2a96c..2e8cc291 100644
--- a/integrations/destinations/sql-server.mdx
+++ b/integrations/destinations/sql-server.mdx
@@ -33,7 +33,7 @@ WITH (
## Parameters
| Parameter Names | Description |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- |
| type | Required. Allowed values: append-only and upsert. |
| force\_append\_only | Optional. If true, forces the sink to be append-only, even if it cannot be. |
| primary\_key | Conditional. The primary keys of the sink. Use ',' to delimit the primary key columns. Primary keys are required for upsert sinks. |
@@ -49,7 +49,7 @@ WITH (
The following table shows the corresponding data types between RisingWave and SQL Server that should be specified when creating a sink. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/).
| SQL Server type | RisingWave type |
-| --------------- | --------------------------- |
+| :-------------- | :-------------------------- |
| bit | boolean |
| smallint | smallint |
| int | integer |
diff --git a/integrations/destinations/starrocks.mdx b/integrations/destinations/starrocks.mdx
index 26b2c532..1e072696 100644
--- a/integrations/destinations/starrocks.mdx
+++ b/integrations/destinations/starrocks.mdx
@@ -30,8 +30,8 @@ WITH (
All parameters are required unless specified otherwise.
-| Parameter names | Description |
-| ---------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Parameter names | Description |
+| :--------------------------- | :------------- |
| starrocks.host | The StarRocks host address. |
| starrocks.query\_port | The port to the MySQL server of the StarRocks frontend. |
| starrocks.http\_port | The port to the HTTP server of the StarRocks frontend. |
@@ -39,11 +39,11 @@ All parameters are required unless specified otherwise.
| starrocks.password | The password associated with the user. |
| starrocks.database | The StarRocks database where the target table is located |
| starrocks.table | The StarRocks table you want to sink data to. |
-| starrocks.partial\_update | Optional. If you set the value to "true", the partial update optimization feature of StarRocks will be enabled. This feature enhances ingestion performance in scenarios where there is a need to update a large number of rows with only a small number of columns. You can learn more about this feature in the [partial update optimization](https://docs.starrocks.io/docs/sql-reference/sql-statements/data-manipulation/UPDATE/#partial-updates-in-column-mode-since-v31) section of the StarRocks documentation. |
-| type | Data format. Allowed formats: append-only: Output data with insert operations. upsert: Output data as a chagelog stream. In StarRocks, Primary Key table must be selected. |
-| force\_append\_only | If true, forces the sink to be append-only, even if it cannot be. |
-| primary\_key | Required if type is upsert. The primary key of the downstream table. |
-| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10\. The behavior of this field also depends on the sink\_decouple setting:If sink\_decouple is true (the default), the default value of commit\_checkpoint\_interval is 10. If sink\_decouple is set to false, the default value of commit\_checkpoint\_interval is 1. If sink\_decouple is set to false and commit\_checkpoint\_interval is set to larger than 1, an error will occur. |
+| starrocks.partial\_update | Optional. Set it to `true` to improve performance when you need to update many rows but only change a few columns in each row. |
+| type | Data format. Allowed formats: <br/> `append-only`: Output data with insert operations. <br/> `upsert`: Output data as a changelog stream. In StarRocks, a Primary Key table must be selected. |
+| force\_append\_only | If `true`, forces the sink to be append-only, even if it cannot be. |
+| primary\_key | Required if type is `upsert`. The primary key of the downstream table. |
+| commit\_checkpoint\_interval | Optional. Commit every N checkpoints (N > 0). Default value is 10. The behavior of this field also depends on the `sink_decouple` setting: <br/> If `sink_decouple` is true (the default), the default value of `commit_checkpoint_interval` is 10. <br/> If `sink_decouple` is set to false, the default value of `commit_checkpoint_interval` is 1. <br/> If `sink_decouple` is set to false and `commit_checkpoint_interval` is set to larger than 1, an error will occur. |
## Examples
@@ -69,7 +69,7 @@ FROM bhv_mv WITH (
The following table shows the corresponding data type in RisingWave that should be specified when creating a sink. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/).
| StarRocks type | RisingWave type |
-| -------------- | ------------------------------------------------------------------------------------------------- |
+| :------------- | :---------------------------------- |
| BOOLEAN | BOOLEAN |
| SMALLINT | SMALLINT |
| INT | INTEGER |
@@ -89,9 +89,5 @@ The following table shows the corresponding data type in RisingWave that should
| JSON | JSONB |
| BIGINT | SERIAL |
-
-**NOTE**
-Before v1.9, when inserting data into a StarRocks sink, an error would be reported if the values were "nan (not a number)", "inf (infinity)", or "-inf (-infinity)". Since v1.9, we have made a change to the behavior. If a decimal value is out of bounds or represents "inf", "-inf", or "nan", we will insert null values.
-
-
+If a decimal value is out of bounds or represents `inf`, `-inf`, or `nan`, RisingWave will insert null values.
\ No newline at end of file
diff --git a/integrations/destinations/tidb.mdx b/integrations/destinations/tidb.mdx
index 0016ea8d..62f7100a 100644
--- a/integrations/destinations/tidb.mdx
+++ b/integrations/destinations/tidb.mdx
@@ -11,7 +11,7 @@ For the syntax, settings, and examples, see [Sink data from RisingWave to MySQL
The following table shows the corresponding data types between RisingWave and TiDB. For details on native RisingWave data types, see [Overview of data types](/docs/current/sql-data-types/).
| RisingWave type | TiDB type |
-| --------------- | -------------------------------------------------- |
+| :-------------- | :------------------------------------------------- |
| BOOLEAN | BOOLEAN |
| SMALLINT | TINYINT/SMALLINT |
| INT | INT/MEDIUMINT |
diff --git a/integrations/other/dbt.mdx b/integrations/other/dbt.mdx
index 859a869c..1c9bebba 100644
--- a/integrations/other/dbt.mdx
+++ b/integrations/other/dbt.mdx
@@ -58,7 +58,7 @@ The dbt models for managing data transformations in RisingWave are similar to ty
RisingWave accepts these [materializations](https://docs.getdbt.com/docs/build/materializations).
| Materializations | Notes |
-| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :--------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| table | This materialization creates a table. To use this materialization, add `{{ config(materialized='table') }}` to your model SQL files. |
| view | Create a view. To use this materialization, add `{{ config(materialized='view') }}` to your model SQL files. |
| ephemeral | This materialization uses [common table expressions](/docs/current/query-syntax-with-clause/) in RisingWave under the hood. To use this materialization, add `{{ config(materialized='ephemeral') }}` to your model SQL files. |
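To show what these materialization settings look like in practice, here is a hedged sketch of a dbt model file. The model name and the referenced staging model are hypothetical; the `config()` call simply selects one of the materializations listed above.

```sql
-- models/high_value_orders.sql (hypothetical dbt model)
-- The config() call picks one of the materializations listed above.
{{ config(materialized='table') }}

select
    order_id,
    customer_id,
    amount
from {{ ref('stg_orders') }}  -- hypothetical upstream model
where amount > 100
```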
diff --git a/integrations/sources/amazon-msk.mdx b/integrations/sources/amazon-msk.mdx
index 3efcdd08..12f5aaf3 100644
--- a/integrations/sources/amazon-msk.mdx
+++ b/integrations/sources/amazon-msk.mdx
@@ -183,9 +183,8 @@ WITH (
Then, you can count the records for accuracy.
-```
+```sql
SELECT * FROM s;
-
```
## Access MSK using IAM
@@ -208,7 +207,7 @@ RisingWave requires the following permissions to access MSK:
To access MSK using IAM, you need to use the `AWS_MSK_IAM` SASL mechanism. You also need to specify the following parameters.
| Parameter | Notes |
-| ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :---------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| aws.region | Required. AWS service region. For example, US East (N. Virginia). |
| aws.endpoint | Optional. URL of the entry point for the AWS Kinesis service. |
| aws.credentials.access\_key\_id | Required. This field indicates the access key ID of AWS. |
diff --git a/integrations/sources/apache-iceberg.mdx b/integrations/sources/apache-iceberg.mdx
index 91a7cda4..7be6ffa8 100644
--- a/integrations/sources/apache-iceberg.mdx
+++ b/integrations/sources/apache-iceberg.mdx
@@ -20,33 +20,31 @@ WITH (
);
```
-
-**NOTE**
-
+
You don’t need to specify the column name for the Iceberg source, as RisingWave can derive it from the Iceberg table metadata directly. Use [DESCRIBE](/docs/current/sql-describe/) statement to view the column names and data types.
-
+
## Parameters
| Field | Notes |
-| -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| type | Required. Allowed values: appendonly and upsert. |
+| :------------- | :---------------------------------- |
+| type | Required. Allowed values: `append-only` and `upsert`. |
| s3.endpoint | Optional. Endpoint of the S3. <br/> For MinIO object store backend, it should be `http://${MINIO_HOST}:${MINIO_PORT}`. <br/> For AWS S3, refer to [S3](https://docs.aws.amazon.com/general/latest/gr/s3.html). |
-| s3.region | Optional. The region where the S3 bucket is hosted. Either s3.endpoint or s3.region must be specified. |
+| s3.region | Optional. The region where the S3 bucket is hosted. Either `s3.endpoint` or `s3.region` must be specified. |
| s3.access.key | Required. Access key of the S3 compatible object store. |
| s3.secret.key | Required. Secret key of the S3 compatible object store. |
| database.name | Required. Name of the database that you want to ingest data from. |
| table.name | Required. Name of the table that you want to ingest data from. |
| catalog.name | Conditional. The name of the Iceberg catalog. It can be omitted for storage catalog but required for other catalogs. |
-| catalog.type | Optional. The catalog type used in this table. Currently, the supported values are storage, rest, hive, jdbc, and glue. If not specified, storage is used. For details, see [Catalogs](#catalogs). |
-| warehouse.path | Conditional. The path of the Iceberg warehouse. Currently, only S3-compatible object storage systems, such as AWS S3 and MinIO, are supported. It's required if the catalog.type is not rest. |
-| catalog.url | Conditional. The URL of the catalog. It is required when catalog.type is not storage. |
+| catalog.type | Optional. The catalog type used in this table. Currently, the supported values are `storage`, `rest`, `hive`, `jdbc`, and `glue`. If not specified, `storage` is used. For details, see [Catalogs](#catalogs). |
+| warehouse.path | Conditional. The path of the Iceberg warehouse. Currently, only S3-compatible object storage systems, such as AWS S3 and MinIO, are supported. It's required if the `catalog.type` is not `rest`. |
+| catalog.url | Conditional. The URL of the catalog. It is required when `catalog.type` is not `storage`. |
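Combining the fields above, here is a hedged sketch of an Iceberg source that uses the storage catalog; no columns are declared because RisingWave derives them from the Iceberg table metadata. All connection values are placeholders.

```sql
-- Hypothetical sketch: Iceberg source backed by an S3 warehouse and the storage catalog.
-- Keys, paths, and names are placeholders.
CREATE SOURCE iceberg_demo_source
WITH (
    connector = 'iceberg',
    type = 'append-only',
    s3.region = 'us-east-1',
    s3.access.key = 'EXAMPLE_ACCESS_KEY',
    s3.secret.key = 'EXAMPLE_SECRET_KEY',
    catalog.type = 'storage',
    warehouse.path = 's3://example-iceberg-warehouse',
    database.name = 'demo_db',
    table.name = 'demo_table'
);
```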
## Data type mapping
RisingWave converts data types from Iceberg to RisingWave according to the following data type mapping table.
| Iceberg Type | RisingWave Type |
-| ------------ | --------------- |
+| :----------- | :-------------- |
| boolean | boolean |
| integer | int |
| long | bigint |
diff --git a/integrations/sources/automq-kafka.mdx b/integrations/sources/automq-kafka.mdx
index 5cc51652..eb9e6bae 100644
--- a/integrations/sources/automq-kafka.mdx
+++ b/integrations/sources/automq-kafka.mdx
@@ -20,11 +20,9 @@ Use Kafka’s command-line tools to create a topic. Ensure you have access to th
./kafka-topics.sh --create --topic example_topic --bootstrap-server 10.0.96.4:9092 --partitions 1 --replication-factor 1
```
-
-**NOTE**
-
+
In this guide, `example_topic` and `10.0.96.4:9092` are used as examples of topic name and Kafka server address respectively. Please replace them with your actual topic name and Kafka server address.
-
+
To check the result of the topic creation, use this command:
@@ -79,10 +77,8 @@ In this guide, you can use JSON format and set the startup mode to `earliest` to
SELECT * from your_source_name limit 1;
```
-
-**NOTE**
-
+
Replace `your_source_name` with the name you defined when creating the source.
-
+
When you see actual results, that means that you have successfully ingested data from AutoMQ Kafka into RisingWave Cloud. You can now write more data into the topic, or transform the ingested data by creating materialized views in RisingWave Cloud.
diff --git a/integrations/sources/azure-blob.mdx b/integrations/sources/azure-blob.mdx
index d8851408..f2107d25 100644
--- a/integrations/sources/azure-blob.mdx
+++ b/integrations/sources/azure-blob.mdx
@@ -40,7 +40,7 @@ FORMAT data_format ENCODE data_encode (
| azblob.credentials.account\_key | Optional. The account key for the Azure Blob Storage account. |
| azblob.endpoint\_url | Required. The URL of the Azure Blob Storage service endpoint. |
| match\_pattern | Conditional. Set to find object keys in azblob.container\_name that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. |
-| compression\_format | Optional. Specifies the compression format of the file being read. When set to gzip or gz, the file reader reads all files with the .gz suffix; when set to None or not defined, the file reader will automatically read and decompress .gz and .gzip files. |
+| compression\_format | Optional. Specifies the compression format of the file being read. When set to `gzip` or `gz`, the file reader reads all files with the `.gz` suffix; when set to `None` or not defined, the file reader will automatically read and decompress `.gz` and `.gzip` files. |
### Other parameters
@@ -48,8 +48,8 @@ FORMAT data_format ENCODE data_encode (
| :---------------- | :---------------- |
| _data\_format_ | Supported data format: PLAIN. |
| _data\_encode_ | Supported data encodes: CSV, JSON, PARQUET. |
-| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: 'true', 'false'. Default: 'true'. |
-| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is \\n; for CSV encode, the delimiter can be one of ,, ;, E'\\t'. |
+| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is the header. Accepted values: `true`, `false`. Default is `true`. |
+| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is `\n`; for CSV encode, the delimiter can be one of `,`, `;`, `E'\t'`. |
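To tie the connector parameters and the encode options together, here is a hedged sketch of an Azure Blob source reading JSON files. The connector name `azblob`, the container name, the endpoint, and the column definitions are assumptions/placeholders rather than values shown here.

```sql
-- Hypothetical sketch: source that reads JSON objects from an Azure Blob container.
-- Container, endpoint, and account key are placeholders.
CREATE SOURCE blob_events (
    event_id varchar,
    payload varchar
)
WITH (
    connector = 'azblob',
    azblob.container_name = 'example-container',
    azblob.endpoint_url = 'https://exampleaccount.blob.core.windows.net',
    azblob.credentials.account_key = 'EXAMPLE_ACCOUNT_KEY',
    match_pattern = '*.json'
)
FORMAT PLAIN ENCODE JSON;
```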
### Additional columns
diff --git a/integrations/sources/citus-cdc.mdx b/integrations/sources/citus-cdc.mdx
index 6e0d87cc..44a95929 100644
--- a/integrations/sources/citus-cdc.mdx
+++ b/integrations/sources/citus-cdc.mdx
@@ -65,7 +65,7 @@ WITH (
Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks.
| Field | Notes |
-| ---------------- | ------------------------------------------------------------------------------------ |
+| :--------------- | :----------------------------------------------------------------------------------- |
| hostname | Hostname of the coordinator node. |
| port | Port number of the coordinator node. |
| username | Username of the database. |
diff --git a/integrations/sources/google-cloud-storage.mdx b/integrations/sources/google-cloud-storage.mdx
index 12a42350..a9258daa 100644
--- a/integrations/sources/google-cloud-storage.mdx
+++ b/integrations/sources/google-cloud-storage.mdx
@@ -32,29 +32,29 @@ FORMAT data_format ENCODE data_encode (
### Connector parameters
| Field | Notes |
-| ---------- | ------------ |
+| :--------- | :----------- |
| gcs.bucket\_name | Required. The name of the bucket the data source is stored in. |
-| gcs.credential | Base64-encoded credential key obtained from the GCS service account key JSON file. To get this JSON file, refer to the [guides of GCS documentation](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console). To encode it in base64, run the following command: cat ~/Downloads/rwc-byoc-test-464bdd851bce.json | base64 -b 0 | pbcopy, and then paste the output as the value for this parameter. If this field is not specified, ADC (application default credentials) will be used. |
+| gcs.credential | Required. Base64-encoded credential key obtained from the GCS service account key JSON file. To get this JSON file, refer to the [guides of GCS documentation](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console). To encode it in base64, run the following command: `cat ~/Downloads/rwc-byoc-test-464bdd851bce.json | base64 -b 0 | pbcopy`, and then paste the output as the value for this parameter. If this field is not specified, ADC (application default credentials) will be used. |
| gcs.service\_account | Optional. The service account of the target GCS source. If gcs.credential or ADC is not specified, the credentials will be derived from the service account. |
| match\_pattern | Conditional. This field is used to find object keys in the bucket that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. |
-| compression\_format | Optional. This field specifies the compression format of the file being read. You can define compression\_format in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the .gz suffix. When set to None or not defined, the file reader will automatically read and decompress .gz and .gzip files. |
+| compression\_format | Optional. This field specifies the compression format of the file being read. You can define `compression_format` in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the `.gz` suffix. When set to None or not defined, the file reader will automatically read and decompress `.gz` and `.gzip` files. |
| refresh.interval.sec | Optional. Configure the time interval between operations of listing files. It determines the delay in discovering new files, with a default value of 60 seconds. |
### Other parameters
| Field | Notes |
-| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
+| :---------------- | :--------------------------------------------------------------------------------------------------------------------------------------- |
| _data\_format_ | Supported data format: PLAIN. |
| _data\_encode_ | Supported data encodes: CSV, JSON, PARQUET. |
| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: 'true', 'false'. Default: 'true'. |
-| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is \\n; for CSV encode, the delimiter can be one of ,, ;, E'\\t'. |
+| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is `\n`; for CSV encode, the delimiter can be one of `,`, `;`, `E'\t'`. |
### Additional columns
| Field | Notes |
-| -------- | --------------------------------------------------------------------------------------------------------------------------- |
+| :------- | :-------------------------------------------------------------------------------------------------------------------------- |
| _file_ | Optional. The column contains the file name where current record comes from. |
-| _offset_ | Optional. The column contains the corresponding bytes offset (record offset for parquet files) where current message begins |
+| _offset_ | Optional. The column contains the corresponding bytes offset (record offset for parquet files) where current message begins. |
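For quick reference, here is a hedged minimal sketch of a GCS source using the parameters above; the bucket name, credential string, and columns are placeholders.

```sql
CREATE SOURCE gcs_source (
    order_id INT,
    amount DOUBLE PRECISION
)
WITH (
    connector = 'gcs',                                        -- GCS connector
    gcs.bucket_name = 'my-gcs-bucket',                        -- placeholder bucket name
    gcs.credential = 'base64-encoded-service-account-key',    -- placeholder; omit to fall back to ADC
    match_pattern = '*.json'                                  -- conditional Unix-style glob pattern
) FORMAT PLAIN ENCODE JSON;
```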
## Loading order of GCS files
diff --git a/integrations/sources/google-pub-sub.mdx b/integrations/sources/google-pub-sub.mdx
index 75d8fcda..a03971a0 100644
--- a/integrations/sources/google-pub-sub.mdx
+++ b/integrations/sources/google-pub-sub.mdx
@@ -24,15 +24,15 @@ FORMAT data_format ENCODE data_encode (
## Parameters
| Field | Note |
-| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| pubsub.subscription | Required. Specifies the Pub/Sub subscription to consume messages from. Pub/Sub is used to load-balance messages among all readers pulling from the same subscription, so one subscription (i.e., one source) can only be used for one materialized view (MV) that is shared between the actors of its fragment. Otherwise, different MVs on the same source will both receive part of the messages. |
| pubsub.credentials | Required. A JSON string containing the service account credentials for authorization, see the [service-account credentials guide](https://developers.google.com/workspace/guides/create-credentials#create%5Fcredentials%5Ffor%5Fa%5Fservice%5Faccount). The provided account credential must have the pubsub.subscriber [role](https://cloud.google.com/pubsub/docs/access-control#pubsub.subscriber) and pubsub.viewer [role](https://cloud.google.com/pubsub/docs/access-control#pubsub.viewer). |
-| pubsub.start\_offset.nanos | Optional. Cannot be set together with pubsub.start\_snapshot. Specifies a numeric timestamp in nanoseconds, ideally the publish timestamp of a message in the subscription. If present, the connector seeks the subscription to the timestamp and starts consuming from there. Note that the seek operation is subject to limitations based on the message retention policy of the subscription. |
-| pubsub.start\_snapshot | Optional. Cannot be set together with pubsub.start\_offset.nanos. If present, the connector first seeks to the specified snapshot before starting consumption. |
+| pubsub.start\_offset.nanos | Optional. Cannot be set together with `pubsub.start_snapshot`. Specifies a numeric timestamp in nanoseconds, ideally the publish timestamp of a message in the subscription. If present, the connector seeks the subscription to the timestamp and starts consuming from there. Note that the seek operation is subject to limitations based on the message retention policy of the subscription. |
+| pubsub.start\_snapshot | Optional. Cannot be set together with `pubsub.start_offset.nanos`. If present, the connector first seeks to the specified snapshot before starting consumption. |
| pubsub.parallelism | Optional. Specifies the number of parallel consumers to run for the subscription. If not specified, the default value is 1. |
-**INFO**
+
We can only achieve at-least-once semantic for the Pub/Sub source rather than exactly once because the SDK cannot seek back to a specific message offset.
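For illustration, here is a hedged minimal sketch built from the parameters above. The connector name `google_pubsub` is an assumption, and the subscription path, credentials string, and columns are placeholders; adjust them to your project.

```sql
CREATE SOURCE pubsub_source (
    device_id VARCHAR,
    reading DOUBLE PRECISION
)
WITH (
    connector = 'google_pubsub',                                         -- assumed connector name
    pubsub.subscription = 'projects/my-project/subscriptions/my-sub',    -- placeholder subscription
    pubsub.credentials = 'service-account-credentials-json'              -- placeholder JSON credentials
) FORMAT PLAIN ENCODE JSON;
```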
diff --git a/integrations/sources/hivemq.mdx b/integrations/sources/hivemq.mdx
index dfa30f22..4a377f06 100644
--- a/integrations/sources/hivemq.mdx
+++ b/integrations/sources/hivemq.mdx
@@ -1,12 +1,7 @@
---
title: "Ingest data from Coreflux broker"
-description: "You can ingest data from [HiveMQ](https://www.hivemq.com/)."
sidebarTitle: HiveMQ
---
----
-
-
-
You can ingest data from HiveMQ, a leading MQTT platform renowned for its reliability, scalability, and flexibility. HiveMQ extends the MQTT standard to provide a comprehensive IoT messaging solution, trusted by brands like Air France-KLM, BMW, Mercedes-Benz, and ZF. It is widely adopted across industries such as automotive, energy, logistics, and smart manufacturing. The core of HiveMQ is its high-performance, MQTT-compliant broker, ensuring fast and reliable data transmission.
diff --git a/integrations/sources/kafka.mdx b/integrations/sources/kafka.mdx
index 6a917107..a83dc04a 100644
--- a/integrations/sources/kafka.mdx
+++ b/integrations/sources/kafka.mdx
@@ -13,10 +13,9 @@ RisingWave supports exactly-once semantics by reading transactional messages onl
**GUIDED SETUP**
-RisingWave Cloud provides an intuitive guided setup for creating a Kafka source. For more information, see [Create a source using guided setup](/cloud/manage-sources/#using-guided-setup) in the RisingWave Cloud documentation.
-
+[RisingWave Cloud](https://cloud.risingwave.com/auth/signup/) provides an intuitive guided setup for creating a Kafka source. For more information, see [Create a source using guided setup](/cloud/manage-sources/#using-guided-setup) in the RisingWave Cloud documentation.
-Sign up for RisingWave Cloud
+
## Syntax
@@ -43,56 +42,47 @@ FORMAT data_format ENCODE data_encode (
)
```
-
-**INFO**
-
For Avro and Protobuf data, do not specify `schema_definition` in the `CREATE SOURCE` statement.
-
-
-**NOTE**
-
-RisingWave performs primary key constraint checks on tables but not on sources. If you need the checks to be performed, please create a table.
+RisingWave performs primary key constraint checks on tables but not on sources. If you need the checks to be performed, please create a table. For tables with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record.
-For tables with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record.
+## Parameters
-
### Connector parameters
-| Field | Notes |
-| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Field | Notes |
+| :---------------------------- | :--------------------------------------------- |
| topic | Required. Address of the Kafka topic. One source can only correspond to one topic. |
-| properties.bootstrap.server | Required. Address of the Kafka broker. Format: 'ip:port,ip:port'. |
-| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are earliest (read from low watermark) and latest (read from high watermark). If not specified, the default value earliest will be used. |
-| scan.startup.timestamp.millis | Optional. RisingWave will start to consume data from the specified UNIX timestamp (milliseconds). If this field is specified, the value for scan.startup.mode will be ignored. |
-| group.id.prefix | Optional. Specify a custom group ID prefix for the source. The default prefix is rw-consumer. Each job (materialized view) will have a separate consumer group with a generated suffix in the group ID, so the format of the consumer group is {group_id_prefix}-{fragment_id}. This is used to monitor progress in external Kafka tools and for authorization purposes. RisingWave does not rely on committed offsets or join the consumer group. It only reports offsets to the group. |
+| properties.bootstrap.server | Required. Address of the Kafka broker. Format: `ip:port,ip:port`. |
+| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are `earliest` (read from low watermark) and `latest` (read from high watermark). If not specified, the default value `earliest` will be used. |
+| scan.startup.timestamp.millis | Optional. RisingWave will start to consume data from the specified UNIX timestamp (milliseconds). If this field is specified, the value for `scan.startup.mode` will be ignored. |
+| group.id.prefix | Optional. Specify a custom group ID prefix for the source. The default prefix is `rw-consumer`. Each job (materialized view) will have a separate consumer group with a generated suffix in the group ID, so the format of the consumer group is `{group_id_prefix}-{fragment_id}`. This is used to monitor progress in external Kafka tools and for authorization purposes. RisingWave does not rely on committed offsets or join the consumer group. It only reports offsets to the group. |
| properties.sync.call.timeout | Optional. Specify the timeout. By default, the timeout is 5 seconds. |
| properties.client.id | Optional. Client ID associated with the Kafka client. |
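As a quick illustration of these connector parameters, here is a hedged minimal example; the topic name and broker address are placeholders.

```sql
CREATE SOURCE kafka_source (
    user_id INT,
    event_name VARCHAR
)
WITH (
    connector = 'kafka',
    topic = 'example_topic',                         -- placeholder topic
    properties.bootstrap.server = '127.0.0.1:9092',  -- placeholder broker address
    scan.startup.mode = 'earliest'                   -- read from the low watermark
) FORMAT PLAIN ENCODE JSON;
```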
### Other parameters
| Field | Notes |
-| ------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------------ | :--------------------------- |
| _data\_format_ | Data format. Supported formats: DEBEZIUM, MAXWELL, CANAL, UPSERT, PLAIN. |
| _data\_encode_ | Data encode. Supported encodes: JSON, AVRO, PROTOBUF, CSV. |
| _message_ | Message name of the main Message in schema definition. Required for Protobuf. |
-| _location_ | Web location of the schema file in http://..., https://..., or S3://... format. This option is not supported for Avro data. For Protobuf data, you must specify either a schema location or a schema registry but not both. |
-| _schema.registry_ | Confluent Schema Registry URL. Example: http://127.0.0.1:8081. For Avro data, you must specify a Confluent Schema Registry or an AWS Glue Schema Registry. For Protobuf data, you must specify either a schema location or a Confluent Schema Registry but not both. |
+| _location_ | Web location of the schema file in `http://...`, `https://...`, or `S3://...` format. This option is not supported for Avro data. For Protobuf data, you must specify either a schema location or a schema registry but not both. |
+| _schema.registry_ | Confluent Schema Registry URL. Example: `http://127.0.0.1:8081`. For Avro data, you must specify a Confluent Schema Registry or an AWS Glue Schema Registry. For Protobuf data, you must specify either a schema location or a Confluent Schema Registry but not both. |
| _schema.registry.username_ | Conditional. User name for the schema registry. It must be specified with schema.registry.password. |
| _schema.registry.password_ | Conditional. Password for the schema registry. It must be specified with schema.registry.username. |
-| _schema.registry.name.strategy_ | Optional. Accepts topic\_name\_strategy (default), record\_name\_strategy, topic\_record\_name\_strategy. If it is set to either record\_name\_strategy or topic\_record\_name\_strategy, the message parameter must also be set. It can only be specified with _schema.registry_. |
| _access\_key_ | Required if loading descriptors from S3\. The access key ID of AWS. |
| _secret\_key_ | Required if loading descriptors from S3\. The secret access key of AWS. |
| _region_ | Required if loading descriptors from S3\. The AWS service region. |
| _arn_ | Optional. The Amazon Resource Name (ARN) of the role to assume. |
| _external\_id_ | Optional. The [external](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) id used to authorize access to third-party resources. |
-## Additional Kafka parameters
+### Additional Kafka parameters
When creating a source in RisingWave, you can specify the following Kafka parameters. To set the parameter, add the RisingWave equivalent of the Kafka parameter under the `WITH options`. For an example of the usage of these parameters, see the JSON example. For additional details on these parameters, see the [Configuration properties](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
| Kafka parameter name | RisingWave parameter name | Type |
-| ------------------------------------- | ------------------------------------------------ | ------- |
+| :------------------------------------ | :----------------------------------------------- | :------ |
| enable.auto.commit | properties.enable.auto.commit | boolean |
| enable.ssl.certificate.verification | properties.enable.ssl.certificate.verification | bool |
| fetch.max.bytes | properties.fetch.max.bytes | int |
@@ -104,25 +94,21 @@ When creating a source in RisingWave, you can specify the following Kafka parame
| receive.message.max.bytes | properties.receive.message.max.bytes | int |
| ssl.endpoint.identification.algorithm | properties.ssl.endpoint.identification.algorithm | str |
-
-**NOTE**
-
+
Set `properties.ssl.endpoint.identification.algorithm` to `none` to bypass the verification of CA certificates and resolve SSL handshake failure. This parameter can be set to either `https` or `none`. By default, it is `https`.
-
+
### Specific parameters for Amazon MSK
-There are some specific parameters for Amazon Managed Streaming for Apache Kafka (MSK), please see[Access MSK in RisingWave](/docs/current/connector-amazon-msk/#access-msk-in-risingwave) for more details.
+There are some specific parameters for Amazon Managed Streaming for Apache Kafka (MSK). Please see [Access MSK in RisingWave](/docs/current/connector-amazon-msk/#access-msk-in-risingwave) for more details.
## Examples
Here are examples of connecting RisingWave to a Kafka broker to read data from individual topics.
-
-**NOTE**
-
+
RisingWave supports reading messages that have been compressed by [zstd](http://www.zstd.net/). Additional configurations are not required.
-
+
@@ -374,7 +360,7 @@ If your Kafka source service is located in a different VPC from RisingWave, use
To create a Kafka source with a PrivateLink connection, in the WITH section of your `CREATE SOURCE` or `CREATE TABLE` statement, specify the following parameters.
| Parameter | Notes |
-| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| privatelink.targets | The PrivateLink targets that correspond to the Kafka brokers. The targets should be in JSON format. Note that each target listed corresponds to each broker specified in the properties.bootstrap.server field. If the order is incorrect, there will be connectivity issues. |
| privatelink.endpoint | The DNS name of the VPC endpoint. If you're using RisingWave Cloud, you can find the auto-generated endpoint after you created a connection. See details in [Create a PrivateLink connection](/cloud/create-a-connection/#whats-next). |
| connection.name | The name of the connection. This parameter should only be included if you are using a connection created with the [CREATE CONNECTION](/docs/current/sql-create-connection/) statement. Omit this parameter if you have provisioned a VPC endpoint using privatelink.endpoint (recommended). |
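A hedged sketch of how these PrivateLink parameters might appear in a `CREATE SOURCE` statement follows. The endpoint DNS name, broker addresses, and the JSON shape of `privatelink.targets` (one object per broker, here with only a `port` field) are assumptions and placeholders; verify them against your PrivateLink setup.

```sql
CREATE SOURCE kafka_privatelink_source (
    user_id INT,
    event_name VARCHAR
)
WITH (
    connector = 'kafka',
    topic = 'example_topic',                                      -- placeholder topic
    properties.bootstrap.server = 'broker1:9092,broker2:9092',    -- placeholder brokers
    privatelink.endpoint = 'vpce-0123456789.example.amazonaws.com', -- placeholder endpoint DNS name
    privatelink.targets = '[{"port": 9092}, {"port": 9092}]'      -- assumed JSON shape, one target per broker
) FORMAT PLAIN ENCODE JSON;
```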
@@ -432,18 +418,16 @@ You need to specify encryption and authentication parameters in the WITH section
To read data encrypted with SSL without SASL authentication, specify these parameters in the WITH section of your `CREATE SOURCE` statement.
| Parameter | Notes |
-| ----------------------------------- | ----------- |
+| :---------------------------------- | :---------- |
| properties.security.protocol | Set to SSL. |
| properties.ssl.ca.location | |
| properties.ssl.certificate.location | |
| properties.ssl.key.location | |
| properties.ssl.key.password | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the RisingWave parameter names all start with the `properties.` prefix, while the names in the librdkafka list do not include this prefix.
-
+
Here is an example of creating a table encrypted with SSL without using SASL authentication.
@@ -469,14 +453,13 @@ WITH (
| Parameter | Notes |
-| ---------------------------- | ---------------------------------------------------------------------------------------------- |
+| :--------------------------- | :--------------------------------------------------------------------------------------------- |
| properties.security.protocol | For SASL/PLAIN without SSL, set to SASL\_PLAINTEXT. For SASL/PLAIN with SSL, set to SASL\_SSL. |
| properties.sasl.mechanism | Set to PLAIN. |
| properties.sasl.username | |
| properties.sasl.password | |
-**NOTE**
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the RisingWave parameter names all start with the `properties.` prefix, while the names in the librdkafka list do not include this prefix.
@@ -533,17 +516,15 @@ WITH (
| Parameter | Notes |
-| ---------------------------- | ---------------------------------------------------------------------------------------------- |
+| :--------------------------- | :--------------------------------------------------------------------------------------------- |
| properties.security.protocol | For SASL/SCRAM without SSL, set to SASL\_PLAINTEXT. For SASL/SCRAM with SSL, set to SASL\_SSL. |
| properties.sasl.mechanism | Set to SCRAM-SHA-256 or SCRAM-SHA-512 depending on the encryption method used. |
| properties.sasl.username | |
| properties.sasl.password | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the RisingWave parameter names all start with the `properties.` prefix, while the names in the librdkafka list do not include this prefix.
-
+
For SASL/SCRAM with SSL, you also need to include these SSL parameters:
@@ -575,7 +556,7 @@ WITH (
| Parameter | Notes |
-| ------------------------------------------------ | ---------------------------------------------------------------------------------- |
+| :----------------------------------------------- | :--------------------------------------------------------------------------------- |
| properties.security.protocol | Set to SASL\_PLAINTEXT, as RisingWave does not support using SASL/GSSAPI with SSL. |
| properties.sasl.mechanism | Set to GSSAPI. |
| properties.sasl.kerberos.service.name | |
@@ -584,11 +565,9 @@ WITH (
| properties.sasl.kerberos.kinit.cmd | |
| properties.sasl.kerberos.min.time.before.relogin | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the RisingWave parameter names all start with the `properties.` prefix, while the names in the librdkafka list do not include this prefix.
-
+
Here is an example of creating a source authenticated with SASL/GSSAPI without SSL encryption.
@@ -615,23 +594,19 @@ WITH (
-**CAUTION**
-
The implementation of SASL/OAUTHBEARER in RisingWave validates only [unsecured client side tokens](https://docs.confluent.io/platform/current/kafka/authentication%5Fsasl/authentication%5Fsasl%5Foauth.html#unsecured-client-side-token-creation-options-for-sasl-oauthbearer), and does not support OpenID Connect (OIDC) authentication. Therefore, it should not be used in production environments.
| Parameter | Notes |
-| ---------------------------------- | ---------------------------------------------------------------------------------------------------------- |
+| :--------------------------------- | :--------------------------------------------------------------------------------------------------------- |
| properties.security.protocol | For SASL/OAUTHBEARER without SSL, set to SASL\_PLAINTEXT. For SASL/OAUTHBEARER with SSL, set to SASL\_SSL. |
| properties.sasl.mechanism | Set to OAUTHBEARER. |
| properties.sasl.oauthbearer.config | |
-
-**NOTE**
-
+
For the definitions of the parameters, see the [librdkafka properties list](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Note that the RisingWave parameter names all start with the `properties.` prefix, while the names in the librdkafka list do not include this prefix. Also, due to the limitations of the SASL/OAUTHBEARER implementation, you only need to specify one OAUTHBEARER parameter: `properties.sasl.oauthbearer.config`. Other OAUTHBEARER parameters are not applicable.
-
+
For SASL/OAUTHBEARER with SSL, you also need to include these SSL parameters:
diff --git a/integrations/sources/kinesis.mdx b/integrations/sources/kinesis.mdx
index 4f2fdb42..7c9fe98a 100644
--- a/integrations/sources/kinesis.mdx
+++ b/integrations/sources/kinesis.mdx
@@ -32,23 +32,19 @@ FORMAT data_format ENCODE data_encode (
```
-**INFO**
For Avro and Protobuf data, do not specify `schema_definition` in the `CREATE SOURCE` or `CREATE TABLE` statement. The schema should be provided in a Web location in the option `schema.location` in the `ENCODE` section.
-
-**NOTE**
RisingWave performs primary key constraint checks on tables with connector settings but not on regular sources. If you need the checks to be performed, please create a table with connector settings.
For a table with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record.
-
### Connector parameters
| Field | Notes |
-| ----------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :---------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| stream | Required. Name of the stream. |
| aws.region | Required. AWS service region. For example, US East (N. Virginia). |
| endpoint | Optional. URL of the entry point for the AWS Kinesis service. |
@@ -63,11 +59,11 @@ For a table with primary key constraints, if a new data record with an existing
### Other parameters
| Field | Notes |
-| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _data\_format_ | Supported formats: DEBEZIUM, MAXWELL, CANAL, UPSERT, PLAIN. |
| _data\_encode_ | Supported encodes: JSON, AVRO, PROTOBUF, CSV, BYTES. |
| _message_ | Message name of the main Message in schema definition. Required when data\_encode is PROTOBUF. |
-| _location_ | Web location of the schema file in http://..., https://..., or S3://... format. Required when data\_encode is AVRO or PROTOBUF. Examples:https://\/risingwave/proto-simple-schema.protos3://risingwave-demo/schema-location |
+| _location_ | Web location of the schema file in `http://...`, `https://...`, or `S3://...` format. Required when `data_encode` is `AVRO` or `PROTOBUF`. Examples: `https://<hostname>/risingwave/proto-simple-schema.proto`, `s3://risingwave-demo/schema-location` |
## Example
diff --git a/integrations/sources/mongodb-cdc.mdx b/integrations/sources/mongodb-cdc.mdx
index 8d858516..746277d6 100644
--- a/integrations/sources/mongodb-cdc.mdx
+++ b/integrations/sources/mongodb-cdc.mdx
@@ -33,7 +33,7 @@ WITH (
Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks.
| Field | Notes |
-| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| mongodb.url | The [connection string](https://www.mongodb.com/docs/manual/reference/connection-string/) of MongoDB. |
| collection.name | The collection or collections you want to ingest data from. Use the format db\_name.collection\_name to specify which database the collection is in. To ingest data from collections in different databases, use a comma-separated list of regular expressions. |
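For illustration, here is a hedged sketch of how these fields are typically combined. The `_id`/`payload` column layout and the connector name `mongodb-cdc` are assumptions based on common usage; the connection string and collection are placeholders.

```sql
CREATE TABLE orders (
    _id JSONB PRIMARY KEY,    -- MongoDB document key
    payload JSONB             -- full document payload
)
WITH (
    connector = 'mongodb-cdc',
    mongodb.url = 'mongodb://user:password@127.0.0.1:27017/?replicaSet=rs0',  -- placeholder connection string
    collection.name = 'mydb.orders'                                           -- db_name.collection_name
);
```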
@@ -63,7 +63,7 @@ You can see the [INCLUDE clause](/docs/current/include-clause/) for more details
### Metadata options
| Field | Notes |
-| ---------------- | ------------------------------- |
+| :--------------- | :------------------------------ |
| database\_name | Name of the database. |
| collection\_name | Name of the MongoDB collection. |
diff --git a/integrations/sources/mqtt.mdx b/integrations/sources/mqtt.mdx
index 80e25a26..3ffc88af 100644
--- a/integrations/sources/mqtt.mdx
+++ b/integrations/sources/mqtt.mdx
@@ -54,17 +54,17 @@ FORMAT PLAIN ENCODE data_encode; -- Format options: plain (encode BYTES and JSON
### Parameters
| Field | Notes |
-| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| url | Required. The URL of the broker to connect to, e.g., tcp://localhost. Must be prefixed with tcp://, mqtt://, ssl://, or mqtts:// to denote the protocol. mqtts:// and ssl:// use native certificates if no CA is specified. |
-| qos | Optional. The quality of service for publishing messages. Defaults to at\_most\_once. Options include at\_most\_once, at\_least\_once, or exactly\_once. |
+| :----------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| url | Required. The URL of the broker to connect to, e.g., `tcp://localhost`. Must be prefixed with `tcp://`, `mqtt://`, `ssl://`, or `mqtts://` to denote the protocol. `mqtts://` and `ssl://` use native certificates if no CA is specified. |
+| qos | Optional. The quality of service for publishing messages. Defaults to `at_most_once`. Options include `at_most_once`, `at_least_once`, or `exactly_once`. |
| username | Optional. Username for the MQTT broker. |
| password | Optional. Password for the MQTT broker. |
| client\_prefix | Optional. Prefix for the MQTT client ID. Defaults to "risingwave". |
-| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects. If true, the broker clears all client states upon disconnect; if false, the broker retains the client state and resumes pending operations upon reconnection. |
+| clean\_start | Optional. Determines if all states from queues are removed when the client disconnects. If true, the broker clears all client states upon disconnect; if false, the broker retains the client state and resumes pending operations upon reconnection. |
| inflight\_messages | Optional. Maximum number of inflight messages. Defaults to 100. |
| max\_packet\_size | Optional. The maximum message size for the MQTT client. |
-| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use fs:// prefix for file paths. |
-| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use fs:// prefix for file paths. |
+| tls.client\_cert | Optional. Path to the client's certificate file (PEM) or a string with the certificate content. Required for client authentication. Can use `fs://` prefix for file paths. |
+| tls.client\_key | Optional. Path to the client's private key file (PEM) or a string with the private key content. Required for client authentication. Can use `fs://` prefix for file paths. |
| topic | Required. The topic name to subscribe or publish to. Can include wildcard topics, e.g., /topic/#. |
This SQL statement creates a table named `iot_sensor_data` with columns for device ID, timestamp, temperature, humidity, and device status. The table is configured to connect to an MQTT broker using the MQTT connector, with specific URL, topic, and quality of service (QoS) settings, the data is encoded as JSON.
diff --git a/integrations/sources/mysql-cdc.mdx b/integrations/sources/mysql-cdc.mdx
index b391742a..b7d3b064 100644
--- a/integrations/sources/mysql-cdc.mdx
+++ b/integrations/sources/mysql-cdc.mdx
@@ -365,12 +365,12 @@ CREATE TABLE {{ this }} (
This feature is only available in the premium edition of RisingWave. The premium edition offers additional advanced features and capabilities beyond the free and community editions. If you have any questions about upgrading to the premium edition, please contact our sales team at [sales@risingwave-labs.com](mailto:sales@risingwave-labs.com).
-
+
PUBLIC PREVIEW
This feature is in the public preview stage, meaning it's nearing the final product but is not yet fully stable. If you encounter any issues or have feedback, please contact us through our [Slack channel](https://www.risingwave.com/slack). Your input is valuable in helping us improve the feature. For more information, see our [Public preview feature list](../../changelog/product-lifecycle#features-in-the-public-preview-stage).
-
+
RisingWave supports automatically mapping the upstream table schema when creating a CDC table from a MySQL CDC source. Instead of defining columns individually, you can use `*` when creating a table to ingest all columns from the source table. Note that `*` cannot be used if other columns are specified in the table creation process.
Below is an example to create a table that ingests all columns from the upstream table from the MySQL database:
diff --git a/integrations/sources/nats-jetstream.mdx b/integrations/sources/nats-jetstream.mdx
index 26e0a0b4..36edb509 100644
--- a/integrations/sources/nats-jetstream.mdx
+++ b/integrations/sources/nats-jetstream.mdx
@@ -61,16 +61,12 @@ FORMAT PLAIN ENCODE data_encode;
)
```
-
-**NOTE**
-
+
RisingWave performs primary key constraint checks on tables with connector settings but not on regular sources. If you need the checks to be performed, please create a table with connector settings.
For a table with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record.
-
-
-**NOTE**
+
According to the [NATS documentation](https://docs.nats.io/running-a-nats-service/nats%5Fadmin/jetstream%5Fadmin/naming), stream names must adhere to subject naming rules as well as being friendly to the file system. Here are the recommended guidelines for stream names:
@@ -80,11 +76,11 @@ According to the [NATS documentation](https://docs.nats.io/running-a-nats-servic
* Keep the name length limited to 32 characters as the JetStream storage directories include the account, stream name, and consumer name.
* Avoid using reserved file names like `NUL` or `LPT1`.
* Be cautious of case sensitivity in file systems. To prevent collisions, ensure that stream or account names do not clash due to case differences. For example, `Foo` and `foo` would collide on Windows or macOS systems.
-
+
### Parameters
| Field | Notes |
-| -------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| server\_url | Required. URLs of the NATS JetStream server, in the format of _address_:_port_. If multiple addresses are specified, use commas to separate them. |
| subject | Required. NATS subject that you want to ingest data from. To specify more than one subject, use a comma-separated list. |
| stream | Required. NATS stream that you want to ingest data from. |
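Taken together, a minimal hedged sketch of a NATS JetStream source looks like the following; the server URL, subject, stream name, and columns are placeholders, and authentication parameters are omitted.

```sql
CREATE SOURCE nats_source (
    sensor_id INT,
    temperature DOUBLE PRECISION
)
WITH (
    connector = 'nats',
    server_url = 'nats-server:4222',    -- placeholder NATS JetStream server address
    subject = 'events.sensors',         -- placeholder subject
    stream = 'SENSORS'                  -- placeholder stream name
) FORMAT PLAIN ENCODE JSON;
```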
diff --git a/integrations/sources/postgresql-cdc.mdx b/integrations/sources/postgresql-cdc.mdx
index ac68851c..695f32ac 100644
--- a/integrations/sources/postgresql-cdc.mdx
+++ b/integrations/sources/postgresql-cdc.mdx
@@ -27,12 +27,11 @@ ALTER SYSTEM SET wal_level = logical;
```
Keep in mind that changing the `wal_level` requires a restart of the PostgreSQL instance and can affect database performance.
-
-**NOTE**
+
If you choose to create multiple CDC tables without using a shared source, be sure to set `max_wal_senders` to be greater than or equal to the number of synced tables. By default, `max_wal_senders` is 10.
-
+
2. Assign the `REPLICATION`, `LOGIN`, and `CREATEDB` role attributes to the user.
For an existing user, run the following statement to assign the attributes:
@@ -151,7 +150,7 @@ To check the progress of backfilling historical data, find the corresponding int
Unless specified otherwise, the fields listed are required. Note that the value of these parameters should be enclosed in single quotation marks.
| Field | Notes |
-| ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| hostname | Hostname of the database. |
| port | Port number of the database. |
| username | Username of the database. |
@@ -167,17 +166,15 @@ Unless specified otherwise, the fields listed are required. Note that the value
| publication.create.enable | Optional. By default, the value is `true`. If publication.name does not exist and this value is `true`, a publication.name will be created. If publication.name does not exist and this value is `false`, an error will be returned. |
| transactional | Optional. Specify whether you want to enable transactions for the CDC table that you are about to create. By default, the value is `true` for shared sources, and `false` otherwise. This feature is also supported for shared CDC sources for multi-table transactions. For performance considerations, transactions involving changes to more than 4096 rows cannot be guaranteed. |
-
-**NOTE**
-
+
RisingWave implements CDC via PostgreSQL replication. Inspect the current progress via the [pg\_replication\_slots](https://www.postgresql.org/docs/14/view-pg-replication-slots.html) view. Remove inactive replication slots via [pg\_drop\_replication\_slot()](https://www.postgresql.org/docs/current/functions-admin.html#:~:text=pg%5Fdrop%5Freplication%5Fslot). RisingWave does not automatically drop inactive replication slots. You must do this manually to prevent WAL files from accumulating in the upstream PostgreSQL database.
-
+
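For example, you can inspect and clean up slots in the upstream PostgreSQL database with standard PostgreSQL statements like the following; the slot name is a placeholder.

```sql
-- List replication slots and check whether they are still active.
SELECT slot_name, active FROM pg_replication_slots;

-- Drop an inactive slot that is no longer needed (placeholder slot name).
SELECT pg_drop_replication_slot('rw_cdc_example_slot');
```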
The following fields are used when creating a CDC table.
| Field | Notes |
-| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| snapshot | Optional. If false, CDC backfill will be disabled and only upstream events that have occurred after the creation of the table will be consumed. This option can only be applied for tables created from a shared source. |
| snapshot.interval | Optional. Specifies the barrier interval for buffering upstream events. The default value is 1. |
| snapshot.batch\_size | Optional. Specifies the batch size of a snapshot read query from the upstream table. The default value is 1000. |
@@ -229,7 +226,7 @@ Data is in Debezium JSON format. [Debezium](https://debezium.io) is a log-based
Below are the metadata columns available for PostgreSQL CDC.
| Field | Notes |
-| -------------- | --------------------- |
+| :------------- | :-------------------- |
| database\_name | Name of the database. |
| schema\_name | Name of the schema. |
| table\_name | Name of the table. |
@@ -297,14 +294,12 @@ The following table shows the corresponding data type in RisingWave that should
RisingWave data types marked with an asterisk indicate that while there is no corresponding RisingWave data type, the ingested data can still be consumed as the listed type.
-
-**NOTE**
-
+
RisingWave cannot correctly parse composite types from PostgreSQL as Debezium does not support composite types in PostgreSQL.
-
+
| PostgreSQL type | RisingWave type |
-| ---------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :--------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| BOOLEAN | BOOLEAN |
| BIT(1) | BOOLEAN |
| BIT( > 1) | No support |
diff --git a/integrations/sources/pulsar.mdx b/integrations/sources/pulsar.mdx
index c3fe2d7a..0559ab16 100644
--- a/integrations/sources/pulsar.mdx
+++ b/integrations/sources/pulsar.mdx
@@ -38,43 +38,38 @@ FORMAT data_format ENCODE data_encode (
```
-**INFO**
For Avro and Protobuf data, do not specify `schema_definition` in the `CREATE SOURCE` or `CREATE TABLE` statement. The schema should be provided in a Web location in the option `schema.location` in `ENCODE properties` section.
-
-**NOTE**
-
RisingWave performs primary key constraint checks on tables with connector settings but not on regular sources. If you need the checks to be performed, please create a table with connector settings.
For a table with primary key constraints, if a new data record with an existing key comes in, the new record will overwrite the existing record.
-
### Connector parameters
| Field | Notes |
-| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| topic | Required. Address of the Pulsar topic. One source can only correspond to one topic. |
-| service.url | Required. Address of the Pulsar service. Typically in the format pulsar:// or pulsar+ssl://\:\ |
-| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are earliest (earliest offset) and latest (latest offset). If not specified, the default value earliest will be used. |
+| service.url | Required. Address of the Pulsar service. Typically in the format `pulsar://<host>:<port>` or `pulsar+ssl://<host>:<port>`. |
+| scan.startup.mode | Optional. The offset mode that RisingWave will use to consume data. The two supported modes are `earliest` (earliest offset) and `latest` (latest offset). If not specified, the default value `earliest` will be used. |
| scan.startup.timestamp.millis | Optional. RisingWave will start to consume data from the specified UNIX timestamp (milliseconds). |
-| auth.token | Optional. A token for auth. If both auth.token and oauth are set, only oauth authorization is effective. |
-| oauth.issuer.url | Optional. The issuer url for OAuth2\. This field must be filled if other oauth fields are specified. |
-| oauth.credentials.url | Optional. The path for credential files, starts with file://. This field must be filled if other oauth fields are specified. |
-| oauth.audience | Optional. The audience for OAuth2\. This field must be filled if other oauth fields are specified. |
+| auth.token | Optional. A token for auth. If both `auth.token` and `oauth` are set, only `oauth` authorization is effective. |
+| oauth.issuer.url | Optional. The issuer url for OAuth2\. This field must be filled if other `oauth` fields are specified. |
+| oauth.credentials.url | Optional. The path for credential files, starts with `file://`. This field must be filled if other `oauth` fields are specified. |
+| oauth.audience | Optional. The audience for OAuth2\. This field must be filled if other `oauth` fields are specified. |
| oauth.scope | Optional. The scope for OAuth2. |
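Putting the connector parameters together, here is a minimal hedged sketch; the topic, service URL, and columns are placeholders.

```sql
CREATE SOURCE pulsar_source (
    user_id INT,
    event_name VARCHAR
)
WITH (
    connector = 'pulsar',
    topic = 'persistent://public/default/example_topic',    -- placeholder topic
    service.url = 'pulsar://localhost:6650',                 -- placeholder service address
    scan.startup.mode = 'earliest'                           -- start from the earliest offset
) FORMAT PLAIN ENCODE JSON;
```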
### Other parameters
| Field | Notes |
-| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _data\_format_ | Supported formats: DEBEZIUM, MAXWELL, CANAL, UPSERT, PLAIN. |
| _data\_encode_ | Supported encodes: JSON, AVRO, PROTOBUF, CSV, BYTES. |
| _message_ | Message name of the main Message in schema definition. Required when data\_encode is PROTOBUF. |
-| _location_ | Web location of the schema file in http://..., https://..., or S3://... format. Required when data\_encode is AVRO or PROTOBUF. Examples:https://\/risingwave/proto-simple-schema.protos3://risingwave-demo/schema-location |
-| _aws.credentials.access\_key\_id_ | Optional. The AWS access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. |
-| _aws.credentials.secret\_access\_key_ | Optional. The AWS secret access key for loading from S3\. This field does not need to be filled if oauth.credentials.url is specified to a local path. |
+| _location_ | Web location of the schema file in `http://...`, `https://...`, or `S3://...` format. Required when `data_encode` is `AVRO` or `PROTOBUF`. Examples: `https://<hostname>/risingwave/proto-simple-schema.proto`, `s3://risingwave-demo/schema-location` |
+| _aws.credentials.access\_key\_id_ | Optional. The AWS access key for loading from S3\. This field does not need to be filled if `oauth.credentials.url` is specified to a local path. |
+| _aws.credentials.secret\_access\_key_ | Optional. The AWS secret access key for loading from S3\. This field does not need to be filled if `oauth.credentials.url` is specified to a local path. |
| _region_ | Required if loading descriptors from S3\. The AWS service region. |
| _aws.credentials.role.arn_ | Optional. The Amazon Resource Name (ARN) of the role to assume. |
| _aws.credentials.role.external\_id_ | Optional. The [external](https://aws.amazon.com/blogs/security/how-to-use-external-id-when-granting-access-to-your-aws-resources/) id used to authorize access to third-party resources. |
diff --git a/integrations/sources/s3.mdx b/integrations/sources/s3.mdx
index c7785d80..4a1ff358 100644
--- a/integrations/sources/s3.mdx
+++ b/integrations/sources/s3.mdx
@@ -23,7 +23,7 @@ FORMAT data_format ENCODE data_encode (
```
-**INFO**
+
For CSV data, specify the delimiter in the `delimiter` option in `ENCODE properties`.
@@ -40,15 +40,15 @@ For CSV data, specify the delimiter in the `delimiter` option in `ENCODE propert
## Parameters
| Field | Notes |
-| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| connector | Required. Only the `s3` connector is supported. |
| s3.region\_name | Required. The service region. |
| s3.bucket\_name | Required. The name of the bucket the data source is stored in. |
| s3.credentials.access | Required. This field indicates the access key ID of AWS. |
| s3.credentials.secret | Required. This field indicates the secret access key of AWS. |
| s3.endpoint\_url | Conditional. The host URL for an S3-compatible object storage server. This allows users to use a different server instead of the standard S3 server. |
-| compression\_format | Optional. This field specifies the compression format of the file being read. You can define compression\_format in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the .gz suffix. When set to None or not defined, the file reader will automatically read and decompress .gz and .gzip files. |
-| match\_pattern | Conditional. This field is used to find object keys in s3.bucket\_name that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. |
+| compression\_format | Optional. This field specifies the compression format of the file being read. You can define `compression_format` in the CREATE TABLE statement. When set to gzip or gz, the file reader reads all files with the `.gz` suffix. When set to `None` or not defined, the file reader will automatically read and decompress `.gz` and `.gzip` files. |
+| match\_pattern | Conditional. This field is used to find object keys in `s3.bucket_name` that match the given pattern. Standard Unix-style [glob](https://en.wikipedia.org/wiki/Glob%5F%28programming%29) syntax is supported. |
| s3.assume\_role | Optional. Specifies the ARN of an IAM role to assume when accessing S3\. It allows temporary, secure access to S3 resources without sharing long-term credentials. |
| refresh.interval.sec | Optional. Configure the time interval between operations of listing files. It determines the delay in discovering new files, with a default value of 60 seconds. |
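For reference, a hedged minimal sketch using the parameters above; the region, bucket, credentials, and columns are placeholders.

```sql
CREATE SOURCE s3_source (
    order_id INT,
    amount DOUBLE PRECISION
)
WITH (
    connector = 's3',
    s3.region_name = 'us-east-1',         -- placeholder region
    s3.bucket_name = 'example-bucket',    -- placeholder bucket
    s3.credentials.access = 'xxxxxx',     -- placeholder access key ID
    s3.credentials.secret = 'xxxxxx',     -- placeholder secret access key
    match_pattern = '*.json'              -- conditional Unix-style glob pattern
) FORMAT PLAIN ENCODE JSON;
```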
@@ -57,16 +57,16 @@ note
Empty cells in CSV files will be parsed to `NULL`.
| Field | Notes |
-| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
+| :---------------- | :--------------------------------------------------------------------------------------------------------------------------------------- |
| _data\_format_ | Supported data format: PLAIN. |
| _data\_encode_ | Supported data encodes: CSV, JSON, PARQUET. |
-| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is header. Accepted values: 'true', 'false'. Default: 'true'. |
-| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is \\n; for CSV encode, the delimiter can be one of ,, ;, E'\\t'. |
+| _without\_header_ | This field is only for CSV encode, and it indicates whether the first line is the header. Accepted values: `true`, `false`. Default is `true`. |
+| _delimiter_ | How RisingWave splits contents. For JSON encode, the delimiter is `\n`; for CSV encode, the delimiter can be one of `,`, `;`, `E'\t'`. |
### Additional columns
| Field | Notes |
-| -------- | --------------------------------------------------------------------------------------------------------------------------- |
+| :------- | :-------------------------------------------------------------------------------------------------------------------------- |
| _file_ | Optional. The column contains the file name where current record comes from. |
| _offset_ | Optional. The column contains the corresponding bytes offset (record offset for parquet files) where current message begins. |
@@ -194,12 +194,10 @@ Function signature
file_scan(file_format, storage_type, s3_region, s3_access_key, s3_secret_key, file_location_or_directory)
```
-
-**NOTE**
-
+
When reading a directory of Parquet files, the schema will be based on the first Parquet file listed. Please ensure that all Parquet files in the directory have the same schema.
-
+
For example, assume you have a Parquet file named `sales_data.parquet` that stores a company's sales data, containing the following fields:
* `product_id`: Product ID
diff --git a/integrations/sources/sql-server-cdc.mdx b/integrations/sources/sql-server-cdc.mdx
index ba4f2d59..6647339b 100644
--- a/integrations/sources/sql-server-cdc.mdx
+++ b/integrations/sources/sql-server-cdc.mdx
@@ -52,7 +52,6 @@ EXEC sys.sp_cdc_enable_table @source_schema = 'dbo', @source_name = 't1', @role_
Replace `dbo` with the schema name and `t1` with the table name.
-**NOTE**
SQL Server allows you to create multiple CDC tables for the same source table using different capture instance names (@capture\_instance). However, RisingWave currently supports only a single capture instance per table. If your table has only one capture instance, RisingWave will automatically use it to create a CDC table; if there are multiple capture instances, RisingWave will select one at random for CDC table creation.
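To make the flow concrete, the sketch below shows how such a table might be ingested into RisingWave through a shared SQL Server CDC source. The hostname, credentials, and column list are illustrative placeholders, not values taken from this document.

```sql
-- Create a shared SQL Server CDC source, then a CDC table from it.
-- Connection details and the column list are placeholders.
CREATE SOURCE mssql_source WITH (
    connector = 'sqlserver-cdc',
    hostname = 'sqlserver-host',
    port = '1433',
    username = 'sqluser',
    password = 'password',
    database.name = 'mydb'
);

CREATE TABLE t1_rw (
    id INT,
    name VARCHAR,
    PRIMARY KEY (id)
) FROM mssql_source TABLE 'mydb.dbo.t1';
```

Note that the table identifier follows the `database_name.schema_name.table_name` format described below.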
@@ -106,13 +105,11 @@ Unless specified otherwise, the fields listed are required. Note that the value
|database.encrypt| Optional. Specify whether to enable SSL encryption. Currently, `trustServerCertificate` is enabled regardless of the value of `database.encrypt`. |
| sqlserver_table_name | The identifier of SQL Server table in the format of `database_name.schema_name.table_name`. |
-
-**NOTE**
-
+
As noted earlier, RisingWave will use the available capture instance to create a CDC table. If multiple capture instances exist, RisingWave will randomly choose one. Specifying a particular capture instance is not supported.
Additionally, unlike MySQL and PostgreSQL, the SQL Server CDC connector does not support transactional CDC, as doing so would compromise the freshness of CDC sources. For further details, refer to the [Debezium SQL Server CDC connector documentation](https://debezium.io/documentation/reference/2.6/connectors/sqlserver.html#sqlserver-transaction-metadata).
-
+
The following fields are used when creating a CDC table.
diff --git a/integrations/visualization/beekeeper-studio.mdx b/integrations/visualization/beekeeper-studio.mdx
index 26277cc7..eaf1b97c 100644
--- a/integrations/visualization/beekeeper-studio.mdx
+++ b/integrations/visualization/beekeeper-studio.mdx
@@ -3,11 +3,9 @@ title: "Connect Beekeeper Studio to RisingWave"
sidebarTitle: Beekeeper Studio
description: "Beekeeper Studio is a modern, easy to use SQL editor and database manager. It provides a graphical user interface, allowing you to efficiently query and manage PostgreSQL, MySQL, SQL Server and more. Since RisingWave is PostgreSQL-compatible, you can easily connect Beekeeper Studio to RisingWave."
---
-
-**NOTE**
-
+
RisingWave only supports connections from the Beekeeper Studio Community edition. The Ultimate (commercial) edition is not officially tested with RisingWave and may contain bugs. Please report any issues with the Ultimate edition to the RisingWave team.
-
+
## Prerequisites
diff --git a/integrations/visualization/grafana.mdx b/integrations/visualization/grafana.mdx
index 5ec3c4b3..310d30cb 100644
--- a/integrations/visualization/grafana.mdx
+++ b/integrations/visualization/grafana.mdx
@@ -32,13 +32,11 @@ To install Grafana locally, see the [Install Grafana](https://grafana.com/docs/g
-
-**NOTE**
-
+
If both RisingWave and Grafana are started locally, the host domain can be either **localhost:4566** or **127.0.0.1:4566**.
If you are running Grafana Cloud, the host domain should be your computer’s public IP address.
-
+
5. Click **Save & test**.
diff --git a/operate/access-control.mdx b/operate/access-control.mdx
index d4f288ba..450010c5 100644
--- a/operate/access-control.mdx
+++ b/operate/access-control.mdx
@@ -60,7 +60,7 @@ ALTER USER user1 RENAME TO user001;
See the table below for the privileges available in RisingWave and the corresponding object levels that they can apply to.
| Privilege | Description | Object Level |
-| --------- | ----------------------------------------------------- | -------------------------------- |
+| :-------- | :---------------------------------------------------- | :------------------------------- |
| SELECT | Permission to retrieve data from a relation object. | Table, Source, Materialized View |
| INSERT | Permission to add new rows to a table. | Table |
| UPDATE | Permission to modify existing data in a table. | Table |
diff --git a/operate/cluster-limit.mdx b/operate/cluster-limit.mdx
index 7b9512b5..f6715f1d 100644
--- a/operate/cluster-limit.mdx
+++ b/operate/cluster-limit.mdx
@@ -30,7 +30,7 @@ meta_actor_cnt_per_worker_parallelism_hard_limit = 400
```
-**CAUTION**
+
Please be aware that once you bypass the check or increase the limits, the cluster could become overloaded, leading to issues with stability, availability, or performance.
diff --git a/operate/dedicated-compute-node.mdx b/operate/dedicated-compute-node.mdx
index 92dbe798..a837ded7 100644
--- a/operate/dedicated-compute-node.mdx
+++ b/operate/dedicated-compute-node.mdx
@@ -14,11 +14,9 @@ You need to restart the node to update the role. A role can be one of:
* `serving`: Indicates that the compute node is read-only and executes batch queries only.
* `streaming`: Indicates that the compute node is only available for streaming.
-
-**NOTE**
-
+
In a production environment, it's advisable to use separate nodes for batch and streaming operations. The `both` mode, which allows a node to handle both batch and streaming queries, is more suited for testing scenarios. While it's possible to execute batch and streaming queries concurrently, it's recommended to avoid running resource-intensive batch and streaming queries at the same time.
-
+
For specific changes required in the YAML file, see [Separate batch streaming modes](https://github.com/risingwavelabs/risingwave-operator/blob/main/docs/manifests/risingwave/advanced/separate-batch-streaming-modes.yaml).
diff --git a/operate/manage-a-large-number-of-streaming-jobs.mdx b/operate/manage-a-large-number-of-streaming-jobs.mdx
index ea894917..7809f40b 100644
--- a/operate/manage-a-large-number-of-streaming-jobs.mdx
+++ b/operate/manage-a-large-number-of-streaming-jobs.mdx
@@ -110,7 +110,7 @@ worker_id|count|
To rebalance the actors, you can use the alter parallelism statement mentioned above (available since v1.7.0), and the actors will be distributed to different compute nodes automatically.
-**CAUTION**
+
In some references, `/risingwave/bin/risingwave ctl scale horizon --include-workers all` is used to scale out all streaming jobs to avoid the skewed actor distribution. However, this approach may not be sufficient when dealing with a large number of streaming jobs, as it does not consider the `default_parallelism` parameter.
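As a concrete illustration of the alter-parallelism approach mentioned above, the sketch below assumes a streaming job backed by a table named `t`; the name and the parallelism value are placeholders.

```sql
-- Redistribute actors by adjusting the parallelism of a streaming job.
-- ADAPTIVE spreads the job across all available compute resources.
ALTER TABLE t SET PARALLELISM = ADAPTIVE;
```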
diff --git a/operate/manage-secrets.mdx b/operate/manage-secrets.mdx
index 27279fe5..f2312589 100644
--- a/operate/manage-secrets.mdx
+++ b/operate/manage-secrets.mdx
@@ -44,11 +44,9 @@ CREATE SECRET mysql_pwd WITH (
) AS '123';
```
-
-**NOTE**
-
+
Currently only the meta backend is supported.
-
+
## Use secrets
diff --git a/operate/meta-backup.mdx b/operate/meta-backup.mdx
index 06e02558..81dac66e 100644
--- a/operate/meta-backup.mdx
+++ b/operate/meta-backup.mdx
@@ -10,8 +10,6 @@ A meta snapshot is a backup of meta service's data at a specific point in time.
Before creating your first meta snapshot, you need to set the `backup_storage_url` and `backup_storage_directory` system parameters.
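For example, a minimal sketch of setting these parameters through `ALTER SYSTEM SET`; the bucket and directory names are placeholders.

```sql
-- Set the backup storage location before the first backup attempt.
ALTER SYSTEM SET backup_storage_url TO 's3://example-bucket';
ALTER SYSTEM SET backup_storage_directory TO 'backup';
```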
-**CAUTION**
-
Be careful not to change `backup_storage_url` and `backup_storage_directory` while snapshots exist. It is not strictly forbidden, but if you do so, note that all snapshots taken before the change will be invalidated and can no longer be used for restoration.
@@ -63,11 +61,9 @@ Below are two separate methods to restore from a meta snapshot using SQL databas
If the cluster has been using a SQL database as meta store backend, follow these steps to restore from a meta snapshot.
1. Shut down the meta service.
-
-**NOTE**
-
+
This step is especially important because the meta backup and recovery process does not replicate SST files. It is not permitted for multiple clusters to run with the same SSTs set at any time, as this can corrupt the SST files.
-
+
2. Create a new meta store, i.e. a new SQL database instance.
Note that this new SQL database instance must have the exact same tables defined as the original, but all tables should remain empty. To achieve this, you can optionally use the [schema migration tool](https://github.com/risingwavelabs/risingwave/tree/main/src/meta/model%5Fv2/migration) to create tables, then truncate those non-empty tables populated by the tool.
3. Restore the meta snapshot to the new meta store.
@@ -106,11 +102,9 @@ Parameters to `risectl meta restore-meta` should be:
If the cluster has been using etcd as meta store backend, follow these steps to restore from a meta snapshot.
1. Shut down the meta service.
-
-**NOTE**
-
+
This step is especially important because the meta backup and recovery process does not replicate SST files. It is not permitted for multiple clusters to run with the same SSTs set at any time, as this can corrupt the SST files.
-
+
2. Create a new meta store, i.e. a new and empty etcd instance.
3. Restore the meta snapshot to the new meta store.
@@ -179,7 +173,5 @@ SET QUERY_EPOCH=0;
```
-**LIMITATION**
-
RisingWave only supports historical data access at a specific point in time backed up by at least one meta snapshot.
diff --git a/operate/secure-connections-with-ssl-tls.mdx b/operate/secure-connections-with-ssl-tls.mdx
index edb0479a..983e5473 100644
--- a/operate/secure-connections-with-ssl-tls.mdx
+++ b/operate/secure-connections-with-ssl-tls.mdx
@@ -26,11 +26,9 @@ SSL connection (protocol: TLSv1.3, cipher: TLS_AES_256_GCM_SHA384, bits: 256, co
## Optional: Create a self-signed certificate for testing purposes
-
-**NOTE**
-
+
While a self-signed certificate is suitable for testing, it is recommended to obtain a certificate from a Certificate Authority (CA) for production environments.
-
+
To create a simple self-signed certificate for the server, valid for 365 days, for testing purposes, use the OpenSSL command below. Replace `localhost` with the desired Common Name (CN).
```bash
diff --git a/operate/view-configure-system-parameters.mdx b/operate/view-configure-system-parameters.mdx
index bf51312b..9ca1cef4 100644
--- a/operate/view-configure-system-parameters.mdx
+++ b/operate/view-configure-system-parameters.mdx
@@ -9,11 +9,11 @@ System parameters in RisingWave refer to the parameters that advanced users can
Currently, these system parameters are available in RisingWave.
| Parameter | Description |
-| ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :----------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| barrier\_interval\_ms | The time interval of the periodic barriers. |
| checkpoint\_frequency | Specify the number of barriers for which a checkpoint will be created. The value must be a positive integer. |
| sstable\_size\_mb | The target size of SSTable. |
-| parallel\_compact\_size\_mb | This parameter, together with max\_sub\_compaction, controls the concurrency of individual tasks. If the data size is smaller than parallel\_compact\_size\_mb, only a single thread is used to execute the compaction task. If the data size of an individual task exceeds parallel\_compact\_size\_mb, multiple concurrent threads are started to complete the task. At this time, if the data size is N, then the total number of these threads is determined by dividing N by parallel\_compact\_size\_mb. Additionally, each sub-task's size cannot exceed parallel\_compact\_size\_mb, and the total number of sub-tasks cannot exceed max\_sub\_compaction. |
+| parallel\_compact\_size\_mb | This parameter, together with `max_sub_compaction`, controls the concurrency of individual tasks. If the data size is smaller than `parallel_compact_size_mb`, only a single thread is used to execute the compaction task. If the data size of an individual task exceeds `parallel_compact_size_mb`, multiple concurrent threads are started to complete the task. At this time, if the data size is N, then the total number of these threads is determined by dividing N by `parallel_compact_size_mb`. Additionally, each sub-task's size cannot exceed `parallel_compact_size_mb`, and the total number of sub-tasks cannot exceed `max_sub_compaction`. |
| block\_size\_kb | The size of each block in bytes in SSTable. |
| bloom\_false\_positive | False positive rate of bloom filter in SSTable. |
| state\_store | The state store URL. |
@@ -22,7 +22,7 @@ Currently, these system parameters are available in RisingWave.
| backup\_storage\_directory | The directory of the remote storage for backups. |
| telemetry\_enabled | Whether to enable telemetry or not. For more information, see [Telemetry](/docs/current/telemetry/). |
| max\_concurrent\_creating\_streaming\_jobs | The maximum number of streaming jobs that can be created concurrently. That is, the maximum of materialized views, indexes, tables, sinks, or sources that can be created concurrently. |
-| pause\_on\_next\_bootstrap | This parameter is used for debugging and maintenance purposes. Setting it to true will pause all data sources, such as connectors and DMLs, when the cluster restarts. This parameter will then be reset to its default value (false). To resume data ingestion, simply run risectl meta resume or restart the cluster again. |
+| pause\_on\_next\_bootstrap | This parameter is used for debugging and maintenance purposes. Setting it to true will pause all data sources, such as connectors and DMLs, when the cluster restarts. This parameter will then be reset to its default value (false). To resume data ingestion, simply run `risectl meta resume` or restart the cluster again. |
| enable\_tracing | Whether to enable distributed tracing. This parameter is used to toggle the opentelemetry tracing during runtime. Its default value is false. |
| time\_travel\_retention\_ms | The data retention period for time travel. Defaults to 0, which disables time travel. To enable this feature, set it to a non-zero value. |
@@ -80,8 +80,6 @@ For example, to initialize the setting of `data_directory`:
`meta-node --data_directory "hummock_001"`
-
-**NOTE**
-
+
As RisingWave reads system parameters at different times, there is no guarantee that a parameter value change will take effect immediately. We recommend that you adjust system parameters before running a streaming query after your RisingWave cluster starts.
-
+
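For illustration, here is a minimal sketch of inspecting and adjusting a system parameter, assuming the SQL interface for system parameters (`SHOW PARAMETERS` and `ALTER SYSTEM SET`) applies; the value shown is a placeholder.

```sql
-- List current system parameters and their values.
SHOW PARAMETERS;

-- Adjust a parameter before starting streaming queries, for example
-- the checkpoint frequency listed in the table above.
ALTER SYSTEM SET checkpoint_frequency TO 10;
```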
diff --git a/processing/maintain-wide-table-with-table-sinks.mdx b/processing/maintain-wide-table-with-table-sinks.mdx
index 9d403b42..dba01d0e 100644
--- a/processing/maintain-wide-table-with-table-sinks.mdx
+++ b/processing/maintain-wide-table-with-table-sinks.mdx
@@ -34,11 +34,10 @@ CREATE SINK sink3 INTO wide_d (v3,k) AS
);
```
-
-**NOTE**
+
Keep in mind that the `ON CONFLICT` clause does not affect update or delete events, so the sinks should be forced to be append-only. Otherwise, a delete or update event from any sink will delete the corresponding row.
-
+
## Enrich data with foreign keys in Star/Snowflake schema model
diff --git a/processing/sql/joins.mdx b/processing/sql/joins.mdx
index 8fb314d5..61513613 100644
--- a/processing/sql/joins.mdx
+++ b/processing/sql/joins.mdx
@@ -229,12 +229,10 @@ FROM s1 JOIN s2
ON s1.id = s2.id and s1.ts between s2.ts and s2.ts + INTERVAL '1' MINUTE;
```
-
-**NOTE**
-
+
Interval join's state cleaning is triggered only when upstream messages arrive, and it operates at the granularity of each join key. As a result, if no messages are received for a join key, the state may still hold stale data.
-
+
## Process-time temporal joins
@@ -252,16 +250,14 @@ SELECT ... FROM <table_expression> [AS <alias>]
ON <join_conditions>;
```
-
-**NOTE**
-
+
- The left table expression is an append-only table or source.
- The right table expression is a table, index or materialized view.
- The process-time syntax `FOR SYSTEM_TIME AS OF PROCTIME()` is included in the right table expression.
- The join type is INNER JOIN or LEFT JOIN.
- The Join condition includes the primary key of the right table expression.
-
+
#### Example
@@ -328,8 +324,6 @@ You will get these results:
| 2 | 102 | 3 | 2023-06-19 | Product B | 15 |
| 3 | 101 | 2 | 2023-06-20 | Product A | 22 |
-
-**NOTE**
+
Every time you update the left-hand side table, it will look up the latest data from the right-hand side table.
-
-
+
diff --git a/processing/sql/temporal-filters.mdx b/processing/sql/temporal-filters.mdx
index 6af6836a..797bb1ec 100644
--- a/processing/sql/temporal-filters.mdx
+++ b/processing/sql/temporal-filters.mdx
@@ -105,14 +105,12 @@ However, due to delays caused by the network or other phases, it is not guarante
LEFT JOIN dimension FOR SYSTEM_TIME AS OF PROCTIME() ON id1 = id2;
```
-
-**NOTE**
-
+
Currently, RisingWave's optimizer cannot guarantee predicate pushdown for temporal filters. Please add the temporal filter as a sub-query in the `FROM` clause, as shown in the SQL example, instead of writing it in the query's top-level `WHERE` clause.
-
+
-**INFO**
+
The `PROCTIME` in the example can be replaced with the event time in the records.
diff --git a/processing/sql/time-windows.mdx b/processing/sql/time-windows.mdx
index e4916d56..9652aebb 100644
--- a/processing/sql/time-windows.mdx
+++ b/processing/sql/time-windows.mdx
@@ -116,11 +116,9 @@ The result looks like the table below. Note that the number of rows in the resul
In RisingWave, session windows are supported by a special type of window function frame: `SESSION` frame. You can refer to [Window function calls](/docs/current/query-syntax-value-exp/#window-function-calls) for detailed syntax.
-
-**NOTE**
-
+
Currently, `SESSION` frame is only supported in batch mode and emit-on-window-close streaming mode.
-
+
When using session windows, you can achieve an effect very similar to that of the `tumble()` and `hop()` time window functions, that is, assigning each row a time window by augmenting it with `window_start` and `window_end`. Here is an example:
diff --git a/processing/sql/top-n-by-group.mdx b/processing/sql/top-n-by-group.mdx
index 98576e46..5d168b50 100644
--- a/processing/sql/top-n-by-group.mdx
+++ b/processing/sql/top-n-by-group.mdx
@@ -23,20 +23,18 @@ function_name() OVER ([PARTITION BY col1[, col2...]]
ORDER BY col1 [ ASC | DESC ][, col2 [ ASC | DESC ]...])
```
-
-**NOTE**
-
+
`rank` cannot be included in `column_list`.
-
+
-**INFO**
+
You must follow the pattern exactly to construct a valid Top-N query.
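For example, here is a hedged sketch of the pattern using `row_number()`; the table and column names (`products`, `category`, `price`) are placeholders.

```sql
-- Top 3 products per category by price, following the required pattern.
-- `rank` is used only for filtering and is not part of the output columns.
SELECT product_id, category, price
FROM (
    SELECT *,
        row_number() OVER (PARTITION BY category ORDER BY price DESC) AS rank
    FROM products
)
WHERE rank <= 3;
```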
| Parameter | Description |
-| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _function\_name_    | RisingWave supports two window functions in top-N queries. row\_number(): Returns the sequential row ordinal (1-based) of each row for each ordered partition. rank(): Returns the ordinal (1-based) rank of each row within the ordered partition. All peer rows receive the same rank value. The next row or set of peer rows receives a rank value which increments by the number of peers with the previous rank value. |
| PARTITION BY clause | Specifies the partition columns. Each partition will have a Top-N result. |
| ORDER BY clause | Specifies how the rows are ordered. |
diff --git a/processing/time-travel-queries.mdx b/processing/time-travel-queries.mdx
index 9cfabd24..aff430fe 100644
--- a/processing/time-travel-queries.mdx
+++ b/processing/time-travel-queries.mdx
@@ -23,11 +23,9 @@ The system parameter `time_travel_retention_ms` controls time travel functionali
For example, you can set `time_travel_retention_ms` to `86400000` (1 day). Then historical data older than this period will be deleted and no longer accessible.
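As a sketch of the example above, the retention period can be adjusted through a system parameter; the value mirrors the 1-day figure mentioned here.

```sql
-- Enable time travel with a 1-day retention period.
ALTER SYSTEM SET time_travel_retention_ms TO 86400000;

-- Disable it again by setting the retention back to 0.
ALTER SYSTEM SET time_travel_retention_ms TO 0;
```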
-
-**NOTE**
-
+
Enabling time travel will introduce additional overhead to both the meta store and the object store.
-
+
## Syntax
@@ -37,11 +35,9 @@ Specify `FOR SYSTEM_TIME AS OF` separately for each table accessing historical d
* Datetime. For example, `SELECT * FROM t_foo FOR SYSTEM_TIME AS OF '2000-02-29T12:13:14-08:30';`.
* NOW() \[ - Interval \]. For example, `SELECT * FROM t_foo FOR SYSTEM_TIME AS OF NOW() - '10' SECOND;`.
-
-note
-
+
If you specify a point in time that is outside the time travel period, the query will return an error, like `time travel: version not found for epoch`.
-
+
## Storage space reclamation
diff --git a/processing/watermarks.mdx b/processing/watermarks.mdx
index db3a4a06..52b2e4d7 100644
--- a/processing/watermarks.mdx
+++ b/processing/watermarks.mdx
@@ -6,7 +6,7 @@ description: "In stream processing, watermarks are integral when using event tim
Let us go over an example of how watermarks are generated and utilized during window computations. Say the following events and their corresponding event-time timestamps arrive.
| Event | Timestamp |
-| ------- | ----------- |
+| :------ | :---------- |
| Event F | 11:59:30 AM |
| Event G | 12:00:00 PM |
| Event H | 12:00:10 PM |
@@ -15,7 +15,7 @@ Let us go over an example on how watermarks are generated and utilized during wi
Consider a scenario where the watermark is set as the maximum event time observed so far minus 10 seconds. So the following watermarks will be generated.
| Event | Timestamp | Watermark |
-| ------- | ----------- | ----------- |
+| :------ | :---------- | :---------- |
| Event F | 11:59:30 AM | 11:59:20 AM |
| Event G | 12:00:00 PM | 11:59:50 AM |
| Event H | 12:00:11 PM | 12:00:01 PM |
@@ -49,11 +49,9 @@ WATERMARK FOR time_col as time_col - INTERVAL 'string' time_unit
```
Supported `time_unit` values include: second, minute, hour, day, month, and year. For more details, see the `interval` data type under [Overview of data types](/docs/current/sql-data-types/).
-
-**NOTE**
-
+
Currently, RisingWave only supports using one of the columns from the table as the watermark column. To use nested fields (e.g., fields in `STRUCT`), or perform expression evaluation on the input rows (e.g., casting data types), please refer to [generated columns](/docs/current/query-syntax-generated-columns/).
-
+
### Example
diff --git a/sql/commands/sql-alter-connection.mdx b/sql/commands/sql-alter-connection.mdx
index 5cd9285d..e2ea0077 100644
--- a/sql/commands/sql-alter-connection.mdx
+++ b/sql/commands/sql-alter-connection.mdx
@@ -22,7 +22,7 @@ ALTER CONNECTION connection_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------- |
| **SET SCHEMA** | This clause changes the schema of the connection. To change a connection's schema, you must also have CREATE privilege on the new schema. |
| _schema\_name_ | Specify the schema to which you want to change. |
diff --git a/sql/commands/sql-alter-database.mdx b/sql/commands/sql-alter-database.mdx
index 349bd3c7..975ed039 100644
--- a/sql/commands/sql-alter-database.mdx
+++ b/sql/commands/sql-alter-database.mdx
@@ -21,7 +21,7 @@ ALTER DATABASE database_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **OWNER TO** | This clause changes the owner of the database. To alter the owner, you must be able to SET ROLE to the new owning role, and you must have the CREATEDB privilege. Note that superusers have all these privileges automatically. |
| _new\_user_ | The new owner you want to assign to the database. |
@@ -38,7 +38,7 @@ ALTER DATABASE database_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **RENAME TO** | This clause changes the name of the database. Only the database owner or a superuser can rename a database; non-superuser owners must also have the CREATEDB privilege. The current database cannot be renamed. (Connect to a different database if you need to do that.) |
| _new\_name_ | The new name of the database. |
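For example, combining the two clauses above; the database and user names are illustrative.

```sql
-- Transfer ownership, then rename the database.
ALTER DATABASE travel OWNER TO travel_admin;
ALTER DATABASE travel RENAME TO travel_2024;
```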
diff --git a/sql/commands/sql-alter-function.mdx b/sql/commands/sql-alter-function.mdx
index 1605279a..42d1bc13 100644
--- a/sql/commands/sql-alter-function.mdx
+++ b/sql/commands/sql-alter-function.mdx
@@ -22,7 +22,7 @@ ALTER FUNCTION function( argument_type [, ...] )
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------ |
| **SET SCHEMA** | This clause changes the schema of the function. To change a function's schema, you must also have CREATE privilege on the new schema. |
| _schema\_name_ | Specify the schema to which you want to change. |
diff --git a/sql/commands/sql-alter-index.mdx b/sql/commands/sql-alter-index.mdx
index 3b731cff..8ca8aa1e 100644
--- a/sql/commands/sql-alter-index.mdx
+++ b/sql/commands/sql-alter-index.mdx
@@ -22,7 +22,7 @@ ALTER INDEX index_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **RENAME TO** | This clause changes the name of the index. If the index is associated with a table constraint (either UNIQUE, PRIMARY KEY, or EXCLUDE), the constraint is renamed as well. There is no effect on the stored data. |
| _new\_name_ | The new name of the index. |
diff --git a/sql/commands/sql-alter-materialized-view.mdx b/sql/commands/sql-alter-materialized-view.mdx
index 099ba5c1..5426af31 100644
--- a/sql/commands/sql-alter-materialized-view.mdx
+++ b/sql/commands/sql-alter-materialized-view.mdx
@@ -28,7 +28,7 @@ ALTER MATERIALIZED VIEW materialized_view_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------ |
+| :------------------ | :----------------------------------------------------------------------------------------------------------------------------------- |
| **OWNER TO**        | This clause changes the owner of the materialized view. Note that this change also cascades to all related internal objects. |
| _new\_user_ | The new owner you want to assign to the materialized view. |
@@ -45,7 +45,7 @@ ALTER MATERIALIZED VIEW materialized_view_name
```
| Parameter or clause | Description |
-| ------------------- | -------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------- |
| **SET SCHEMA** | This clause moves the materialized view to a different schema. |
| _schema\_name_ | The name of the schema to which the materialized view will be moved. |
@@ -62,7 +62,7 @@ SET PARALLELISM = parallelism_number;
```
| Parameter or clause | Description |
-| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **SET PARALLELISM** | This clause controls the degree of [parallelism](/docs/current/key-concepts/#parallelism) for the targeted [streaming job](/docs/current/key-concepts/#streaming-jobs). |
| _parallelism\_number_ | This parameter can be ADAPTIVE or a fixed number, like 1, 2, 3, etc. Altering the parameter to ADAPTIVE will expand the streaming job's degree of parallelism to encompass all available units, whereas setting it to a fixed number will lock the job's parallelism at that specific figure. Setting it to 0 is equivalent to ADAPTIVE. |
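For example, a short sketch of both options; `mv1` is a placeholder name.

```sql
-- Pin the materialized view to a fixed parallelism of 4.
ALTER MATERIALIZED VIEW mv1 SET PARALLELISM = 4;

-- Or let RisingWave use all available parallelism.
ALTER MATERIALIZED VIEW mv1 SET PARALLELISM = ADAPTIVE;
```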
@@ -79,7 +79,7 @@ ALTER MATERIALIZED VIEW materialized_view_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------ |
+| :------------------ | :----------------------------------------------------- |
| **RENAME TO** | This clause changes the name of the materialized view. |
| _new\_name_ | The new name of the materialized view. |
diff --git a/sql/commands/sql-alter-schema.mdx b/sql/commands/sql-alter-schema.mdx
index 84577513..410568bb 100644
--- a/sql/commands/sql-alter-schema.mdx
+++ b/sql/commands/sql-alter-schema.mdx
@@ -22,7 +22,7 @@ ALTER SCHEMA current_schema_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **OWNER TO** | This clause changes the owner of the schema. To alter the owner, you must be able to SET ROLE to the new owning role, and you must have the CREATEDB privilege. Note that superusers have all these privileges automatically. |
| _new\_user_ | The new owner you want to assign to the schema. |
@@ -39,7 +39,7 @@ ALTER SCHEMA current_schema_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **RENAME TO** | This clause changes the name of the schema. To rename a schema you must also have the CREATE privilege for the database. Note that superusers have the privilege automatically. |
| _new\_name_ | The new name of the schema. |
diff --git a/sql/commands/sql-alter-sink.mdx b/sql/commands/sql-alter-sink.mdx
index c5fd41d7..ba829f01 100644
--- a/sql/commands/sql-alter-sink.mdx
+++ b/sql/commands/sql-alter-sink.mdx
@@ -28,7 +28,7 @@ ALTER SINK sink_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------ |
| **OWNER TO**        | This clause changes the owner of the sink. This change also cascades to all related internal objects. |
| _new\_user_ | The new owner you want to assign to the sink. |
@@ -45,7 +45,7 @@ ALTER SINK sink_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------ |
| **SET SCHEMA** | This clause moves the sink to a different schema. |
| _schema\_name_ | The name of the schema to which the sink will be moved. |
@@ -62,7 +62,7 @@ SET PARALLELISM = parallelism_number;
```
| Parameter or clause | Description |
-| --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **SET PARALLELISM** | This clause controls the degree of [parallelism](/docs/current/key-concepts/#parallelism) for the targeted [streaming job](/docs/current/key-concepts/#streaming-jobs). |
| _parallelism\_number_ | This parameter can be ADAPTIVE or a fixed number, like 1, 2, 3, etc. Altering the parameter to ADAPTIVE will expand the streaming job's degree of parallelism to encompass all available units, whereas setting it to a fixed number will lock the job's parallelism at that specific figure. Setting it to 0 is equivalent to ADAPTIVE. |
@@ -79,7 +79,7 @@ ALTER SINK sink_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------- |
+| :------------------ | :---------------------------------------- |
| **RENAME TO** | This clause changes the name of the sink. |
| _new\_name_ | The new name of the sink. |
diff --git a/sql/commands/sql-alter-source.mdx b/sql/commands/sql-alter-source.mdx
index fcd6f1fd..3a479ef9 100644
--- a/sql/commands/sql-alter-source.mdx
+++ b/sql/commands/sql-alter-source.mdx
@@ -22,7 +22,7 @@ ALTER SOURCE source_name
```
| Parameter or clause | Description |
-| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **ADD COLUMN** | This clause adds a column to the specified source. |
| _col\_name_ | The name of the new column you want to add to the source. |
| _data\_type_ | The data type of the newly added column. With the struct data type, you can create a nested table. Elements in a nested table need to be enclosed with angle brackets (\<>). |
@@ -33,13 +33,11 @@ ALTER SOURCE src1
ADD COLUMN v3 int;
```
-
-**NOTE**
-
+
* To alter columns in a source created with a schema registry, see [FORMAT and ENCODE options](/docs/current/sql-alter-source/#format-and-encode-options).
* You cannot add a primary key column to a source or table in RisingWave. To modify the primary key of a source or table, you need to recreate the table.
* You cannot remove a column from a source in RisingWave. If you intend to remove a column from a source, you'll need to drop the source and create the source again.
-
+
### `RENAME TO`
@@ -49,7 +47,7 @@ ALTER SOURCE source_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------- |
+| :------------------ | :------------------------------------------ |
| **RENAME TO** | This clause changes the name of the source. |
| _new\_source\_name_ | The new name of the source. |
@@ -67,7 +65,7 @@ ALTER SOURCE current_source_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------------- |
+| :------------------ | :---------------------------------------------- |
| **OWNER TO** | This clause changes the owner of the source. |
| _new\_user_ | The new owner you want to assign to the source. |
@@ -84,7 +82,7 @@ ALTER SOURCE current_source_name
```
| Parameter or clause | Description |
-| ------------------- | --------------------------------------------------------- |
+| :------------------ | :-------------------------------------------------------- |
| **SET SCHEMA** | This clause moves the source to a different schema. |
| _schema\_name_ | The name of the schema to which the source will be moved. |
@@ -127,13 +125,11 @@ ALTER SOURCE src_user FORMAT PLAIN ENCODE PROTOBUF(
);
```
-
-**NOTE**
-
+
Currently, modifying the `data_format` and `data_encode` is not supported. Furthermore, when refreshing the schema registry of a source, dropping columns or changing types is not allowed.
Another way of refreshing the schema is using the [REFRESH SCHEMA clause](#refresh-schema).
-
+
### `REFRESH SCHEMA`
diff --git a/sql/commands/sql-alter-table.mdx b/sql/commands/sql-alter-table.mdx
index 1360bb8b..9a9aed29 100644
--- a/sql/commands/sql-alter-table.mdx
+++ b/sql/commands/sql-alter-table.mdx
@@ -22,7 +22,7 @@ ALTER TABLE table_name
```
| Parameter or clause | Description |
-| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **ADD \[ COLUMN \]** | This clause adds a new column to the table. COLUMN is optional. |
| _column\_name_ | Specify the name of the column you want to add. |
| _data\_type_ | The data type of the new column. |
@@ -34,12 +34,10 @@ ALTER TABLE table_name
ALTER TABLE employees ADD age int;
```
-
-**NOTE**
-
+
* If your table is defined with a schema registry, its columns cannot be altered.
* Columns added by this command cannot be used by any existing materialized views or indexes. You must create new materialized views or indexes to reference them.
-
+
### `DROP COLUMN`
@@ -49,7 +47,7 @@ ALTER TABLE table_name
```
| Parameter or clause | Description |
-| --------------------- | ------------------------------------------------------------------------------------------ |
+| :-------------------- | :----------------------------------------------------------------------------------------- |
| **DROP \[ COLUMN \]** | This clause drops an existing column from a table. COLUMN is optional. |
| **IF EXISTS** | Do not return an error if the specified column does not exist. A notice is issued instead. |
| _column\_name_ | Specify the column you want to remove. |
@@ -59,13 +57,11 @@ ALTER TABLE table_name
ALTER TABLE employees DROP fax;
```
-
-**NOTE**
-
+
* If your table is defined with a schema registry, you can only change the table schema by `ALTER TABLE t REFRESH SCHEMA`. One exception is that you can drop generated columns even if the schema is defined with a schema registry. Note that dropping these generated columns will trigger a schema refresh.
* You cannot drop columns referenced by materialized views or indexes.
* To drop a column referenced by a generated column, you must first drop the generated column.
-
+
### `OWNER TO`
@@ -75,7 +71,7 @@ ALTER TABLE table_name
```
| Parameter or clause | Description |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **OWNER TO**        | This clause changes the owner of the table to the specified user. The change also cascades to all related internal objects, and the associated indexes are changed as well. |
| _new\_user_ | Specify the user you want to assign to the table. |
@@ -94,7 +90,7 @@ ALTER TABLE table_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------- |
| **SET SCHEMA** | This clause moves the table into another schema. Associated indexes, constraints, and sequences owned by table columns are moved as well. |
| _schema\_name_ | Specify the schema to which the table will be moved. |
@@ -111,7 +107,7 @@ SET PARALLELISM = parallelism_number;
```
| Parameter or clause | Description |
-| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **SET PARALLELISM** | This clause controls the degree of [parallelism](/docs/current/key-concepts/#parallelism) for the targeted [streaming job](/docs/current/key-concepts/#streaming-jobs). |
+| _parallelism\_number_ | This parameter can be ADAPTIVE or a fixed number, like 1, 2, 3, etc. Altering the parameter to ADAPTIVE will expand the streaming job's degree of parallelism to encompass all available units, whereas setting it to a fixed number will lock the job's parallelism at that specific figure. Setting it to 0 is equivalent to ADAPTIVE. After setting the parallelism, the parallelism status of a table can be observed within the internal [rw\_table\_fragments](/docs/current/view-configure-runtime-parameters/) table or the [rw\_fragments](/docs/current/view-configure-runtime-parameters/) table. |
@@ -179,7 +175,7 @@ ALTER TABLE table_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------ |
+| :------------------ | :----------------------------------------- |
| **RENAME TO** | This clause changes the name of the table. |
| _new\_name_ | The new name of the table. |
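For example, reusing the `employees` table from the earlier snippets (the new name is a placeholder):

```sql
ALTER TABLE employees RENAME TO staff;
```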
@@ -202,9 +198,9 @@ This command alters the schema registry of a table created with connectors.
ALTER TABLE t_user REFRESH SCHEMA;
```
-
+
If a downstream fragment references a column that is either missing or has undergone a type change in the updated schema, the command will be declined.
-
+
### `SET SOURCE_RATE_LIMIT`
diff --git a/sql/commands/sql-alter-user.mdx b/sql/commands/sql-alter-user.mdx
index 3f55741c..ace8cdf5 100644
--- a/sql/commands/sql-alter-user.mdx
+++ b/sql/commands/sql-alter-user.mdx
@@ -27,18 +27,16 @@ ALTER USER user_name WITH oauth (
## Parameters
| Parameter or clause | Description |
-| -------------------- | ----------------------------------------------------------------------------------------------------------------------- |
+| :------------------- | :---------------------------------------------------------------------------------------------------------------------- |
| _user\_name_ | The name of the user to be modified. |
| _new\_user\_name_ | The new name of the user. |
| _system\_permission_ | See [the options for system permissions of the CREATE USER command](/docs/current/sql-create-user/#system-permissions). |
When altering the user authentication method, the `jwks_url` and `issuer` parameters are mandatory, while `other_params_should_match` is an optional parameter that will be validated against `jwt.claims`. Ensure that all keys in the options are in **lowercase**.
-
-**NOTE**
-
+
`kid` and `alg` are required in the JWT header, and `kid` is also required in the JWKs returned by the JWKS server. All parameters set during user creation (except `jwks_url`) will be checked against the JWT claims. Any mismatch will cause the login to be denied.
-
+
## Examples
diff --git a/sql/commands/sql-alter-view.mdx b/sql/commands/sql-alter-view.mdx
index 48d195ee..a3cdb06c 100644
--- a/sql/commands/sql-alter-view.mdx
+++ b/sql/commands/sql-alter-view.mdx
@@ -22,7 +22,7 @@ ALTER VIEW view_name
```
| Parameter or clause | Description |
-| ------------------- | --------------------------------------------- |
+| :------------------ | :-------------------------------------------- |
| **OWNER TO** | This clause changes the owner of the view. |
| _new\_user_ | The new owner you want to assign to the view. |
@@ -39,7 +39,7 @@ ALTER VIEW view_name
```
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------ |
| **SET SCHEMA** | This clause moves the view to a different schema. |
| _schema\_name_ | The name of the schema to which the view will be moved. |
@@ -56,7 +56,7 @@ ALTER VIEW view_name
```
| Parameter or clause | Description |
-| ------------------- | ----------------------------------------- |
+| :------------------ | :---------------------------------------- |
| **RENAME TO** | This clause changes the name of the view. |
| _new\_name_ | The new name of the view. |
diff --git a/sql/commands/sql-comment-on.mdx b/sql/commands/sql-comment-on.mdx
index f1a9e365..b21b16b2 100644
--- a/sql/commands/sql-comment-on.mdx
+++ b/sql/commands/sql-comment-on.mdx
@@ -14,7 +14,7 @@ COMMENT ON <object_type> <relation_name>.<object_name> IS <comment>
## Parameters
| Parameter | Notes |
-| ----------------------------- | ---------------------------------------------------------------------------------------------------------- |
+| :---------------------------- | :--------------------------------------------------------------------------------------------------------- |
| _object\_type_ | Type of the object that you want to add comments to. Allowed values: TABLE, COLUMN. |
| _relation\_name.object\_name_ | Name of the object that you want to add comments to. For columns, you also need to specify the table name. |
| _comment_ | Comment that you want to add. |
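For example, covering both allowed object types; the table and column names are placeholders.

```sql
-- Add a comment to a table and to one of its columns.
COMMENT ON TABLE t1 IS 'user events ingested from Kafka';
COMMENT ON COLUMN t1.v1 IS 'event payload';
```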
diff --git a/sql/commands/sql-create-aggregate.mdx b/sql/commands/sql-create-aggregate.mdx
index 762dc0b0..a6188e7e 100644
--- a/sql/commands/sql-create-aggregate.mdx
+++ b/sql/commands/sql-create-aggregate.mdx
@@ -15,7 +15,7 @@ CREATE AGGREGATE function_name ( argument_type [, ...] )
### Parameters
| Parameter or clause | Description |
-| -------------------------- | ----------------------------------------------------------------------------------------------------- |
+| :------------------------- | :---------------------------------------------------------------------------------------------------- |
| _function\_name_ | The name of the aggregate function that you want to declare in RisingWave. |
| _argument\_type_ | The data type of the input parameter(s) that the function expects to receive. |
| **RETURNS** _return\_type_ | The data type of the return value from the aggregate function. |
diff --git a/sql/commands/sql-create-connection.mdx b/sql/commands/sql-create-connection.mdx
index 5def6f28..e9193aaf 100644
--- a/sql/commands/sql-create-connection.mdx
+++ b/sql/commands/sql-create-connection.mdx
@@ -19,18 +19,16 @@ WITH (
All WITH options are required unless stated otherwise.
| Parameter or clause | Description |
-| ------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------- |
| _connection\_name_ | The name of the connection to be created. |
| type | The type of connection. |
| provider | The provider of the connection. |
| service.name | The service name of the endpoint service. |
| tags | Optional. The AWS tags used to check for resource leakage. This parameter should have the format: key1=value1, key2=value2, .... |
-
-**NOTE**
-
+
You can either tag the VPC endpoints by specifying the `tags` parameter when using the `CREATE CONNECTION` command or by specifying the environment variable `RW_PRIVATELINK_ENDPOINT_DEFAULT_TAGS`. When specifying the tags, follow the format of `key1=value1, key2=value2, ...`. If both are specified, the tags specified in the environment variable will be appended to the ones specified by the `tags` parameter.
-
+
## Example
diff --git a/sql/commands/sql-create-database.mdx b/sql/commands/sql-create-database.mdx
index 7e764896..dc569f6d 100644
--- a/sql/commands/sql-create-database.mdx
+++ b/sql/commands/sql-create-database.mdx
@@ -13,7 +13,7 @@ CREATE DATABASE [ IF NOT EXISTS ] database_name
## Parameters
| Parameter or clause | Description |
-| ------------------------ | --------------------------------------------------------------------------------------------- |
+| :----------------------- | :-------------------------------------------------------------------------------------------- |
| _database_name_ | The name of the database to be created. |
| **IF NOT EXISTS** clause | Creates a database if the database name has not already been used. Otherwise throws an error. |
| **OWNER [=] user_name** clause | Specifies which user owns the database to be created. |
@@ -25,9 +25,7 @@ CREATE DATABASE IF NOT EXISTS travel
WITH OWNER = travel_admin;
```
-
-**NOTE**
-
+
Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/).
-
+
diff --git a/sql/commands/sql-create-function.mdx b/sql/commands/sql-create-function.mdx
index 0af52e8d..97f86a2a 100644
--- a/sql/commands/sql-create-function.mdx
+++ b/sql/commands/sql-create-function.mdx
@@ -24,7 +24,7 @@ CREATE FUNCTION function_name ( argument_type [, ...] )
### Parameters
| Parameter or clause | Description |
-| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _function\_name_ | The name of the UDF that you want to declare in RisingWave. |
| _argument\_type_ | The data type of the input parameter(s) that the UDF expects to receive. |
+| **RETURNS** _return\_type_ | Use this if the function returns a single value (i.e., scalar). It specifies the data type of the return value from the UDF. The struct type, which can contain multiple values, is supported, but the field names must be consistent between the programming language and SQL definitions, or it will be considered a type mismatch. |
diff --git a/sql/commands/sql-create-index.mdx b/sql/commands/sql-create-index.mdx
index 0d1b93b5..c45d0a15 100644
--- a/sql/commands/sql-create-index.mdx
+++ b/sql/commands/sql-create-index.mdx
@@ -14,7 +14,7 @@ CREATE INDEX [ IF NOT EXISTS ] index_name ON object_name ( index_column [ ASC |
## Parameters
| Parameter or clause | Description |
-| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :------------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **IF NOT EXISTS** | This clause is used to check if an index with the specified name already exists before creating a new index. If the index already exists, the clause prevents an error from occurring and the index creation operation is skipped. A notice is issued in this case. Note that there is no guarantee that the existing index is anything like the one that would have been created. The index name is required when IF NOT EXISTS is specified. |
| _index\_name_ | The name of the index to be created. |
| _object\_name_ | The name of the table or materialized view where the index is created. |
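
Putting the parameters above together, a small sketch; the `orders` table and its `customer_id` column are hypothetical and assumed to exist already:

```sql
-- IF NOT EXISTS makes the statement safe to re-run; the index name is required in that case.
CREATE INDEX IF NOT EXISTS idx_orders_customer ON orders (customer_id);
```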
diff --git a/sql/commands/sql-create-mv.mdx b/sql/commands/sql-create-mv.mdx
index ee45bfd2..3a7bb661 100644
--- a/sql/commands/sql-create-mv.mdx
+++ b/sql/commands/sql-create-mv.mdx
@@ -20,21 +20,17 @@ To perform the operations in the background, you can execute `SET BACKGROUND_DDL
## Parameters
| Parameter or clause | Description |
-| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _mv\_name_ | The name of the materialized view to be created. |
| _select\_query_ | A SELECT query that retrieves data for the materialized view. See [SELECT](/docs/current/sql-select/) for the syntax and examples of the SELECT command. |
-
-**NOTE**
-
+
Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/).
-
-
-
-**NOTE**
+
+
The `ORDER BY` clause in the `CREATE MATERIALIZED VIEW` statement is allowed but not considered as part of the definition of the materialized view. It's only used in the initial creation of the materialized view and not during refreshes.
-
+
## Examples
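
For instance, a minimal materialized view over a hypothetical `orders` table:

```sql
-- The view is maintained incrementally as new rows arrive in orders.
CREATE MATERIALIZED VIEW order_totals AS
SELECT customer_id, SUM(amount) AS total_amount
FROM orders
GROUP BY customer_id;
```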
diff --git a/sql/commands/sql-create-schema.mdx b/sql/commands/sql-create-schema.mdx
index ff6b47e9..74872311 100644
--- a/sql/commands/sql-create-schema.mdx
+++ b/sql/commands/sql-create-schema.mdx
@@ -14,7 +14,7 @@ CREATE SCHEMA [ IF NOT EXISTS ] AUTHORIZATION user_name;
## Parameters
| Parameter or clause | Description |
-| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ |
+| :----------------------- | :----------------------------------------------------------------------------------------------------------------------------------- |
| _schema\_name_ | The name of the schema to be created. |
| **IF NOT EXISTS** clause | Creates a schema only if the schema name has not already been used. If it has, the statement is skipped instead of raising an error. |
| _database\_name_ | The name of the database for the schema to be created in. If not specified, the schema will be created in the default database dev. |
@@ -27,12 +27,10 @@ CREATE SCHEMA [ IF NOT EXISTS ] AUTHORIZATION user_name;
CREATE SCHEMA IF NOT EXISTS schema_1;
```
-
-**NOTE**
-
+
Names and unquoted identifiers are case-insensitive. Therefore, you must double-quote any of these fields for them to be case-sensitive. See also [Identifiers](/docs/current/sql-identifiers/).
-
+
```sql Examples of AUTHORIZATION clause
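-- A hedged sketch (user_1 is a hypothetical existing user): following the syntax shown
-- above, this creates a schema named after user_1 with user_1 as its owner.
CREATE SCHEMA IF NOT EXISTS AUTHORIZATION user_1;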
diff --git a/sql/commands/sql-create-secret.mdx b/sql/commands/sql-create-secret.mdx
index f3a08cb3..8849c3d6 100644
--- a/sql/commands/sql-create-secret.mdx
+++ b/sql/commands/sql-create-secret.mdx
@@ -12,7 +12,7 @@ CREATE SECRET secret_name WITH ( backend = 'meta') AS 'your_secret';
## Parameters
| Parameter or Clause | Description |
-| ------------------- | ----------------------------------------------------------------------------------------------------- |
+| :------------------ | :---------------------------------------------------------------------------------------------------- |
| _secret\_name_ | The name of the secret to be created. This should be a unique identifier within the system. |
| _backend_ | Specifies the backend where the secret will be stored. Currently, only the meta backend is supported. |
| _your\_secret_ | The secret value that you wish to store securely. |
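
A minimal sketch of these parameters (the secret name and value are placeholders); the connector pages describe how a created secret can then be referenced in a `WITH` clause in place of a plain-text credential:

```sql
-- Store a credential with the meta backend, currently the only supported backend.
CREATE SECRET mysql_pwd WITH ( backend = 'meta' ) AS 'p@ssw0rd';
```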
diff --git a/sql/commands/sql-create-sink-into.mdx b/sql/commands/sql-create-sink-into.mdx
index 59728f8f..a8f4a085 100644
--- a/sql/commands/sql-create-sink-into.mdx
+++ b/sql/commands/sql-create-sink-into.mdx
@@ -13,21 +13,17 @@ CREATE SINK [ IF NOT EXISTS ] sink_name INTO table_name [ ( col_name [ , ... ] )
## Parameters
| Parameter or clause | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| :------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| sink\_name | The name of the sink. If a schema name is given (for example, CREATE SINK \<schema\>.\<sink\> ...), then the sink is created in the specified schema. Otherwise, it is created in the current schema. |
| col\_name | The corresponding table columns in the sink result. For columns not listed, the default values defined in the table are inserted. |
-
-**NOTE**
-
+
A table without a primary key can only accept an append-only sink.
-
-
-
-**NOTE**
+
+
Currently, if there are sinks in the table, the table cannot be altered to add or drop columns.
-
+
## Examples
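
For instance, a hedged sketch of sinking a materialized view into an existing table; `enriched_orders` and `enriched_orders_mv` are hypothetical, and per the note above the target table needs a primary key unless the sink is append-only:

```sql
-- Route the rows of a materialized view into an existing table.
CREATE SINK IF NOT EXISTS enriched_orders_sink
INTO enriched_orders
FROM enriched_orders_mv;
```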
diff --git a/sql/commands/sql-create-sink.mdx b/sql/commands/sql-create-sink.mdx
index eaf27e6b..97aef14d 100644
--- a/sql/commands/sql-create-sink.mdx
+++ b/sql/commands/sql-create-sink.mdx
@@ -23,7 +23,7 @@ WITH (
## Parameters
| Parameter | Description |
-| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _sink\_name_ | The name of the sink. |
| _col\_name_ | The name of the column. |
| _sink\_from_ | Specify the direct data source for output. It can be a materialized view or a table. |
@@ -32,11 +32,9 @@ WITH (
| **WITH** clause | Specify the connector settings here if you are trying to store all the sink data. See [Supported sinks](#supported-sinks) for the full list of supported sinks as well as links to specific connector pages detailing the syntax for each sink. |
| **FORMAT** and **ENCODE** options | Optional. Specify the data format and the encoding format of the sink data. It is only used for Kafka, Kinesis, Pulsar, and Redis sinks. |
-
-**NOTE**
-
+
Please distinguish between the parameters set in the FORMAT and ENCODE options and those set in the WITH clause. Ensure that you place them correctly and avoid any misuse.
-
+
## Supported sinks
@@ -95,10 +93,8 @@ Click a sink name to see the SQL syntax, options, and sample statement of sinkin
-
-**NOTE**
-
+
Timestamptz values are stored in UTC.
When sinking downstream, the representation of timestamptz is configurable. By default, it is in the format `2023-11-11T18:30:09.453000Z`.
-
+
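
To make the placement note concrete, a hedged sketch of a Kafka sink; the broker, topic, and upstream materialized view are placeholders, and parameter names follow the Kafka connector page. Connector settings live in the `WITH` clause, while the data format lives in the `FORMAT ... ENCODE ...` options:

```sql
CREATE SINK order_totals_sink FROM order_totals
WITH (
  connector = 'kafka',
  properties.bootstrap.server = 'broker1:9092',
  topic = 'order_totals'
)
FORMAT PLAIN ENCODE JSON;
-- Depending on the upstream query, FORMAT PLAIN may require an append-only stream;
-- see the connector pages for the exact options each sink supports.
```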
diff --git a/sql/commands/sql-create-source.mdx b/sql/commands/sql-create-source.mdx
index 723efe3c..c6b4b287 100644
--- a/sql/commands/sql-create-source.mdx
+++ b/sql/commands/sql-create-source.mdx
@@ -53,16 +53,14 @@ FORMAT upsert ENCODE AVRO (
);
```
-
-**NOTE**
-
+
The generated column is created in RisingWave and will not be accessed through the external connector. Therefore, if the external upstream system has a schema, that schema does not need to include the generated column.
-
+
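
As a hedged illustration of such a generated column (the source name, topic, and broker below are placeholders), the derived field exists only in RisingWave and is not expected in the upstream schema:

```sql
CREATE SOURCE sensor_events (
  sensor_id INT,
  reading DOUBLE PRECISION,
  -- Generated column: computed by RisingWave, not read from the Kafka topic.
  reading_scaled DOUBLE PRECISION AS reading * 100
) WITH (
  connector = 'kafka',
  topic = 'sensor_events',
  properties.bootstrap.server = 'broker1:9092'
) FORMAT PLAIN ENCODE JSON;
```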
## Parameter
| Parameter | Description |
-| --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| :-------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| _source\_name_ | The name of the source. If a schema name is given (for example, CREATE SOURCE \.\