diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 0a4c5d2cb5..13770fbcbd 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -84,9 +84,6 @@ ***** xref:deploy:redpanda/manual/production/production-deployment-automation.adoc[] ***** xref:deploy:redpanda/manual/production/production-deployment.adoc[] ***** xref:deploy:redpanda/manual/production/production-readiness.adoc[] -**** xref:deploy:redpanda/manual/high-availability.adoc[High Availability] -**** xref:deploy:redpanda/manual/resilience/shadowing.adoc[Shadowing] -**** xref:deploy:redpanda/manual/resilience/shadowing-guide.adoc[] **** xref:deploy:redpanda/manual/sizing-use-cases.adoc[Sizing Use Cases] **** xref:deploy:redpanda/manual/sizing.adoc[Sizing Guidelines] **** xref:deploy:redpanda/manual/linux-system-tuning.adoc[System Tuning] @@ -179,9 +176,6 @@ *** xref:manage:tiered-storage.adoc[] *** xref:manage:fast-commission-decommission.adoc[] *** xref:manage:mountable-topics.adoc[] -*** xref:manage:remote-read-replicas.adoc[Remote Read Replicas] -*** xref:manage:topic-recovery.adoc[Topic Recovery] -*** xref:manage:whole-cluster-restore.adoc[Whole Cluster Restore] ** xref:manage:iceberg/index.adoc[Iceberg] *** xref:manage:iceberg/about-iceberg-topics.adoc[About Iceberg Topics] *** xref:manage:iceberg/specify-iceberg-schema.adoc[Specify Iceberg Schema] @@ -199,6 +193,21 @@ *** xref:manage:schema-reg/schema-reg-authorization.adoc[Schema Registry Authorization] *** xref:manage:schema-reg/schema-id-validation.adoc[] *** xref:console:ui/schema-reg.adoc[Manage in Redpanda Console] +** xref:deploy:redpanda/manual/high-availability.adoc[High Availability] +** xref:deploy:redpanda/manual/disaster-recovery/index.adoc[Disaster Recovery] +*** xref:deploy:redpanda/manual/disaster-recovery/shadowing/index.adoc[Shadowing] +**** xref:deploy:redpanda/manual/disaster-recovery/shadowing/overview.adoc[Overview] +**** xref:deploy:redpanda/manual/disaster-recovery/shadowing/setup.adoc[Configure Shadowing] 
+**** xref:deploy:redpanda/manual/disaster-recovery/shadowing/monitor.adoc[Monitor Shadowing] +**** xref:deploy:redpanda/manual/disaster-recovery/shadowing/failover.adoc[Configure Failover] +**** xref:deploy:redpanda/manual/disaster-recovery/shadowing/failover-runbook.adoc[Failover Runbook] +*** xref:deploy:redpanda/manual/disaster-recovery/whole-cluster-restore.adoc[Whole Cluster Restore] +*** xref:deploy:redpanda/manual/disaster-recovery/topic-recovery.adoc[Topic Recovery] +** xref:deploy:redpanda/manual/remote-read-replicas.adoc[Remote Read Replicas] +** xref:manage:recovery-mode.adoc[Recovery Mode] +** xref:manage:rack-awareness.adoc[Rack Awareness] +** xref:manage:raft-group-reconfiguration.adoc[Raft Group Reconfiguration] +** xref:manage:io-optimization.adoc[] ** xref:manage:console/index.adoc[Redpanda Console] *** xref:console:config/configure-console.adoc[Configure Redpanda Console] *** xref:console:config/enterprise-license.adoc[Add an Enterprise License] @@ -212,12 +221,8 @@ *** xref:console:config/topic-documentation.adoc[Topic Documentation] *** xref:console:config/analytics.adoc[Telemetry] *** xref:console:config/kafka-connect.adoc[Kafka Connect] -** xref:manage:recovery-mode.adoc[Recovery Mode] -** xref:manage:rack-awareness.adoc[Rack Awareness] -** xref:manage:monitoring.adoc[] -** xref:manage:io-optimization.adoc[] -** xref:manage:raft-group-reconfiguration.adoc[Raft Group Reconfiguration] ** xref:manage:use-admin-api.adoc[Use the Admin API] +** xref:manage:monitoring.adoc[] * xref:upgrade:index.adoc[Upgrade] ** xref:upgrade:rolling-upgrade.adoc[Upgrade Redpanda in Linux] ** xref:upgrade:k-rolling-upgrade.adoc[Upgrade Redpanda in Kubernetes] diff --git a/modules/deploy/pages/console/linux/deploy.adoc b/modules/deploy/pages/console/linux/deploy.adoc index 002d7ec75b..7d93b9256f 100644 --- a/modules/deploy/pages/console/linux/deploy.adoc +++ b/modules/deploy/pages/console/linux/deploy.adoc @@ -7,7 +7,7 @@ This page shows you how to deploy Redpanda 
Console on Linux using Docker or the == Prerequisites -* You must have a running Redpanda or Kafka cluster available to connect to. Redpanda Console requires a cluster to function. For instructions on deploying a Redpanda cluster, see xref:deploy:redpanda/manual/index.adoc[]. +* You must have a running Redpanda or Kafka cluster available to connect to. Redpanda Console requires a cluster to function. For instructions on deploying a Redpanda cluster, see xref:deploy:redpanda/manual/production/index.adoc[]. * Review the xref:deploy:console/linux/requirements.adoc[system requirements for Redpanda Console on Linux]. == Deploy with Docker diff --git a/modules/deploy/pages/redpanda/manual/disaster-recovery/index.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/index.adoc new file mode 100644 index 0000000000..9961ccabea --- /dev/null +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/index.adoc @@ -0,0 +1,5 @@ += Disaster Recovery +:description: Set up disaster recovery for Redpanda clusters using Shadowing for cross-region replication. +:env-linux: true +:page-layout: index +:page-categories: Management, High Availability, Disaster Recovery \ No newline at end of file diff --git a/modules/deploy/pages/redpanda/manual/resilience/shadowing-guide.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/failover-runbook.adoc similarity index 97% rename from modules/deploy/pages/redpanda/manual/resilience/shadowing-guide.adoc rename to modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/failover-runbook.adoc index 76a131adf1..be0fea01e1 100644 --- a/modules/deploy/pages/redpanda/manual/resilience/shadowing-guide.adoc +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/failover-runbook.adoc @@ -1,12 +1,10 @@ -= Shadowing Guide += Failover Runbook :description: Step-by-step emergency guide for failing over Redpanda shadow links during disasters. 
+:page-aliases: deploy:redpanda/manual/resilience/shadowing-guide.adoc :env-linux: true :page-categories: Management, High Availability, Disaster Recovery, Emergency Response -[NOTE] -==== include::shared:partial$enterprise-license.adoc[] -==== This guide provides step-by-step procedures for emergency failover when your primary Redpanda cluster becomes unavailable. Follow these procedures only during active disasters when immediate failover is required. @@ -14,7 +12,7 @@ This guide provides step-by-step procedures for emergency failover when your pri [IMPORTANT] ==== -This is an emergency procedure. For planned failover testing or day-to-day shadow link management, see xref:deploy:redpanda/manual/resilience/shadowing.adoc[]. Ensure you have completed the xref:deploy:redpanda/manual/resilience/shadowing.adoc#disaster-readiness-checklist[disaster readiness checklist] before an emergency occurs. +This is an emergency procedure. For planned failover testing or day-to-day shadow link management, see xref:./failover.adoc[]. Ensure you have completed the disaster readiness checklist in xref:./overview.adoc#disaster-readiness-checklist[] before an emergency occurs. ==== == Emergency failover procedure diff --git a/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/failover.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/failover.adoc new file mode 100644 index 0000000000..5ebf1148b2 --- /dev/null +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/failover.adoc @@ -0,0 +1,148 @@ += Configure Failover +:description: Learn how failover can transform shadow topics into fully writable resources during disasters. 
+:page-categories: Management, High Availability, Disaster Recovery + +include::shared:partial$enterprise-license.adoc[] + +include::shared:partial$emergency-shadowing-callout.adoc[] + +Failover is the process of modifying shadow topics or an entire shadow cluster from read-only replicas to fully writable resources, and ceasing replication from the source cluster. You can fail over individual topics for selective workload migration or fail over the entire cluster for comprehensive disaster recovery. This critical operation transforms your shadow resources into operational production assets, allowing you to redirect application traffic when the source cluster becomes unavailable. + +== Failover behavior + +When you initiate failover, Redpanda performs the following operations: + +1. **Stops replication**: Halts all data fetching from the source cluster for the specified topics or entire shadow link +2. **Fails over topics**: Converts read-only shadow topics into regular, writable topics +3. **Updates topic state**: Changes topic status from `ACTIVE` to `FAILING_OVER`, then `FAILED_OVER` + +Topic failover is irreversible. Once failed over, topics cannot return to shadow mode, and automatic fallback to the original source cluster is not supported. + +== Failover commands + +You can perform failover at different levels of granularity to match your disaster recovery needs: + +=== Individual topic failover + +To fail over a specific shadow topic while leaving other topics in the shadow link still replicating: + +[,bash] +---- +rpk shadow failover --topic +---- + +Use this approach when you need to selectively fail over specific workloads or when testing failover procedures. 
+ +=== Complete shadow link failover (cluster failover) + +To fail over all shadow topics associated with the shadow link simultaneously: + +[,bash] +---- +rpk shadow failover --all +---- + +Use this approach during a complete regional disaster when you need to activate the entire shadow cluster as your new production environment. + +=== Force delete shadow link (emergency failover) + +[,bash] +---- +rpk shadow delete --force +---- + +[WARNING] +==== +Force deleting a shadow link is irreversible and immediately fails over all topics in the link, bypassing the normal failover state transitions. This action should only be used as a last resort when topics are stuck in transitional states and you need immediate access to all replicated data. +==== + +== Failover states + +=== Shadow link states + +The shadow link itself has a simple state model: + +* **`ACTIVE`**: Shadow link is operating normally, replicating data + +Shadow links do not have dedicated failover states. Instead, the link's operational status is determined by the collective state of its shadow topics. + +=== Shadow topic states + +Individual shadow topics progress through specific states during failover: + +* **`ACTIVE`**: Normal replication state before failover +* **`FAULTED`**: Shadow topic has encountered an error and is not replicating +* **`FAILING_OVER`**: Failover initiated, replication stopping +* **`FAILED_OVER`**: Failover completed successfully, topic fully writable + +== Monitor failover progress + +Monitor failover progress using the status command: + +[,bash] +---- +rpk shadow status +---- + +The output shows individual topic states and any issues encountered during the failover process. 
+ +**Task states during monitoring:** + +* **`ACTIVE`**: Task is operating normally and replicating data +* **`FAULTED`**: Task encountered an error and requires attention +* **`NOT_RUNNING`**: Task is not currently executing +* **`LINK_UNAVAILABLE`**: Task cannot communicate with the source cluster + + +== Post-failover cluster behavior + +After successful failover, your shadow cluster exhibits the following characteristics: + +**Topic accessibility:** + +* Failed-over topics become fully writable and readable. +* Applications can produce and consume messages normally. +* All Kafka APIs are available for failed-over topics. +* Original offsets and timestamps are preserved. + +**Shadow link status:** + +* The shadow link remains but stops replicating data. +* Link status shows topics in `FAILED_OVER` state. +* You can safely delete the shadow link after successful failover. + +**Operational limitations:** + +* No automatic fallback mechanism to the original source cluster. +* Data transforms remain disabled until you manually re-enable them. +* Audit log history from the source cluster is not available (new audit logs begin immediately). + +== Failover considerations and limitations + +**Data consistency:** + +* Some data loss may occur due to replication lag at the time of failover. +* Consumer group offsets are preserved, allowing applications to resume from their last committed position. +* In-flight transactions at the source cluster are not replicated and will be lost. + +**Recovery point objective (RPO):** + +The amount of potential data loss depends on replication lag when disaster occurs. Monitor lag metrics to understand your effective RPO. + +**Network partitions:** + +If the source cluster becomes accessible again after failover, do not attempt to write to both clusters simultaneously. This creates a scenario with potential data inconsistencies, since metadata starts to diverge. 
+ +**Testing requirements:** + +Regularly test failover procedures in non-production environments to validate your disaster recovery processes and measure RTO. + +== Next steps + +After completing failover: + +* Update your application connection strings to point to the shadow cluster +* Verify that applications can produce and consume messages normally +* Consider deleting the shadow link if failover was successful and permanent + +For emergency situations, see xref:./failover-runbook.adoc[Failover Runbook]. \ No newline at end of file diff --git a/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/index.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/index.adoc new file mode 100644 index 0000000000..280d0efcba --- /dev/null +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/index.adoc @@ -0,0 +1,6 @@ += Shadowing +:description: Set up disaster recovery for Redpanda clusters using Shadowing for cross-region replication. +:env-linux: true +:page-layout: index +:page-categories: Management, High Availability, Disaster Recovery + diff --git a/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/monitor.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/monitor.adoc new file mode 100644 index 0000000000..ca2c8c8bd7 --- /dev/null +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/monitor.adoc @@ -0,0 +1,115 @@ += Monitor Shadowing +:description: Monitor Shadowing health with status commands, metrics, and best practices for tracking replication performance. +:page-categories: Management, Monitoring, Disaster Recovery + +include::shared:partial$enterprise-license.adoc[] + +Monitor your shadow links to ensure proper replication performance and understand your disaster recovery readiness. Use `rpk` commands, metrics, and status information to track shadow link health and troubleshoot issues. 
+ +== Status commands + +List existing shadow links: + +[,bash] +---- +rpk shadow list +---- + +View shadow link configuration details: + +[,bash] +---- +rpk shadow describe +---- + +This command shows the complete configuration of the shadow link, including connection settings, filters, and synchronization options. + +Check your shadow link status to ensure proper operation: + +[,bash] +---- +rpk shadow status +---- + +**Status command options:** + +[,bash] +---- +rpk shadow status +---- + +For troubleshooting specific issues, you can use command options to show individual status sections. See the rpk reference for available status options. + +The status output includes: + +* **Shadow link state**: Overall operational state (`ACTIVE`) +* **Individual topic states**: Current state of each replicated topic (`ACTIVE`, `FAULTED`, `FAILING_OVER`, `FAILED_OVER`) +* **Task status**: Health of replication tasks across brokers (`ACTIVE`, `FAULTED`, `NOT_RUNNING`, `LINK_UNAVAILABLE`) +* **Lag information**: Replication lag per partition showing source vs shadow watermarks + +[[shadow-link-metrics]] +== Metrics + +Shadowing provides comprehensive metrics to track replication performance and health: + +[cols="1,1,2"] +|=== +|Metric |Type |Description + +|`redpanda_shadow_link_shadow_lag` +|Gauge +|The lag of the shadow partition against the source partition, calculated as source partition LSO minus shadow partition HWM. Monitor by `shadow_link_name`, `topic`, and `partition` to understand replication lag for each partition. + +|`redpanda_shadow_link_total_bytes_fetched` +|Count +|The total number of bytes fetched by a sharded replicator (bytes received by the client). Labeled by `shadow_link_name` and `shard` to track data transfer volume from the source cluster. + +|`redpanda_shadow_link_total_bytes_written` +|Count +|The total number of bytes written by a sharded replicator (bytes written to the write_at_offset_stm). 
Uses `shadow_link_name` and `shard` labels to monitor data written to the shadow cluster. + +|`redpanda_shadow_link_client_errors` +|Count +|The number of errors seen by the client. Track by `shadow_link_name` and `shard` to identify connection or protocol issues between clusters. + +|`redpanda_shadow_link_shadow_topic_state` +|Gauge +|Number of shadow topics in the respective states. Labeled by `shadow_link_name` and `state` to monitor topic state distribution across your shadow links. + +|`redpanda_shadow_link_total_records_fetched` +|Count +|The total number of records fetched by the sharded replicator (records received by the client). Monitor by `shadow_link_name` and `shard` to track message throughput from the source. + +|`redpanda_shadow_link_total_records_written` +|Count +|The total number of records written by a sharded replicator (records written to the write_at_offset_stm). Uses `shadow_link_name` and `shard` labels to monitor message throughput to the shadow cluster. +|=== + +See also: xref:reference:public-metrics-reference.adoc[] + +== Monitoring best practices + +=== Health check procedures + +Establish regular monitoring workflows to ensure shadow link health: + +**Health checks:** +[,bash] +---- +# Check all shadow links are active +rpk shadow list | grep -v "ACTIVE" || echo "All shadow links healthy" + +# Monitor lag for critical topics +rpk shadow status | grep -E "LAG|Lag" +---- + +=== Alert thresholds + +Configure monitoring alerts for: + +* **High replication lag**: When `redpanda_shadow_link_shadow_lag` exceeds your RPO requirements +* **Connection errors**: When `redpanda_shadow_link_client_errors` increases rapidly +* **Topic state changes**: When topics move to `FAULTED` state +* **Task failures**: When replication tasks enter `FAULTED` or `NOT_RUNNING` states +* **Link unavailability**: When tasks show `LINK_UNAVAILABLE` indicating source cluster connectivity issues +* **Throughput drops**: When bytes/records fetched drops significantly 
\ No newline at end of file diff --git a/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/overview.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/overview.adoc new file mode 100644 index 0000000000..681fabd955 --- /dev/null +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/overview.adoc @@ -0,0 +1,77 @@ += Shadowing Overview +:description: Learn about disaster recovery using Shadowing for cross-region replication. +:env-linux: true +:page-categories: Management, High Availability, Disaster Recovery + +include::shared:partial$enterprise-license.adoc[] + +Shadowing is Redpanda's enterprise-grade disaster recovery solution that establishes asynchronous, offset-preserving replication between two distinct Redpanda clusters. A cluster is able to create a dedicated client that continuously replicates source cluster data, including offsets, timestamps, and cluster metadata. This creates a read-only shadow cluster that you can quickly fail over to handle production traffic during a disaster. + +include::shared:partial$emergency-shadowing-callout.adoc[] + +Unlike traditional replication tools that re-produce messages, Shadowing copies data at the byte level, ensuring shadow topics contain identical copies of source topics with preserved offsets and timestamps. + +Shadowing replicates: + +* **Topic data**: All records with preserved offsets and timestamps +* **Topic configurations**: Partition counts, retention policies, and other xref:reference:properties/topic-properties.adoc[topic properties] +* **Consumer group offsets**: Enables seamless consumer resumption after failover +* **Access Control Lists (ACLs)**: User permissions and security policies +* **Schema Registry data**: Schema definitions and compatibility settings + +== How Shadowing fits into disaster recovery + +Shadowing addresses enterprise disaster recovery requirements driven by regulatory compliance and business continuity needs. 
Organizations typically want to minimize both recovery time objective (RTO) and recovery point objective (RPO), and Shadowing's asynchronous replication helps you achieve both goals by reducing data loss during regional outages and enabling rapid application recovery. + +The architecture follows an active-passive pattern. The source cluster processes all production traffic while the shadow cluster remains in read-only mode, continuously receiving updates. If a disaster occurs, you can fail over the shadow topics using the Admin API or `rpk`, making them fully writable. At that point, you can redirect your applications to the shadow cluster, which becomes the new production cluster. + +Shadowing complements Redpanda's existing availability and recovery capabilities. xref:deploy:redpanda/manual/high-availability.adoc[High availability] actively protects your day-to-day operations, handling reads and writes seamlessly during node or availability zone failures within a region. Shadowing is your safety net for catastrophic regional disasters. While xref:deploy:redpanda/manual/disaster-recovery/whole-cluster-restore.adoc[Whole Cluster Restore] provides point-in-time recovery from xref:manage:tiered-storage.adoc[Tiered Storage], Shadowing delivers near real-time, cross-region replication for mission-critical applications that require rapid failover with minimal data loss. + +// TODO: insert diagram. Possibly with a .gif animation showing Cluster A being written to and Cluster B being replicated with a data flow arrow and geo-separation. Diagram must show icons or labels for topics, configurations, offsets, ACLs, schemas that are being copied + +== Limitations + +Shadowing is designed for active-passive disaster recovery scenarios. Each shadow cluster can maintain only one shadow link. + +Shadowing operates exclusively in asynchronous mode and doesn't support active-active configurations. This means there will always be some replication lag. 
You cannot write to both clusters simultaneously. + +xref:develop:data-transforms/index.adoc[Data transforms] are disabled on shadow clusters while Shadowing is active. During a disaster, xref:manage:audit-logging.adoc[audit log] history from the source cluster is lost, though the shadow cluster begins generating new audit logs immediately after the failover. + +After you fail over shadow topics, automatic fallback to the original source cluster is not supported. + +[CAUTION] +==== +Do not modify synced topic properties on shadow topics. These properties revert to source topic values. +==== + +== Setup and Configuration + +Choose your implementation approach: + +* **xref:./setup.adoc[Setup and Configuration]** - Initial shadow configuration, authentication, and topic selection +* **xref:./monitor.adoc[Monitoring and Operations]** - Health checks, lag monitoring, and operational procedures +* **xref:./failover.adoc[Planned Failover]** - Controlled disaster recovery testing and migrations +* **xref:./failover-runbook.adoc[Failover Runbook]** - Rapid disaster response procedures + +== Disaster readiness checklist + +Before a disaster occurs, ensure you have: + +* [ ] Access to shadow cluster administrative credentials +* [ ] Shadow link names, configuration details, and networking documented +* [ ] Application connection strings for the shadow cluster prepared +* [ ] Tested failover procedures in a non-production environment + +== Next steps + +After setting up Shadowing for your Redpanda clusters, consider these additional steps: + +* **Test your disaster recovery procedures**: Regularly practice failover scenarios in a non-production environment. See xref:./failover-runbook.adoc[] for step-by-step disaster procedures. + +* **Monitor shadow link health**: Set up alerting on the metrics described in xref:./monitor.adoc#shadow-link-metrics[Monitor Shadowing] to ensure early detection of replication issues. 
+ +* **Implement automated failover**: Consider developing automation scripts that can detect outages and initiate failover based on predefined criteria. + +* **Review security policies**: Ensure your ACL filters replicate the appropriate security settings for your disaster recovery environment. + +* **Document your configuration**: Maintain up-to-date documentation of your shadow link configuration, including network settings, authentication details, and filter definitions. \ No newline at end of file diff --git a/modules/deploy/pages/redpanda/manual/resilience/shadowing.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/setup.adoc similarity index 60% rename from modules/deploy/pages/redpanda/manual/resilience/shadowing.adoc rename to modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/setup.adoc index 18bdc004b4..55d30ca932 100644 --- a/modules/deploy/pages/redpanda/manual/resilience/shadowing.adoc +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/shadowing/setup.adoc @@ -1,5 +1,5 @@ -= Shadowing -:description: Set up disaster recovery for Redpanda clusters using Shadowing for cross-region replication. += Shadowing Setup and Configuration +:description: Set up Shadowing for disaster recovery, including cross-region replication, data filters, networking, and authentication. :env-linux: true :page-categories: Management, High Availability, Disaster Recovery @@ -8,45 +8,6 @@ include::shared:partial$enterprise-license.adoc[] ==== -Shadowing is Redpanda's enterprise-grade disaster recovery solution that establishes asynchronous, offset-preserving replication between two distinct Redpanda clusters. A cluster is able to create a dedicated client that continuously replicates source cluster data, including offsets, timestamps, and cluster metadata. This creates a read-only shadow cluster that you can quickly failover to handle production traffic during a disaster. 
- -include::shared:partial$emergency-shadowing-callout.adoc[] - -Unlike traditional replication tools that re-produce messages, Shadowing copies data at the byte level, ensuring shadow topics contain identical copies of source topics with preserved offsets and timestamps. - -Shadowing replicates: - -* **Topic data**: All records with preserved offsets and timestamps -* **Topic configurations**: Partition counts, retention policies, and other xref:reference:properties/topic-properties.adoc[topic properties] -* **Consumer group offsets**: Enables seamless consumer resumption after failover -* **Access Control Lists (ACLs)**: User permissions and security policies -* **Schema Registry data**: Schema definitions and compatibility settings - -== How Shadowing fits into disaster recovery - -Shadowing addresses enterprise disaster recovery requirements driven by regulatory compliance and business continuity needs. Organizations typically want to minimize both recovery time objective (RTO) and recovery point objective (RPO), and Shadowing asynchronous replication helps you achieve both goals by reducing data loss during regional outages and enabling rapid application recovery. - -The architecture follows an active-passive pattern. The source cluster processes all production traffic while the shadow cluster remains in read-only mode, continuously receiving updates. If a disaster occurs, you can failover the shadow topics using the Admin API or `rpk`, making them fully writable. At that point, you can redirect your applications to the shadow cluster, which becomes the new production cluster. - -Shadowing complements Redpanda's existing availability and recovery capabilities. xref:deploy:redpanda/manual/high-availability.adoc[High availability] actively protects your day-to-day operations, handling reads and writes seamlessly during node or availability zone failures within a region. Shadowing is your safety net for catastrophic regional disasters. 
While xref:manage:whole-cluster-restore.adoc[Whole Cluster Restore] provides point-in-time recovery from xref:manage:tiered-storage.adoc[Tiered Storage], Shadowing delivers near real-time, cross-region replication for mission-critical applications that require rapid failover with minimal data loss. - -// TODO: insert diagram. Possibly with a .gif animation showing cluster Cluster A being written and cluster B being replicated with a data flow arrow and geo-separation. Diagram must show Icons or labels for topics, configurations, offsets, ACLs, schemas that are being copied - -== Limitations - -Shadowing is designed for active-passive disaster recovery scenarios. Each shadow cluster can maintain only one shadow link. - -Shadowing operates exclusively in asynchronous mode and doesn't support active-active configurations. This means there will always be some replication lag. You cannot write to both clusters simultaneously. - -xref:develop:data-transforms/index.adoc[Data transforms] are disabled on shadow clusters while Shadowing is active. During a disaster, xref:manage:audit-logging.adoc[audit log] history from the source cluster is lost, though the shadow cluster begins generating new audit logs immediately after the failover. - -After you failover shadow topics, automatic fallback to the original source cluster is not supported. - -[CAUTION] -==== -Do not modify synced topic properties on shadow topics. These properties revert to source topic values. -==== - == Prerequisites === License and cluster requirements @@ -124,7 +85,7 @@ By default, Redpanda always syncs the following topic properties: Additional topic properties are replicated only when you explicitly specify them in your `synced_shadow_topic_properties` configuration. Set `exclude_default` to `true` if you want to sync only the properties listed in `synced_shadow_topic_properties`. 
The filtering system you configure determines the precise scope of replication across all components, allowing you to balance comprehensive disaster recovery with operational efficiency. -=== Set filters +== Set filters Filters determine which resources Shadowing automatically creates when establishing your shadow link. @@ -132,7 +93,7 @@ Topic filters select which topics Shadowing automatically creates as shadow topi Consumer group and ACL filters control which groups and security policies replicate to maintain application functionality. -==== Filter types and patterns +=== Filter types and patterns Each filter uses two key settings: @@ -143,7 +104,7 @@ Each filter uses two key settings: - `INCLUDE`: Replicate items that match the pattern - `EXCLUDE`: Skip items that match the pattern -==== Filter processing rules +=== Filter processing rules Redpanda processes filters in the order you define them with EXCLUDE filters taking precedence. Design your filter lists carefully: @@ -151,7 +112,7 @@ Redpanda processes filters in the order you define them with EXCLUDE filters tak 2. **Order matters for INCLUDE filters**: Among INCLUDE filters, the first match determines the result 3. **Default behavior**: Items that don't match any filter are excluded from replication -==== Common filtering patterns +=== Common filtering patterns **Replicate all topics except test topics:** [,yaml] @@ -189,7 +150,7 @@ group_filters: name: "prod-consumer-" # Include production consumers ---- -==== Schema Registry synchronization +=== Schema Registry synchronization Shadowing can replicate Schema Registry data by shadowing the `_schemas` system topic. When enabled, this provides byte-for-byte replication of schema definitions, versions, and compatibility settings. @@ -209,7 +170,7 @@ schema_registry_sync_options: **Important:** Once the `_schemas` topic becomes a shadow topic, it cannot be stopped without either failing over the topic or deleting it entirely. 
-==== System topic filtering rules +=== System topic filtering rules Redpanda system topics have the following specific filtering restrictions: @@ -218,7 +179,7 @@ Redpanda system topics have the following specific filtering restrictions: * Wildcard `*` filters will not match topics that start with `_redpanda` or `__redpanda`. * To shadow specific system topics, you must provide explicit literal filters for those individual topics. -==== ACL filtering +=== ACL filtering By default all ACLs are replicated. This is recommended in order to ensure that your shadow cluster has the same permissions as your source cluster. ACL filters should be used with care: @@ -247,7 +208,7 @@ acl_filters: host: "*" # Any host ---- -==== Consumer group filtering and behavior +=== Consumer group filtering and behavior Consumer group filters determine which consumer groups have their offsets replicated to the shadow cluster. By default, all consumer groups are replicated unless you specify filters. @@ -271,7 +232,7 @@ consumer_offset_sync_options: **Offset clamping:** When Redpanda replicates consumer group offsets from the source cluster, offsets are automatically "clamped" during the commit process. If a replicated offset is above the high watermark (HWM) of the shadow partition, Redpanda clamps the offset to the shadow partition's HWM. This ensures offsets remain valid and prevents consumers from seeking beyond available data on the shadow cluster. -==== Schema Registry synchronization +=== Schema Registry synchronization Shadowing can replicate Schema Registry data by shadowing the `_schemas` system topic. When enabled, this provides byte-for-byte replication of schema definitions, versions, and compatibility settings. @@ -290,7 +251,7 @@ schema_registry_sync_options: **Important:** Once the `_schemas` topic becomes a shadow topic, it cannot be stopped without either failing over the topic or deleting it entirely. 
-==== Starting offset for new shadow topics +=== Starting offset for new shadow topics When a shadow topic is created for the first time, you can control where replication begins on the source topic. This setting only applies to empty shadow partitions and is crucial for disaster recovery planning. @@ -317,7 +278,7 @@ topic_metadata_sync_options: The starting offset only affects **new shadow topics**. Once a shadow topic exists and has data, changing this setting has no effect on that topic's replication. ==== -==== Generate a sample configuration +=== Generate a sample configuration Use `rpk` to generate a sample configuration file with common filter patterns: @@ -544,266 +505,4 @@ security_sync_options: ---- ==== -See also: link:/api/doc/admin/[Admin API v2 reference^] - -== Failover - -include::shared:partial$emergency-shadowing-callout.adoc[] - -Failover is the process of modifying shadow topics or an entire shadow cluster from read-only replicas to fully writable resources, and ceasing replication from the source cluster. You can fail over individual topics for selective workload migration or fail over the entire cluster for comprehensive disaster recovery. This critical operation transforms your shadow resources into operational production assets, allowing you to redirect application traffic when the source cluster becomes unavailable. - -=== Failover behavior - -When you initiate failover, Redpanda performs the following operations: - -1. **Stops replication**: Halts all data fetching from the source cluster for the specified topics or entire shadow link -2. **Failover topics**: Converts read-only shadow topics into regular, writable topics -3. **Updates topic state**: Changes topic status from `ACTIVE` to `FAILING_OVER`, then `FAILED_OVER` - -Topic failover is irreversible. Once failed over, topics cannot return to shadow mode, and automatic fallback to the original source cluster is not supported. 
- -=== Failover granularity options - -You can perform failover at two levels of granularity: - -**Individual topic failover:** -[,bash] ----- -rpk shadow failover --topic ----- - -This failover applies only to the specified shadow topic, while leaving other topics in the shadow link still replicating. Use this approach when you need to selectively failover specific workloads or when testing failover procedures. - -**Complete shadow link failover (cluster failover):** -[,bash] ----- -rpk shadow failover --all ----- - -This failover applies to all shadow topics associated with the shadow link simultaneously, effectively failing over the entire cluster's replicated data. Use this approach during a complete regional disaster when you need to activate the entire shadow cluster as your new production environment. - -**Force delete shadow link (emergency failover):** -[,bash] ----- -rpk shadow delete --force ----- - -[WARNING] -==== -Force deleting a shadow link is irreversible and immediately fails over all topics in the link, bypassing the normal failover state transitions. This action should only be used as a last resort when topics are stuck in transitional states and you need immediate access to all replicated data. -==== - -=== Failover states - -==== Shadow link states - -The shadow link itself has a simple state model: - -* **`ACTIVE`**: Shadow link is operating normally, replicating data - -Shadow links do not have dedicated failover states. Instead, the link's operational status is determined by the collective state of its shadow topics. 
- -==== Shadow topic states - -Individual shadow topics progress through specific states during failover: - -* **`ACTIVE`**: Normal replication state before failover -* **`FAULTED`**: Shadow topic has encountered an error and is not replicating -* **`FAILING_OVER`**: Failover initiated, replication stopping -* **`FAILED_OVER`**: Failover completed successfully, topic fully writable - -==== Monitor failover progress - -Monitor failover progress using the status command: - -[,bash] ----- -rpk shadow status ----- - -The output shows individual topic states and any issues encountered during the failover process. - -**Task states during monitoring:** - -* **`ACTIVE`**: Task is operating normally and replicating data -* **`FAULTED`**: Task encountered an error and requires attention -* **`NOT_RUNNING`**: Task is not currently executing -* **`LINK_UNAVAILABLE`**: Task cannot communicate with the source cluster - -=== Post-failover cluster behavior - -After successful failover, your shadow cluster exhibits the following characteristics: - -**Topic accessibility:** - -* Failed over topics become fully writable and readable. -* Applications can produce and consume messages normally. -* All Kafka APIs are available for failedover topics. -* Original offsets and timestamps are preserved. - -**Shadow link status:** - -* The shadow link remains but stops replicating data. -* Link status shows topics in `FAILED_OVER` state. -* You can safely delete the shadow link after successful failover. - -**Operational limitations:** - -* No automatic fallback mechanism to the original source cluster. -* Data transforms remain disabled until you manually re-enable them. -* Audit log history from the source cluster is not available (new audit logs begin immediately). - -=== Failover considerations and limitations - -**Data consistency:** - -* Some data loss may occur due to replication lag at the time of failover. 
-* Consumer group offsets are preserved, allowing applications to resume from their last committed position. -* In-flight transactions at the source cluster are not replicated and will be lost. - -**Recovery-point-objective (RPO):** - -The amount of potential data loss depends on replication lag when disaster occurs. Monitor lag metrics to understand your effective RPO. - -**Network partitions:** - -If the source cluster becomes accessible again after failover, do not attempt to write to both clusters simultaneously. This creates a scenario with potential data inconsistencies, since metadata starts to diverge. - -**Testing requirements:** - -Regularly test failover procedures in non-production environments to validate your disaster recovery processes and measure RTO. - -== Monitor Shadowing - -Monitor your shadow links to ensure proper replication performance and understand your disaster recovery readiness. Use `rpk` commands, metrics, and status information to track shadow link health and troubleshoot issues. - -=== Status commands - -List existing shadow links: - -[,bash] ----- -rpk shadow list ----- - -View shadow link configuration details: - -[,bash] ----- -rpk shadow describe ----- - -This command shows the complete configuration of the shadow link, including connection settings, filters, and synchronization options. - -Check your shadow link status to ensure proper operation: - -[,bash] ----- -rpk shadow status ----- - -**Status command options:** - -[,bash] ----- -rpk shadow status ----- - -For troubleshooting specific issues, you can use command options to show individual status sections. See the rpk reference for available status options. 
- -The status output includes: - -* **Shadow link state**: Overall operational state (`ACTIVE`) -* **Individual topic states**: Current state of each replicated topic (`ACTIVE`, `FAULTED`, `FAILING_OVER`, `FAILED_OVER`) -* **Task status**: Health of replication tasks across brokers (`ACTIVE`, `FAULTED`, `NOT_RUNNING`, `LINK_UNAVAILABLE`) -* **Lag information**: Replication lag per partition showing source vs shadow watermarks - -[[shadow-link-metrics]] -=== Metrics - -Shadowing provides comprehensive metrics to track replication performance and health: - -[cols="1,1,2"] -|=== -|Metric |Type |Description - -|`redpanda_shadow_link_shadow_lag` -|Gauge -|The lag of the shadow partition against the source partition, calculated as source partition LSO minus shadow partition HWM. Monitor by `shadow_link_name`, `topic`, and `partition` to understand replication lag for each partition. - -|`redpanda_shadow_link_total_bytes_fetched` -|Count -|The total number of bytes fetched by a sharded replicator (bytes received by the client). Labeled by `shadow_link_name` and `shard` to track data transfer volume from the source cluster. - -|`redpanda_shadow_link_total_bytes_written` -|Count -|The total number of bytes written by a sharded replicator (bytes written to the write_at_offset_stm). Uses `shadow_link_name` and `shard` labels to monitor data written to the shadow cluster. - -|`redpanda_shadow_link_client_errors` -|Count -|The number of errors seen by the client. Track by `shadow_link_name` and `shard` to identify connection or protocol issues between clusters. - -|`redpanda_shadow_link_shadow_topic_state` -|Gauge -|Number of shadow topics in the respective states. Labeled by `shadow_link_name` and `state` to monitor topic state distribution across your shadow links. - -|`redpanda_shadow_link_total_records_fetched` -|Count -|The total number of records fetched by the sharded replicator (records received by the client). 
Monitor by `shadow_link_name` and `shard` to track message throughput from the source. - -|`redpanda_shadow_link_total_records_written` -|Count -|The total number of records written by a sharded replicator (records written to the write_at_offset_stm). Uses `shadow_link_name` and `shard` labels to monitor message throughput to the shadow cluster. -|=== - -See also: xref:reference:public-metrics-reference.adoc[] - -=== Monitoring best practices - -==== Health check procedures - -Establish regular monitoring workflows to ensure shadow link health: - -**Health checks:** -[,bash] ----- -# Check all shadow links are active -rpk shadow list | grep -v "ACTIVE" || echo "All shadow links healthy" - -# Monitor lag for critical topics -rpk shadow status | grep -E "LAG|Lag" ----- - -==== Alert thresholds - -Configure monitoring alerts for: - -* **High replication lag**: When `redpanda_shadow_link_shadow_lag` exceeds your RPO requirements -* **Connection errors**: When `redpanda_shadow_link_client_errors` increases rapidly -* **Topic state changes**: When topics move to `FAULTED` state -* **Task failures**: When replication tasks enter `FAULTED` or `NOT_RUNNING` states -* **Link unavailability**: When tasks show `LINK_UNAVAILABLE` indicating source cluster connectivity issues -* **Throughput drops**: When bytes/records fetched drops significantly - -== Disaster readiness checklist - -Before a disaster occurs, ensure you have: - -* [ ] Access to shadow cluster administrative credentials -* [ ] Shadow link names and configuration details, and networking documented -* [ ] Application connection strings for the shadow cluster prepared -* [ ] Tested failover procedures in a non-production environment - -== Next steps - -After setting up Shadowing for your Redpanda clusters, consider these additional steps: - -* **Test your disaster recovery procedures**: Regularly practice failover scenarios in a non-production environment. 
See xref:deploy:redpanda/manual/resilience/shadowing-guide.adoc[Emergency Shadowing Guide] for step-by-step emergency procedures. - -* **Monitor shadow link health**: Set up alerting on the metrics described above to ensure early detection of replication issues. - -* **Implement automated failover**: Consider developing automation scripts that can detect outages and initiate failover based on predefined criteria. - -* **Review security policies**: Ensure your ACL filters replicate the appropriate security settings for your disaster recovery environment. - -* **Document your configuration**: Maintain up-to-date documentation of your shadow link configuration, including network settings, authentication details, and filter definitions. +See also: link:/api/doc/admin/[Admin API v2 reference^] \ No newline at end of file diff --git a/modules/deploy/pages/redpanda/manual/disaster-recovery/topic-recovery.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/topic-recovery.adoc new file mode 100644 index 0000000000..6f483a49df --- /dev/null +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/topic-recovery.adoc @@ -0,0 +1,7 @@ += Topic Recovery +:description: Restore a single topic from object storage. 
+:page-categories: Management, High Availability, Disaster Recovery +:page-aliases: manage:topic-recovery.adoc, deploy:redpanda/manual/resilience/disaster-recovery/topic-recovery.adoc +:env-linux: true + +include::manage:partial$topic-recovery.adoc[] \ No newline at end of file diff --git a/modules/manage/pages/whole-cluster-restore.adoc b/modules/deploy/pages/redpanda/manual/disaster-recovery/whole-cluster-restore.adoc similarity index 56% rename from modules/manage/pages/whole-cluster-restore.adoc rename to modules/deploy/pages/redpanda/manual/disaster-recovery/whole-cluster-restore.adoc index 531bbdc4f4..82ba98bb13 100644 --- a/modules/manage/pages/whole-cluster-restore.adoc +++ b/modules/deploy/pages/redpanda/manual/disaster-recovery/whole-cluster-restore.adoc @@ -1,6 +1,7 @@ -= Whole Cluster Restore for Disaster Recovery += Whole Cluster Restore :description: Restore a failed cluster, including its metadata. :page-categories: Management, High Availability, Disaster Recovery +:page-aliases: manage:whole-cluster-restore.adoc, deploy:redpanda/manual/resilience/disaster-recovery/whole-cluster-restore.adoc :env-linux: true include::manage:partial$whole-cluster-restore.adoc[] \ No newline at end of file diff --git a/modules/deploy/pages/redpanda/manual/high-availability.adoc b/modules/deploy/pages/redpanda/manual/high-availability.adoc index 28a279343c..bae99ec133 100644 --- a/modules/deploy/pages/redpanda/manual/high-availability.adoc +++ b/modules/deploy/pages/redpanda/manual/high-availability.adoc @@ -1,6 +1,6 @@ = High Availability :description: Learn about the trade-offs with different high availability configurations. 
-:page-aliases: deploy:deployment-option/self-hosted/manual/high-availability.adoc, deployment:high-availability.adoc +:page-aliases: deploy:deployment-option/self-hosted/manual/high-availability.adoc, deployment:high-availability.adoc, deploy:redpanda/manual/resilience/high-availability.adoc :env-linux: true :page-categories: Deployment diff --git a/modules/deploy/pages/redpanda/manual/index.adoc b/modules/deploy/pages/redpanda/manual/index.adoc deleted file mode 100644 index c5b9c59ade..0000000000 --- a/modules/deploy/pages/redpanda/manual/index.adoc +++ /dev/null @@ -1,6 +0,0 @@ -= Deploy on Linux -:page-aliases: deploy:deployment-option/self-hosted/manual/index.adoc -:description: Learn about deployment options on Linux, as well as considerations for high availability and sizing. -:page-layout: index -:env-linux: true -:page-categories: Deployment \ No newline at end of file diff --git a/modules/manage/pages/remote-read-replicas.adoc b/modules/deploy/pages/redpanda/manual/remote-read-replicas.adoc similarity index 51% rename from modules/manage/pages/remote-read-replicas.adoc rename to modules/deploy/pages/redpanda/manual/remote-read-replicas.adoc index edf3cea076..8a99d09007 100644 --- a/modules/manage/pages/remote-read-replicas.adoc +++ b/modules/deploy/pages/redpanda/manual/remote-read-replicas.adoc @@ -1,7 +1,7 @@ = Remote Read Replicas :description: Learn how to create a Remote Read Replica topic, which is a read-only topic that mirrors a topic on a different cluster. 
-:page-aliases: data-management:remote-read-replicas.adoc -:page-categories: Management, High Availability, Data Replication +:page-aliases: data-management:remote-read-replicas.adoc, manage:remote-read-replicas.adoc, deploy:redpanda/manual/resilience/remote-read-replicas.adoc +:page-categories: Management, High Availability, Data Replication, Disaster Recovery :env-linux: true -include::manage:partial$remote-read-replicas.adoc[] +include::manage:partial$remote-read-replicas.adoc[] \ No newline at end of file diff --git a/modules/deploy/partials/high-availability.adoc b/modules/deploy/partials/high-availability.adoc index 7d7126bb82..bb0e176293 100644 --- a/modules/deploy/partials/high-availability.adoc +++ b/modules/deploy/partials/high-availability.adoc @@ -5,7 +5,7 @@ ifdef::env-kubernetes[] :tiered-storage: manage:kubernetes/storage/tiered-storage/k-tiered-storage.adoc endif::[] ifndef::env-kubernetes[] -:remote-read-replicas: manage:remote-read-replicas.adoc +:remote-read-replicas: deploy:redpanda/manual/remote-read-replicas.adoc :rack-awareness: manage:rack-awareness.adoc :tiered-storage: manage:tiered-storage.adoc endif::[] diff --git a/modules/get-started/pages/quick-start.adoc b/modules/get-started/pages/quick-start.adoc index 5086bc8e5e..404c45741b 100644 --- a/modules/get-started/pages/quick-start.adoc +++ b/modules/get-started/pages/quick-start.adoc @@ -22,7 +22,7 @@ Redpanda Self-Managed is a modern streaming platform, compatible with Kafka APIs [NOTE] ==== -This quickstart uses Docker to run Redpanda, which is only for development and testing purposes. For production deployments, see the xref:deploy:redpanda/manual/index.adoc[Linux deployment guides] or the xref:deploy:redpanda/kubernetes/index.adoc[Kubernetes deployment guides]. To download the Redpanda binary, see https://github.com/redpanda-data/redpanda/releases/latest[GitHub^]. +This quickstart uses Docker to run Redpanda, which is only for development and testing purposes. 
For production deployments, see the xref:deploy:redpanda/manual/production/index.adoc[Linux deployment guides] or the xref:deploy:redpanda/kubernetes/index.adoc[Kubernetes deployment guides]. To download the Redpanda binary, see https://github.com/redpanda-data/redpanda/releases/latest[GitHub^]. **Looking for a managed solution?** You can also get started quickly with a hosted Redpanda cluster by signing up for https://cloud.redpanda.com[Redpanda Cloud^]. ==== diff --git a/modules/get-started/pages/release-notes/redpanda.adoc b/modules/get-started/pages/release-notes/redpanda.adoc index 22504eb941..3d66e33d94 100644 --- a/modules/get-started/pages/release-notes/redpanda.adoc +++ b/modules/get-started/pages/release-notes/redpanda.adoc @@ -9,9 +9,9 @@ This topic includes new content added in version {page-component-version}. For a == Shadowing -Redpanda v25.3 introduces xref:deploy:redpanda/manual/resilience/shadowing.adoc[Shadowing], an Enterprise-licensed disaster recovery solution that provides asynchronous, offset-preserving replication between distinct Redpanda clusters. Shadowing enables cross-region data protection by replicating topic data, configurations, consumer group offsets, ACLs, and Schema Registry data with byte-level fidelity. +Redpanda v25.3 introduces xref:deploy:redpanda/manual/disaster-recovery/shadowing/index.adoc[], an Enterprise-licensed disaster recovery solution that provides asynchronous, offset-preserving replication between distinct Redpanda clusters. Shadowing enables cross-region data protection by replicating topic data, configurations, consumer group offsets, ACLs, and Schema Registry data with byte-level fidelity. -The shadow cluster operates in read-only mode while continuously receiving updates from the source cluster. During a disaster, you can fail over individual topics or an entire shadow link to make resources fully writable for production traffic. 
See xref:deploy:redpanda/manual/resilience/shadowing-guide.adoc[Emergency Shadowing Guide] for emergency procedures. +The shadow cluster operates in read-only mode while continuously receiving updates from the source cluster. During a disaster, you can fail over individual topics or an entire shadow link to make resources fully writable for production traffic. See xref:deploy:redpanda/manual/disaster-recovery/shadowing/failover-runbook.adoc[] for emergency procedures. == Connected client monitoring diff --git a/modules/manage/pages/kubernetes/tiered-storage/k-whole-cluster-restore.adoc b/modules/manage/pages/kubernetes/tiered-storage/k-whole-cluster-restore.adoc index 28cb9a0ecd..cd60bdab39 100644 --- a/modules/manage/pages/kubernetes/tiered-storage/k-whole-cluster-restore.adoc +++ b/modules/manage/pages/kubernetes/tiered-storage/k-whole-cluster-restore.adoc @@ -1,4 +1,4 @@ -= Whole Cluster Restore for Disaster Recovery in Kubernetes += Whole Cluster Restore in Kubernetes :description: Restore a failed cluster, including its metadata. :page-categories: Management, High Availability, Disaster Recovery :env-kubernetes: true diff --git a/modules/manage/pages/topic-recovery.adoc b/modules/manage/pages/topic-recovery.adoc deleted file mode 100644 index 188f60aa88..0000000000 --- a/modules/manage/pages/topic-recovery.adoc +++ /dev/null @@ -1,6 +0,0 @@ -= Topic Recovery -:description: Restore a single topic from object storage. 
-:page-categories: Management, High Availability -:env-linux: true - -include::manage:partial$topic-recovery.adoc[] \ No newline at end of file diff --git a/modules/manage/partials/whole-cluster-restore.adoc b/modules/manage/partials/whole-cluster-restore.adoc index db15556b94..c2ced17a32 100644 --- a/modules/manage/partials/whole-cluster-restore.adoc +++ b/modules/manage/partials/whole-cluster-restore.adoc @@ -19,7 +19,7 @@ With xref:{link-tiered-storage}[Tiered Storage] enabled, you can use Whole Clust Whole Cluster Restore is not a fully-functional disaster recovery solution. It does not provide snapshot-style consistency. Some partitions in some topics will be more up-to-date than others. Committed transactions are not guaranteed to be atomic. ==== -TIP: If you need to restore only a subset of topic data, consider using xref:manage:topic-recovery.adoc[topic recovery] instead of a Whole Cluster Restore. +TIP: If you need to restore only a subset of topic data, consider using xref:deploy:redpanda/manual/disaster-recovery/topic-recovery.adoc[topic recovery] instead of a Whole Cluster Restore. The following metadata is included in a Whole Cluster Restore: @@ -157,7 +157,7 @@ include::manage:partial$kubernetes/extraclusterconfig.adoc[] endif::[] ifndef::env-kubernetes[] -Follow the steps to xref:deploy:deployment-option/self-hosted/manual/index.adoc[deploy a new cluster]. +Follow the steps to xref:deploy:redpanda/manual/production/index.adoc[deploy a new cluster]. NOTE: Make sure to configure the target cluster with the same Tiered Storage settings as the source cluster. 
endif::[] diff --git a/modules/migrate/pages/data-migration.adoc b/modules/migrate/pages/data-migration.adoc index 8719eef242..eb4e29b072 100644 --- a/modules/migrate/pages/data-migration.adoc +++ b/modules/migrate/pages/data-migration.adoc @@ -14,7 +14,7 @@ See the https://kafka.apache.org/downloads[Kafka downloads page^] for download i To set up the replication, you need: -* 2 Redpanda clusters - Redpanda's migration mechanism is independent of the underlying cluster version or cluster platform, so you can set up these clusters in any deployment you like, including xref:deploy:deployment-option/self-hosted/kubernetes/get-started-dev.adoc[Kubernetes], and xref:deploy:deployment-option/self-hosted/manual/index.adoc[Linux]. +* 2 Redpanda clusters - Redpanda's migration mechanism is independent of the underlying cluster version or cluster platform, so you can set up these clusters in any deployment you like, including xref:deploy:redpanda/kubernetes/get-started-dev.adoc[Kubernetes] and xref:deploy:redpanda/manual/production/index.adoc[Linux]. * MirrorMaker 2 host - You install MirrorMaker 2 on a separate system or on one of the Redpanda clusters, as long as the IP addresses and ports on each cluster are accessible from the MirrorMaker 2 host. You must install the https://docs.oracle.com/javase/10/install/toc.htm[Java Runtime Engine (JRE)^] on the MirrorMaker 2 host. diff --git a/modules/shared/partials/emergency-shadowing-callout.adoc b/modules/shared/partials/emergency-shadowing-callout.adoc index 4bc5894eea..4f5bfda5e9 100644 --- a/modules/shared/partials/emergency-shadowing-callout.adoc +++ b/modules/shared/partials/emergency-shadowing-callout.adoc @@ -1,5 +1,5 @@ [IMPORTANT] .Experiencing an active disaster? ==== -See xref:deploy:redpanda/manual/resilience/shadowing-guide.adoc[Emergency Shadowing Failover] for immediate step-by-step emergency procedures. 
+See xref:deploy:redpanda/manual/disaster-recovery/shadowing/failover-runbook.adoc[] for immediate step-by-step disaster procedures. ==== \ No newline at end of file