DBA survival BLOG

DBA stuff and Oracle Data Guard

New views in Oracle Data Guard 23c

Posted on January 3, 2024 by Ludovico

Oracle Data Guard 23c comes with many nice improvements for observability, which greatly increase the usability of Data Guard in environments with a high level of automation.

For the 23c version, we have the following new views.V$DG_BROKER_ROLE_CHANGE

This view tracks the last role transitions that occurred in the configuration. Example:

SQL> select * from v$dg_broker_role_change;

EVENT         STANDBY_TYPE    OLD_PRIMARY       NEW_PRIMARY       FS_FAILOVER_REASON    BEGIN_TIME                         END_TIME                              CON_ID
_____________ _______________ _________________ _________________ _____________________ __________________________________ __________________________________ _________
Switchover    Physical        adghol_53k_lhr    adghol_p4n_lhr                          18-DEC-23 10.40.12.000000000 AM    18-DEC-23 10.40.32.000000000 AM            0
Switchover    Physical        adghol_p4n_lhr    adghol_53k_lhr                          18-DEC-23 10.48.55.000000000 AM    18-DEC-23 10.49.15.000000000 AM            0

SQL> select * from v$dg_broker_role_change;

EVENT STANDBY_TYPE OLD_PRIMARY NEW_PRIMARY FS_FAILOVER_REASON BEGIN_TIME END_TIME CON_ID

_____________ _______________ _________________ _________________ _____________________ __________________________________ __________________________________ _________

Switchover Physical adghol_53k_lhr adghol_p4n_lhr 18-DEC-23 10.40.12.000000000 AM 18-DEC-23 10.40.32.000000000 AM 0

Switchover Physical adghol_p4n_lhr adghol_53k_lhr 18-DEC-23 10.48.55.000000000 AM 18-DEC-23 10.49.15.000000000 AM 0

The event might be a Switchover, Failover, or Fast-Start Failover.

In the case of Fast-Start Failover, you will see the reason (typically “Primary Disconnected” if it comes from the observer, or whatever reason you put in DBMS_DG.INITIATE_FS_FAILOVER.

No more need to analyze the logs to find out which database was primary at any moment in time!

V$DG_BROKER_PROPERTY

Before 23c, the only possible way to get a broker property from SQL was to use undocumented (unsupported) procedures in the fixed package DBMS_DRS. I’ve blogged about it in the past, before joining Oracle.

Now, it’s as easy as selecting from a view, where you can get the properties per member or per configuration:

SQL> select member, property, value from V$DG_BROKER_PROPERTY where value is not null;

MEMBER      PROPERTY                        VALUE
___________ _______________________________ _________
mydb        FastStartFailoverThreshold      180
mydb        OperationTimeout                30
mydb        TraceLevel                      USER
mydb        FastStartFailoverLagLimit       300
mydb        CommunicationTimeout            180
mydb        ObserverReconnect               0
mydb        ObserverPingInterval            0
mydb        ObserverPingRetry               0
mydb        FastStartFailoverAutoReinstate  TRUE
mydb        FastStartFailoverPmyShutdown    TRUE
...
mydb_site1  DGConnectIdentifier             mydb_site1
mydb_site1  FastStartFailoverTarget         mydb_site2
mydb_site1  LogShipping                     ON
mydb_site1  LogXptMode                      ASYNC
mydb_site1  DelayMins                       0
...
mydb_site1  StaticConnectIdentifier         (DESCRIPTION=<...>)))
mydb_site1  TopWaitEvents                   (monitor)
mydb_site1  SidName                         (monitor)
mydb_site2  DGConnectIdentifier             mydb_site2
mydb_site2  FastStartFailoverTarget         mydb_site1

SQL> select member, property, value from V$DG_BROKER_PROPERTY where value is not null;

MEMBER PROPERTY VALUE

___________ _______________________________ _________

mydb FastStartFailoverThreshold 180

mydb OperationTimeout 30

mydb TraceLevel USER

mydb FastStartFailoverLagLimit 300

mydb CommunicationTimeout 180

mydb ObserverReconnect 0

mydb ObserverPingInterval 0

mydb ObserverPingRetry 0

mydb FastStartFailoverAutoReinstate TRUE

mydb FastStartFailoverPmyShutdown TRUE

...

mydb_site1 DGConnectIdentifier mydb_site1

mydb_site1 FastStartFailoverTarget mydb_site2

mydb_site1 LogShipping ON

mydb_site1 LogXptMode ASYNC

mydb_site1 DelayMins 0

...

mydb_site1 StaticConnectIdentifier (DESCRIPTION=<...>)))

mydb_site1 TopWaitEvents (monitor)

mydb_site1 SidName (monitor)

mydb_site2 DGConnectIdentifier mydb_site2

mydb_site2 FastStartFailoverTarget mydb_site1

The example selects just three columns, but the view is rich in detailing which properties apply to which situation (scope, valid_role):

SQL> set sqlformat json-formatted
SQL> select * from v$dg_broker_property where member='adghol_p4n_lhr' and upper(property) like '%REDO%';
{
  "results" : [
    {
    ...
      "items" : [
        {
          "member" : "adghol_p4n_lhr",
          "instance" : "N/A",
          "dataguard_role" : "PHYSICAL STANDBY",
          "property" : "PreferredObserverHosts",
          "property_type" : "CONFIGURABLE",
          "value" : "",
          "value_type" : "STRING",
          "scope" : "MEMBER",
          "valid_role" : "N/A",
          "con_id" : 0
        },
        {
          "member" : "adghol_p4n_lhr",
          "instance" : "N/A",
          "dataguard_role" : "PHYSICAL STANDBY",
          "property" : "RedoRoutes",
          "property_type" : "CONFIGURABLE",
          "value" : "",
          "value_type" : "STRING",
          "scope" : "MEMBER",
          "valid_role" : "N/A",
          "con_id" : 0
        },
        {
          "member" : "adghol_p4n_lhr",
          "instance" : "N/A",
          "dataguard_role" : "PHYSICAL STANDBY",
          "property" : "RedoCompression",
          "property_type" : "CONFIGURABLE",
          "value" : "DISABLE",
          "value_type" : "STRING",
          "scope" : "MEMBER",
          "valid_role" : "STANDBY",
          "con_id" : 0
        }
      ]
    }
  ]
}

SQL> set sqlformat json-formatted

SQL> select * from v$dg_broker_property where member='adghol_p4n_lhr' and upper(property) like '%REDO%';

{

"results" : [

{

...

"items" : [

{

"member" : "adghol_p4n_lhr",

"instance" : "N/A",

"dataguard_role" : "PHYSICAL STANDBY",

"property" : "PreferredObserverHosts",

"property_type" : "CONFIGURABLE",

"value" : "",

"value_type" : "STRING",

"scope" : "MEMBER",

"valid_role" : "N/A",

"con_id" : 0

{

"member" : "adghol_p4n_lhr",

"instance" : "N/A",

"dataguard_role" : "PHYSICAL STANDBY",

"property" : "RedoRoutes",

"property_type" : "CONFIGURABLE",

"value" : "",

"value_type" : "STRING",

"scope" : "MEMBER",

"valid_role" : "N/A",

"con_id" : 0

{

"member" : "adghol_p4n_lhr",

"instance" : "N/A",

"dataguard_role" : "PHYSICAL STANDBY",

"property" : "RedoCompression",

"property_type" : "CONFIGURABLE",

"value" : "DISABLE",

"value_type" : "STRING",

"scope" : "MEMBER",

"valid_role" : "STANDBY",

"con_id" : 0

}

]

}

]

}

The monitorable properties can be monitored using DBMS_DG.GET_PROPERTY(). I’ll write a blog post about the new PL/SQL APIs in the upcoming weeks.

I wish I had this view when I was a DBA 🙂

V$FAST_START_FAILOVER_CONFIG

If you have a Fast-Start Failover configuration, this view will show its details:

SQL> SELECT fsfo_mode, status, current_target, threshold, observer_present, observer_host,
 2> protection_mode, lag_limit, auto_reinstate, observer_override, shutdown_primary FROM V$FAST_START_FAILOVER_CONFIG;

FSFO_MODE           STATUS                 CURRENT_TARGET THRESHOLD OBSERVE OBSERVER_HOST PROTECTION_MODE  LAG_LIMIT AUTO_ OBSER SHUTD
___________________ ______________________ ______________ _________ _______ _____________ ________________ _________ _____ _____ _____
POTENTIAL DATA LOSS TARGET UNDER LAG LIMIT mydb_site2           180 YES     mydb-obs      MaxPerformance         300 TRUE  FALSE TRUE

SQL> SELECT fsfo_mode, status, current_target, threshold, observer_present, observer_host,

2> protection_mode, lag_limit, auto_reinstate, observer_override, shutdown_primary FROM V$FAST_START_FAILOVER_CONFIG;

FSFO_MODE STATUS CURRENT_TARGET THRESHOLD OBSERVE OBSERVER_HOST PROTECTION_MODE LAG_LIMIT AUTO_ OBSER SHUTD

___________________ ______________________ ______________ _________ _______ _____________ ________________ _________ _____ _____ _____

POTENTIAL DATA LOSS TARGET UNDER LAG LIMIT mydb_site2 180 YES mydb-obs MaxPerformance 300 TRUE FALSE TRUE

This view replaces some columns currently in v$database, that are therefore deprecated:

SQL> desc v$database

Name                            Null?    Type
_______________________________ ________ ________________
...
FS_FAILOVER_MODE                         VARCHAR2(19)
FS_FAILOVER_STATUS                       VARCHAR2(22)
FS_FAILOVER_CURRENT_TARGET               VARCHAR2(30)
FS_FAILOVER_THRESHOLD                    NUMBER
FS_FAILOVER_OBSERVER_PRESENT             VARCHAR2(7)
FS_FAILOVER_OBSERVER_HOST                VARCHAR2(512)
...

SQL> desc v$database

Name Null? Type

_______________________________ ________ ________________

...

FS_FAILOVER_MODE VARCHAR2(19)

FS_FAILOVER_STATUS VARCHAR2(22)

FS_FAILOVER_CURRENT_TARGET VARCHAR2(30)

FS_FAILOVER_THRESHOLD NUMBER

FS_FAILOVER_OBSERVER_PRESENT VARCHAR2(7)

FS_FAILOVER_OBSERVER_HOST VARCHAR2(512)

...

V$FS_LAG_HISTOGRAM

This view is useful to calculate the optimal FastStartFailoverLagTime.

SQL> select * from v$fs_lag_histogram;

   THREAD# LAG_TYPE      LAG_TIME  LAG_COUNT LAST_UPDATE_TIME         CON_ID                        
---------- ----------- ---------- ---------- -------------------- ----------                        
         1 APPLY                5        122 01/23/2023 10:46:07           0                        
         1 APPLY               10          5 01/02/2023 16:12:42           0                        
         1 APPLY               15          2 12/25/2022 12:01:23           0                        
         1 APPLY               30          0                               0                        
         1 APPLY               60          0                               0                        
         1 APPLY              120          0                               0                        
         1 APPLY              180          0                               0                        
         1 APPLY              300          0                               0                        
         1 APPLY            65535          0                               0

SQL> select * from v$fs_lag_histogram;

THREAD# LAG_TYPE LAG_TIME LAG_COUNT LAST_UPDATE_TIME CON_ID

---------- ----------- ---------- ---------- -------------------- ----------

1 APPLY 5 122 01/23/2023 10:46:07 0

1 APPLY 10 5 01/02/2023 16:12:42 0

1 APPLY 15 2 12/25/2022 12:01:23 0

1 APPLY 30 0 0

1 APPLY 60 0 0

1 APPLY 120 0 0

1 APPLY 180 0 0

1 APPLY 300 0 0

1 APPLY 65535 0 0

It shows the frequency of Fast-Start Failover lags and the most recent occurrence for each bucket.

LAG_TIME is the upper bound of the bucket, e.g.

5 -> between 0 and 5 seconds
10 -> between 5 and 10 seconds
etc.

It’s refreshed every minute, only when Fast-Start Failover is enabled (also in observe-only mode).

V$FS_FAILOVER_OBSERVERS

This view is not new, however, its definition now contains more columns:

SQL> desc  v$fs_failover_observers
 Name                           Null?    Type
 ------------------------------ -------- -----------------
 NAME                                    VARCHAR2(513)
 REGISTERED                              VARCHAR2(4)
 HOST                                    VARCHAR2(513)
 ISMASTER                                VARCHAR2(4)
 TIME_SELECTED                           TIMESTAMP(9)
 PINGING_PRIMARY                         VARCHAR2(4)
 PINGING_TARGET                          VARCHAR2(4)
 CON_ID                                  NUMBER
 
 -- new in 23c:
 LAST_PING_PRIMARY                       NUMBER
 LAST_PING_TARGET                        NUMBER
 LOG_FILE                                VARCHAR2(513)
 STATE_FILE                              VARCHAR2(513)
 CURRENT_TIME                            TIMESTAMP(9)

SQL> desc v$fs_failover_observers

Name Null? Type

------------------------------ -------- -----------------

NAME VARCHAR2(513)

REGISTERED VARCHAR2(4)

HOST VARCHAR2(513)

ISMASTER VARCHAR2(4)

TIME_SELECTED TIMESTAMP(9)

PINGING_PRIMARY VARCHAR2(4)

PINGING_TARGET VARCHAR2(4)

CON_ID NUMBER

-- new in 23c:

LAST_PING_PRIMARY NUMBER

LAST_PING_TARGET NUMBER

LOG_FILE VARCHAR2(513)

STATE_FILE VARCHAR2(513)

CURRENT_TIME TIMESTAMP(9)

This gives important additional information about the observers, for example, the last time a specific observer was able to ping the primary or the target (in seconds).

Also, the path of the log file and runtime data file are available, making it easier to find them on the observer host in case of a problem.

Conclusion

These new views should greatly improve the experience when monitoring or diagnosing problems with Data Guard. But they are just a part of many improvements we introduced in 23c. Stay tuned for more 🙂

—

Ludovico

New in Data Guard 21c and 23c: Automatic preparation of the primary

Posted on December 22, 2023 by Ludovico

Oracle Data Guard 21c came with a new command:

prepare database for data guard
with db_unique_name is {db_unique_name}
db_recovery_file_dest_size is "{size}"
db_recovery_file_dest is "{dest}" ;

prepare database for data guard

with db_unique_name is {db_unique_name}

db_recovery_file_dest_size is "{size}"

db_recovery_file_dest is "{dest}" ;

This command prepares a database to become primary in a Data Guard configuration.

It sets many recommended parameters:

DB_FILES                      = 1024
LOG_BUFFER                    = 256M
DB_BLOCK_CHECKSUM             = TYPICAL
DB_LOST_WRITE_PROTECT         = TYPICAL
DB_FLASHBACK_RETENTION_TARGET = 120
PARALLEL_THREADS_PER_CPU      = 1
STANDBY_FILE_MANAGEMENT       = AUTO
DG_BROKER_START               = TRUE

DB_FILES = 1024

LOG_BUFFER = 256M

DB_BLOCK_CHECKSUM = TYPICAL

DB_LOST_WRITE_PROTECT = TYPICAL

DB_FLASHBACK_RETENTION_TARGET = 120

PARALLEL_THREADS_PER_CPU = 1

STANDBY_FILE_MANAGEMENT = AUTO

DG_BROKER_START = TRUE

Sets the RMAN archive deletion policy, enables flashback and force logging, creates the standby logs according to the online redo logs configuration, and creates an spfile if the database is running with an init file.

If you tried this in 21c, you have noticed that there is an automatic restart of the database to set all the static parameters. If you weren’t expecting this, the sudden restart could be a bit brutal approach.

In 23c, we added an additional keyword “restart” to specify that you are OK with the restart of the database. If you don’t specify it, the broker will complain that it cannot proceed without a restart:

DGMGRL> prepare database for data guard
> with db_unique_name is chol23c_hwq_lhr
> db_recovery_file_dest_size is "200g"
> db_recovery_file_dest is "/u03/app/oracle/fast_recovery_area"
> ;
Validating database "cdb1" before executing the command.
  DGM-17552: Primary database must be restarted after setting static initialization parameters.
  DGM-17327: Primary database must be restarted to enable archivelog mode.
Failed.
DGMGRL>

DGMGRL> prepare database for data guard

> with db_unique_name is chol23c_hwq_lhr

> db_recovery_file_dest_size is "200g"

> db_recovery_file_dest is "/u03/app/oracle/fast_recovery_area"

> ;

Validating database "cdb1" before executing the command.

DGM-17552: Primary database must be restarted after setting static initialization parameters.

DGM-17327: Primary database must be restarted to enable archivelog mode.

Failed.

DGMGRL>

If you specify it, it will proceed with the restart:

DGMGRL> prepare database for data guard
>   with db_unique_name is chol23c_hwq_lhr
>   db_recovery_file_dest_size is "200g"
>   db_recovery_file_dest is "/u03/app/oracle/fast_recovery_area"
>   restart;
Validating database "chol23c_hwq_lhr" before executing the command.
Preparing database "chol23c_hwq_lhr" for Data Guard.
Initialization parameter DB_FILES set to 1024.
Initialization parameter LOG_BUFFER set to 268435456.
Primary database must be restarted after setting static initialization parameters.
Shutting down database "chol23c_hwq_lhr".
Database closed.
Database dismounted.
ORACLE instance shut down.
Starting database "chol23c_hwq_lhr" to mounted mode.
ORACLE instance started.
Database mounted.
Initialization parameter DB_FLASHBACK_RETENTION_TARGET set to 120.
Initialization parameter DB_LOST_WRITE_PROTECT set to 'TYPICAL'.
RMAN configuration archivelog deletion policy set to SHIPPED TO ALL STANDBY.
Initialization parameter DB_RECOVERY_FILE_DEST_SIZE set to '200g'.
Initialization parameter DB_RECOVERY_FILE_DEST set to '/u03/app/oracle/fast_recovery_area'.
LOG_ARCHIVE_DEST_n initialization parameter already set for local archival.
Initialization parameter LOG_ARCHIVE_DEST_2 set to 'location=use_db_recovery_file_dest valid_for=(all_logfiles, all_roles)'.
Initialization parameter LOG_ARCHIVE_DEST_STATE_2 set to 'Enable'.
Adding standby log group size 1073741824 and assigning it to thread 1.
Adding standby log group size 1073741824 and assigning it to thread 1.
Adding standby log group size 1073741824 and assigning it to thread 1.
Initialization parameter STANDBY_FILE_MANAGEMENT set to 'AUTO'.
Initialization parameter DG_BROKER_START set to TRUE.
Database set to FLASHBACK ON.
Database opened.
Succeeded.
DGMGRL>

DGMGRL> prepare database for data guard

> with db_unique_name is chol23c_hwq_lhr

> db_recovery_file_dest_size is "200g"

> db_recovery_file_dest is "/u03/app/oracle/fast_recovery_area"

> restart;

Validating database "chol23c_hwq_lhr" before executing the command.

Preparing database "chol23c_hwq_lhr" for Data Guard.

Initialization parameter DB_FILES set to 1024.

Initialization parameter LOG_BUFFER set to 268435456.

Primary database must be restarted after setting static initialization parameters.

Shutting down database "chol23c_hwq_lhr".

Database closed.

Database dismounted.

ORACLE instance shut down.

Starting database "chol23c_hwq_lhr" to mounted mode.

ORACLE instance started.

Database mounted.

Initialization parameter DB_FLASHBACK_RETENTION_TARGET set to 120.

Initialization parameter DB_LOST_WRITE_PROTECT set to 'TYPICAL'.

RMAN configuration archivelog deletion policy set to SHIPPED TO ALL STANDBY.

Initialization parameter DB_RECOVERY_FILE_DEST_SIZE set to '200g'.

Initialization parameter DB_RECOVERY_FILE_DEST set to '/u03/app/oracle/fast_recovery_area'.

LOG_ARCHIVE_DEST_n initialization parameter already set for local archival.

Initialization parameter LOG_ARCHIVE_DEST_2 set to 'location=use_db_recovery_file_dest valid_for=(all_logfiles, all_roles)'.

Initialization parameter LOG_ARCHIVE_DEST_STATE_2 set to 'Enable'.

Adding standby log group size 1073741824 and assigning it to thread 1.

Initialization parameter STANDBY_FILE_MANAGEMENT set to 'AUTO'.

Initialization parameter DG_BROKER_START set to TRUE.

Database set to FLASHBACK ON.

Database opened.

Succeeded.

DGMGRL>

Notice that if you already have these static parameters set, the broker will just set the missing dynamic parameters without the need for a restart:

DGMGRL> prepare database for data guard
>   with db_unique_name is chol23c_hwq_lhr
>   db_recovery_file_dest_size is "200g"
>   db_recovery_file_dest is "/u03/app/oracle/fast_recovery_area"
> ;
Validating database "chol23c_hwq_lhr" before executing the command.
Preparing database "chol23c_hwq_lhr" for Data Guard.
Initialization parameter DB_RECOVERY_FILE_DEST_SIZE set to '200g'.
Initialization parameter DB_RECOVERY_FILE_DEST set to '/u03/app/oracle/fast_recovery_area'.
LOG_ARCHIVE_DEST_n initialization parameter already set for local archival.
Initialization parameter LOG_ARCHIVE_DEST_1 set to 'location=use_db_recovery_file_dest valid_for=(all_logfiles, all_roles)'.
Initialization parameter LOG_ARCHIVE_DEST_STATE_1 set to 'Enable'.
Succeeded.

DGMGRL> prepare database for data guard

> with db_unique_name is chol23c_hwq_lhr

> db_recovery_file_dest_size is "200g"

> db_recovery_file_dest is "/u03/app/oracle/fast_recovery_area"

> ;

Validating database "chol23c_hwq_lhr" before executing the command.

Preparing database "chol23c_hwq_lhr" for Data Guard.

Initialization parameter DB_RECOVERY_FILE_DEST_SIZE set to '200g'.

Initialization parameter DB_RECOVERY_FILE_DEST set to '/u03/app/oracle/fast_recovery_area'.

LOG_ARCHIVE_DEST_n initialization parameter already set for local archival.

Initialization parameter LOG_ARCHIVE_DEST_1 set to 'location=use_db_recovery_file_dest valid_for=(all_logfiles, all_roles)'.

Initialization parameter LOG_ARCHIVE_DEST_STATE_1 set to 'Enable'.

Succeeded.

This new command greatly simplifies the preparation of a Data Guard configuration!

Before 21c, you had to do everything by hand.

—

Ludo

When it comes to using Oracle, trust Oracle…

Posted on July 14, 2023 by Ludovico

A month ago, I saw this article published on the AWS architecture blog:

Disaster Recovery for Oracle Database on Amazon EC2 with Fast-Start Failover

I love seeing people suggesting Oracle Data Guard Fast-Start Failover for high availability. Nevertheless, there are a few problems with the architecture and steps proposed in the article.

I sent my comments via Disqus on the AWS blogging platform, but after a month, my comment was rejected, and the blog content hasn’t changed.

For this reason, I don’t have other places to post my comment but here…

The link to the setup procedure is from 2009.
We have official documentation that we keep up to date. The Fast-Start Failover part:
https://docs.oracle.com/en/database/oracle/oracle-database/19/dgbkr/using-data-guard-broker-to-manage-switchovers-failovers.html#GUID-D26D79F2-0093-4C0E-98CD-224A5C8CBFA4
and the Best Practices guide:
https://docs.oracle.com/en/database/oracle/oracle-database/19/haovw/oracle-data-guard-best-practices.html#GUID-C3A78B07-6584-4380-8D53-E5B831A5894C
The part about cascading standbys references a step-by-step guide from an external blog written many years ago for 11gR2.
The DBMS_SERVICE doc is from 12cR1, while other links are from 21c doc or 19c doc. As of today, most implement 19c. That’s probably the version to use.
https://docs.oracle.com/en/database/oracle/oracle-database/19/arpls/DBMS_SERVICE.html#GUID-C11449DC-EEDE-4BB8-9D2C-0A45198C1928
The steps used to create the database service do not include any HA property, which will make most efforts useless. (see Table 153-6 in the link above).
The article talks about TAF, but no steps exist to configure it. We don’t recommend TAF since 12c anyway. Today (19c), the recommendation is TAC (Transparent Application Continuity).
https://www.oracle.com/docs/tech/application-checklist-for-continuous-availability-for-maa.pdf
But, most important, TAF (or Oracle connectivity in general) does NOT require a host IP change! There is no need to change the DNS when using the recommended connection string with multiple address_lists.
Some RedoRoutes examples are not correct. In this video I explain how they work and how to set them up:
https://www.youtube.com/watch?v=huG8JPu_s4Q
The diagram shows the master observer together with the standby database, which is a bad practice. I explain why and how here:
https://www.youtube.com/watch?v=e81UPLfnLi0

The central message is:

If you need to implement a complex architecture using a software solution, pay attention that the practices suggested by the partner/integrator/3rd party match the ones from the software vendor. In the case of Oracle Data Guard, Oracle knows better 😉

Cheers

—

Ludovico

Video: Where should I put the Observer in a Fast-Start Failover configuration?

Posted on November 29, 2022 by Ludovico

The video explains best practices and different failure scenarios for different observer placements. It also shows how to configure high availability for the observer.

Here’s the summary:

Always try to put the observer(s) on an external site.
If you don’t have any, put it where the primary database is, and have one ready on the secondary site after the role transition.
Don’t put the observer together with the standby database!
Configure multiple observers for high availability, and use the PreferredObserverHosts Data Guard member property to ensure you never run the observer where the standby database is.

Video: The importance of Fast-Start Failover in an Oracle Data Guard configuration

Posted on November 29, 2022 by Ludovico

Why is Fast-Start Failover a crucial component for mission-critical Data Guard deployments?
The observer lowers the RTO in case of failure, and the Fast-Start Failover protection modes protect the database from split-brain and data loss.

Far Sync and Fast-Start Failover Protection modes

Posted on April 14, 2022 by Ludovico

Oracle advertises Far Sync as a solution for “Zero Data Loss at any distance”. This is because the primary sends its redo stream synchronously to the Far Sync, which relays it to the remote physical standby.

There are many reasons why Far Sync is an optimal solution for this use case, but that’s not the topic of this post 🙂

Some customers ask: Can I configure Far Sync to receive the redo stream asynchronously?

Although a direct standby receiving asynchronously would be a better idea, Far Sync can receive asynchronously as well.

And one reason might be to send asynchronously to one Far Sync member that redistributes locally to many standbys.

It is very simple to achieve: just changing the RedoRoutes property on the primary.

RedoRoutes = '(LOCAL : cdgsima_farsync1 ASYNC)'

1	RedoRoutes = '(LOCAL : cdgsima_farsync1 ASYNC)'

This will work seamlessly. The v$dataguard_process will show the async transport process:

NAME PID TYP ACTION CLIENT_PID CLIENT_ROLE GROUP# RESETLOG_ID THREAD# SEQUENCE# BLOCK#
TT02 440 KSV async ORL multi 0 none 2 1098480879 1 146 456

1 2	NAME PID TYP ACTION CLIENT_PID CLIENT_ROLE GROUP# RESETLOG_ID THREAD# SEQUENCE# BLOCK# TT02 440 KSV async ORL multi 0 none 2 1098480879 1 146 456

What about Fast-Start Failover?

Up to and including 19c, ASYNC transport to Far Sync will not work with Fast-Start Failover (FSFO).

ASYNC redo transport mandates Maximum Performance protection mode, and FSFO supports that in conjunction with Far Sync only starting with 21c.

Before 21c, trying to enable FSFO with a Far Sync will fail with:

effective redo transport mode is incompatible with the configuration protection mode

1	effective redo transport mode is incompatible with the configuration protection mode

DGMGRL> show fast_start failover

Fast-Start Failover:  Disabled

  Protection Mode:    MaxPerformance
  Lag Limit:          30 seconds

  Threshold:          30 seconds
  Active Target:      (none)
  Potential Targets:  "cdgsima_lhr1bm"
    cdgsima_lhr1bm invalid - effective redo transport mode is incompatible with the configuration protection mode
  Observer:           (none)
  Shutdown Primary:   TRUE
  Auto-reinstate:     TRUE
  Observer Reconnect: (none)
  Observer Override:  FALSE

Configurable Failover Conditions
  Health Conditions:
    Corrupted Controlfile          YES
    Corrupted Dictionary           YES
    Inaccessible Logfile            NO
    Stuck Archiver                  NO
    Datafile Write Errors          YES

  Oracle Error Conditions:
    (none)

DGMGRL> show fast_start failover

Fast-Start Failover: Disabled

Protection Mode: MaxPerformance

Lag Limit: 30 seconds

Threshold: 30 seconds

Active Target: (none)

Potential Targets: "cdgsima_lhr1bm"

cdgsima_lhr1bm invalid - effective redo transport mode is incompatible with the configuration protection mode

Observer: (none)

Shutdown Primary: TRUE

Auto-reinstate: TRUE

Observer Reconnect: (none)

Observer Override: FALSE

Configurable Failover Conditions

Health Conditions:

Corrupted Controlfile YES

Corrupted Dictionary YES

Inaccessible Logfile NO

Stuck Archiver NO

Datafile Write Errors YES

Oracle Error Conditions:

(none)

So if you want FSFO with Far Sync in 19c, it has to be MaxAvailability (and SYNC redo transport to the FarSync).

If you don’t need FSFO, as we have seen, there is no problem. The only protection mode that will not work with Far Sync is Maximum Protection:

If FSFO is required, and you want Maximum Performance before 21c, or Maximum Protection, you have to remove Far Sync from the redo route.

—

Ludovico

Can a physical standby database receive the redo SYNC if the Far Sync instance fails?

Posted on April 7, 2022 by Ludovico

The answer is YES.

In the following configuration, cdgsima_lhr1pq (primary) sends synchronously to cdgsima_farsync1 (far sync), which forwards the redo stream asynchronously to cdgsima_lhr1bm (physical standby):

DGMGRL> show configuration verbose

Configuration - cdgsima

  Protection Mode: MaxPerformance
  Members:
  cdgsima_lhr1pq   - Primary database
    cdgsima_farsync1 - Far sync instance
      cdgsima_lhr1bm   - Physical standby database
    cdgsima_lhr1bm   - Physical standby database (alternate of cdgsima_farsync1)

  Members Not Receiving Redo:
  cdgsima_farsync2 - Far sync instance

DGMGRL> show configuration verbose

Configuration - cdgsima

Protection Mode: MaxPerformance

Members:

cdgsima_lhr1pq - Primary database

cdgsima_farsync1 - Far sync instance

cdgsima_lhr1bm - Physical standby database

cdgsima_lhr1bm - Physical standby database (alternate of cdgsima_farsync1)

Members Not Receiving Redo:

cdgsima_farsync2 - Far sync instance

But if cdgsima_farsync1 is not available, I want the primary to send synchronously to the physical standby database. I accept a performance penalty, but I do not want to compromise my data protection.

I just need to set up the Redoroutes as follows:

-- when primary is cdgsima_lhr1pq 
EDIT DATABASE 'cdgsima_lhr1pq' SET PROPERTY 'RedoRoutes' = '(LOCAL : (cdgsima_farsync1 SYNC PRIORITY=1, cdgsima_lhr1bm SYNC PRIORITY=2 ))';
EDIT FAR_SYNC 'cdgsima_farsync1' SET PROPERTY 'RedoRoutes' = '(cdgsima_lhr1pq : cdgsima_lhr1bm ASYNC)';

-- when primary is cdgsima_lhr1bm
EDIT DATABASE 'cdgsima_lhr1bm' SET PROPERTY 'RedoRoutes' = '(LOCAL : (cdgsima_farsync2 SYNC PRIORITY=1, cdgsima_lhr1pq SYNC PRIORITY=2 ))';
EDIT FAR_SYNC 'cdgsima_farsync2' SET PROPERTY 'RedoRoutes' = '(cdgsima_lhr1bm : cdgsima_lhr1pq ASYNC)';

-- when primary is cdgsima_lhr1pq

EDIT DATABASE 'cdgsima_lhr1pq' SET PROPERTY 'RedoRoutes' = '(LOCAL : (cdgsima_farsync1 SYNC PRIORITY=1, cdgsima_lhr1bm SYNC PRIORITY=2 ))';

EDIT FAR_SYNC 'cdgsima_farsync1' SET PROPERTY 'RedoRoutes' = '(cdgsima_lhr1pq : cdgsima_lhr1bm ASYNC)';

-- when primary is cdgsima_lhr1bm

EDIT DATABASE 'cdgsima_lhr1bm' SET PROPERTY 'RedoRoutes' = '(LOCAL : (cdgsima_farsync2 SYNC PRIORITY=1, cdgsima_lhr1pq SYNC PRIORITY=2 ))';

EDIT FAR_SYNC 'cdgsima_farsync2' SET PROPERTY 'RedoRoutes' = '(cdgsima_lhr1bm : cdgsima_lhr1pq ASYNC)';

This is defined the second part of the RedoRoutes rules:

cdgsima_lhr1bm SYNC PRIORITY=2

1	cdgsima_lhr1bm SYNC PRIORITY=2

Let’s test. If I shutdown abort the farsync instance:

$ rlwrap sqlplus / as sysdba

SQL*Plus: Release 19.0.0.0.0 - Production on Sat Mar 26 10:55:31 2022
Version 19.13.0.0.0

Copyright (c) 1982, 2021, Oracle.  All rights reserved.


Connected to:
Oracle Database 19c EE Extreme Perf Release 19.0.0.0.0 - Production
Version 19.13.0.0.0

SQL> shutdown abort
ORACLE instance shut down.
SQL>

$ rlwrap sqlplus / as sysdba

SQL*Plus: Release 19.0.0.0.0 - Production on Sat Mar 26 10:55:31 2022

Version 19.13.0.0.0

Connected to:

Oracle Database 19c EE Extreme Perf Release 19.0.0.0.0 - Production

Version 19.13.0.0.0

SQL> shutdown abort

ORACLE instance shut down.

SQL>

I can see the new SYNC destination being open almost instantaneously (because the old destination fails immediately with ORA-03113):

2022-03-26T10:55:35.581460+00:00
LGWR (PID:42101): Attempting LAD:2 network reconnect (3113)
LGWR (PID:42101): LAD:2 network reconnect abandoned
2022-03-26T10:55:35.602542+00:00
Errors in file /u01/app/oracle/diag/rdbms/cdgsima_lhr1pq/cdgsima/trace/cdgsima_lgwr_42101.trc:
ORA-03113: end-of-file on communication channel
LGWR (PID:42101): Error 3113 for LNO:3 to 'dgsima1.dbdgsima.misclabs.oraclevcn.com:1521/cdgsima_farsync1.dbdgsima.misclabs.oraclevcn.com'
2022-03-26T10:55:35.608691+00:00
LGWR (PID:42101): LAD:2 is UNSYNCHRONIZED
2022-03-26T10:55:36.610098+00:00
LGWR (PID:42101): Failed to archive LNO:3 T-1.S-141, error=3113
LGWR (PID:42101): Error 1041 disconnecting from LAD:2 standby host 'dgsima1.dbdgsima.misclabs.oraclevcn.com:1521/cdgsima_farsync1.dbdgsima.misclabs.oraclevcn.com'
2022-03-26T10:55:37.143448+00:00
LGWR (PID:42101): LAD:3 is UNSYNCHRONIZED
2022-03-26T10:55:37.143569+00:00
LGWR (PID:42101): LAD:2 no longer supports SYNCHRONIZATION
Starting background process NSS3
2022-03-26T10:55:37.227954+00:00
NSS3 started with pid=38, OS id=78251
2022-03-26T10:55:40.733905+00:00
Thread 1 advanced to log sequence 142 (LGWR switch),  current SCN: 8068734
  Current log# 1 seq# 142 mem# 0: /u03/app/oracle/redo/CDGSIMA_LHR1PQ/onlinelog/o1_mf_1_k251hfvk_.log
2022-03-26T10:55:40.781499+00:00
ARC0 (PID:42266): Archived Log entry 220 added for T-1.S-141 ID 0x9eb046ef LAD:1
2022-03-26T10:55:41.606175+00:00
ALTER SYSTEM SET log_archive_dest_state_3='ENABLE' SCOPE=MEMORY SID='*';
2022-03-26T10:55:43.747483+00:00
LGWR (PID:42101): LAD:3 is SYNCHRONIZED
2022-03-26T10:55:43.816978+00:00
Thread 1 advanced to log sequence 143 (LGWR switch),  current SCN: 8068743
  Current log# 2 seq# 143 mem# 0: /u03/app/oracle/redo/CDGSIMA_LHR1PQ/onlinelog/o1_mf_2_k251hfwz_.log

2022-03-26T10:55:35.581460+00:00

LGWR (PID:42101): Attempting LAD:2 network reconnect (3113)

LGWR (PID:42101): LAD:2 network reconnect abandoned

2022-03-26T10:55:35.602542+00:00

Errors in file /u01/app/oracle/diag/rdbms/cdgsima_lhr1pq/cdgsima/trace/cdgsima_lgwr_42101.trc:

ORA-03113: end-of-file on communication channel

LGWR (PID:42101): Error 3113 for LNO:3 to 'dgsima1.dbdgsima.misclabs.oraclevcn.com:1521/cdgsima_farsync1.dbdgsima.misclabs.oraclevcn.com'

2022-03-26T10:55:35.608691+00:00

LGWR (PID:42101): LAD:2 is UNSYNCHRONIZED

2022-03-26T10:55:36.610098+00:00

LGWR (PID:42101): Failed to archive LNO:3 T-1.S-141, error=3113

LGWR (PID:42101): Error 1041 disconnecting from LAD:2 standby host 'dgsima1.dbdgsima.misclabs.oraclevcn.com:1521/cdgsima_farsync1.dbdgsima.misclabs.oraclevcn.com'

2022-03-26T10:55:37.143448+00:00

LGWR (PID:42101): LAD:3 is UNSYNCHRONIZED

2022-03-26T10:55:37.143569+00:00

LGWR (PID:42101): LAD:2 no longer supports SYNCHRONIZATION

Starting background process NSS3

2022-03-26T10:55:37.227954+00:00

NSS3 started with pid=38, OS id=78251

2022-03-26T10:55:40.733905+00:00

Thread 1 advanced to log sequence 142 (LGWR switch), current SCN: 8068734

Current log# 1 seq# 142 mem# 0: /u03/app/oracle/redo/CDGSIMA_LHR1PQ/onlinelog/o1_mf_1_k251hfvk_.log

2022-03-26T10:55:40.781499+00:00

ARC0 (PID:42266): Archived Log entry 220 added for T-1.S-141 ID 0x9eb046ef LAD:1

2022-03-26T10:55:41.606175+00:00

ALTER SYSTEM SET log_archive_dest_state_3='ENABLE' SCOPE=MEMORY SID='*';

2022-03-26T10:55:43.747483+00:00

LGWR (PID:42101): LAD:3 is SYNCHRONIZED

2022-03-26T10:55:43.816978+00:00

Thread 1 advanced to log sequence 143 (LGWR switch), current SCN: 8068743

Current log# 2 seq# 143 mem# 0: /u03/app/oracle/redo/CDGSIMA_LHR1PQ/onlinelog/o1_mf_2_k251hfwz_.log

Indeed, I can see the new NSS process (synchronous redo transport) spawn at that time:

SQL> r
  1  select NAME
  2  ,PID
  3  ,TYPE
  4  ,ROLE ACTION
  5  ,CLIENT_PID
  6  ,CLIENT_ROLE
  7  ,GROUP#
  8  ,RESETLOG_ID
  9  ,THREAD#
 10  ,SEQUENCE#
 11  ,BLOCK#
 12* from v$dataguard_process where name like 'NSS%'

NAME  PID                      TYP ACTION                   CLIENT_PID CLIENT_ROLE          GROUP# RESETLOG_ID    THREAD#  SEQUENCE#     BLOCK#
----- ------------------------ --- ------------------------ ---------- ---------------- ---------- ----------- ---------- ---------- ----------
NSS2  54961                    KSB sync                              0 none                      0           0          0          0          0
NSS3  78251                    KSB sync                              0 none                      0           0          0          0          0

SQL> !ps -eaf | grep ora_nss
oracle   54961     1  0 Mar10 ?        00:00:55 ora_nss2_cdgsima
oracle   78251     1  0 10:55 ?        00:00:00 ora_nss3_cdgsima

SQL> r

1 select NAME

2 ,PID

3 ,TYPE

4 ,ROLE ACTION

5 ,CLIENT_PID

6 ,CLIENT_ROLE

7 ,GROUP#

8 ,RESETLOG_ID

9 ,THREAD#

10 ,SEQUENCE#

11 ,BLOCK#

12* from v$dataguard_process where name like 'NSS%'

NAME PID TYP ACTION CLIENT_PID CLIENT_ROLE GROUP# RESETLOG_ID THREAD# SEQUENCE# BLOCK#

----- ------------------------ --- ------------------------ ---------- ---------------- ---------- ----------- ---------- ---------- ----------

NSS2 54961 KSB sync 0 none 0 0 0 0 0

NSS3 78251 KSB sync 0 none 0 0 0 0 0

SQL> !ps -eaf | grep ora_nss

oracle 54961 1 0 Mar10 ? 00:00:55 ora_nss2_cdgsima

oracle 78251 1 0 10:55 ? 00:00:00 ora_nss3_cdgsima

—

Ludo

Can I rename a PDB in a Data Guard configuration?

Posted on November 21, 2021 by Ludovico

Someone asked me this question recently.

The answer is: yes!

Let’s see it in action.

On the primary I have:

----- PRIMARY
SQL> show pdbs;

    CON_ID CON_NAME                       OPEN MODE  RESTRICTED
---------- ------------------------------ ---------- ----------
         2 PDB$SEED                       READ ONLY  NO
         3 RED                            READ WRITE NO
         4 SAND                           READ WRITE NO

----- PRIMARY

SQL> show pdbs;

CON_ID CON_NAME OPEN MODE RESTRICTED

---------- ------------------------------ ---------- ----------

2 PDB$SEED READ ONLY NO

3 RED READ WRITE NO

4 SAND READ WRITE NO

And of course the same PDBs on the standby:

----- STANDBY
SQL> show pdbs

    CON_ID CON_NAME                       OPEN MODE  RESTRICTED
---------- ------------------------------ ---------- ----------
         2 PDB$SEED                       MOUNTED
         3 RED                            MOUNTED
         4 SAND                           MOUNTED

----- STANDBY

SQL> show pdbs

CON_ID CON_NAME OPEN MODE RESTRICTED

---------- ------------------------------ ---------- ----------

2 PDB$SEED MOUNTED

3 RED MOUNTED

4 SAND MOUNTED

Let’s change the PDB RED name to TOBY: The PDB rename operation is straightforward (but it requires a brief downtime). To be done on the primary:

SQL> alter pluggable database red close;

Pluggable database altered.

SQL> alter pluggable database red open restricted;

Pluggable database altered.

SQL> alter session set container=red;

Session altered.

SQL> alter pluggable database rename global_name to toby;

Pluggable database altered.

SQL> alter session set container=cdb$root;

Session altered.

SQL> show pdbs

    CON_ID CON_NAME                       OPEN MODE  RESTRICTED
---------- ------------------------------ ---------- ----------
         2 PDB$SEED                       READ ONLY  NO
         3 TOBY                           READ WRITE YES
         4 SAND                           READ WRITE NO

SQL> alter pluggable database toby close;

Pluggable database altered.


SQL> alter pluggable database toby open;

Pluggable database altered.

SQL>

SQL> alter pluggable database red close;

Pluggable database altered.

SQL> alter pluggable database red open restricted;

Pluggable database altered.

SQL> alter session set container=red;

Session altered.

SQL> alter pluggable database rename global_name to toby;

Pluggable database altered.

SQL> alter session set container=cdb$root;

Session altered.

SQL> show pdbs

CON_ID CON_NAME OPEN MODE RESTRICTED

---------- ------------------------------ ---------- ----------

2 PDB$SEED READ ONLY NO

3 TOBY READ WRITE YES

4 SAND READ WRITE NO

SQL> alter pluggable database toby close;

Pluggable database altered.

SQL> alter pluggable database toby open;

Pluggable database altered.

SQL>

On the standby, I can see that the PDB changed its name:

SQL> show pdbs

    CON_ID CON_NAME                       OPEN MODE  RESTRICTED
---------- ------------------------------ ---------- ----------
         2 PDB$SEED                       MOUNTED
         3 TOBY                           MOUNTED
         4 SAND                           MOUNTED
SQL>

SQL> show pdbs

CON_ID CON_NAME OPEN MODE RESTRICTED

---------- ------------------------------ ---------- ----------

2 PDB$SEED MOUNTED

3 TOBY MOUNTED

4 SAND MOUNTED

SQL>

The PDB name change is propagated transparently with the redo apply.

—

Ludo

Data Guard, Easy Connect and the Observer for multiple configurations

Posted on August 14, 2020 by Ludovico

EZConnect

One of the challenges of automation in bin Oracle Environments is dealing with tnsnames.ora files.
These files might grow big and are sometimes hard to distribute/maintain properly.
The worst is when manual modifications are needed: manual operations, if not made carefully, can screw up the connection to the databases.
The best solution is always using LDAP naming resolution. I have seen customers using OID, OUD, Active Directory, openldapd, all with a great level of control and automation. However, some customer don’t have/want this possibility and keep relying on TNS naming resolution.
When Data Guard (and eventually RAC) are in place, the tnsnames.ora gets filled by entries for the DGConnectIdentifiers and StaticConnectIdentifier. If I add the observer, an additional entry is required to access the dbname_CFG service created by the Fast Start Failover.

Actually, all these entries are not required if I use Easy Connect.

My friend Franck Pachot wrote a couple of nice blog posts about Easy Connect while working with me at CERN:
https://medium.com/@FranckPachot/19c-easy-connect-e0c3b77968d7

https://medium.com/@FranckPachot/19c-ezconnect-and-wallet-easy-connect-and-external-password-file-8e326bb8c9f5

Basic Data Guard configuration

The basic configuration with Data Guard is quite simple to achieve with Easy Connect. In this examples I have:
– The primary database TOOLCDB1_SITE1
– The duplicated database for standby TOOLCDB1_SITE2

After setting up the static registration (no Grid Infrastructure in my lab):

SID_LIST_LISTENER=
  (SID_LIST=
    (SID_DESC=
      (GLOBAL_DBNAME=TOOLCDB1_SITE1_DGMGRL)
      (SID_NAME=TOOLCDB1)
      (ORACLE_HOME=/u01/app/oracle/product/db_19_8_0)
    )
  )

SID_LIST_LISTENER=

(SID_LIST=

(SID_DESC=

(GLOBAL_DBNAME=TOOLCDB1_SITE1_DGMGRL)

(SID_NAME=TOOLCDB1)

(ORACLE_HOME=/u01/app/oracle/product/db_19_8_0)

)

and copying the passwordfile, the configuration can be created with:

DGMGRL> create configuration TOOLCDB1 as primary database is TOOLCDB1_SITE1 connect identifier is 'newbox01:1521/TOOLCDB1_SITE1';
Configuration "toolcdb1" created with primary database "toolcdb1_site1"

DGMGRL>  edit database TOOLCDB1_SITE1 set property 'StaticConnectIdentifier'='newbox01:1521/TOOLCDB1_SITE1_DGMGRL';
Property "StaticConnectIdentifier" updated

DGMGRL>  add database TOOLCDB1_SITE2 as connect identifier is 'newbox02:1521/TOOLCDB1_SITE2';
Database "toolcdb1_site2" added

DGMGRL>  edit database TOOLCDB1_SITE2 set property 'StaticConnectIdentifier'='newbox02:1521/TOOLCDB1_SITE2_DGMGRL';
Property "StaticConnectIdentifier" updated

DGMGRL>  enable configuration;
Enabled.

DGMGRL> create configuration TOOLCDB1 as primary database is TOOLCDB1_SITE1 connect identifier is 'newbox01:1521/TOOLCDB1_SITE1';

Configuration "toolcdb1" created with primary database "toolcdb1_site1"

DGMGRL> edit database TOOLCDB1_SITE1 set property 'StaticConnectIdentifier'='newbox01:1521/TOOLCDB1_SITE1_DGMGRL';

Property "StaticConnectIdentifier" updated

DGMGRL> add database TOOLCDB1_SITE2 as connect identifier is 'newbox02:1521/TOOLCDB1_SITE2';

Database "toolcdb1_site2" added

DGMGRL> edit database TOOLCDB1_SITE2 set property 'StaticConnectIdentifier'='newbox02:1521/TOOLCDB1_SITE2_DGMGRL';

Property "StaticConnectIdentifier" updated

DGMGRL> enable configuration;

Enabled.

That’s it.

Now, if I want to have the configuration observed, I need to activate the Fast Start Failover:

DGMGRL> edit database toolcdb1_site1 set property LogXptMode='SYNC';
Property "logxptmode" updated

DGMGRL> edit database toolcdb1_site2 set property LogXptMode='SYNC';
Property "logxptmode" updated

DGMGRL> edit database toolcdb1_site1 set property FastStartFailoverTarget='toolcdb1_site2';
Property "faststartfailovertarget" updated

DGMGRL> edit database toolcdb1_site2 set property FastStartFailoverTarget='toolcdb1_site1';
Property "faststartfailovertarget" updated

DGMGRL> edit configuration set protection mode as maxavailability;
Succeeded.

DGMGRL> enable fast_start failover;
Enabled in Zero Data Loss Mode.

DGMGRL> edit database toolcdb1_site1 set property LogXptMode='SYNC';

Property "logxptmode" updated

DGMGRL> edit database toolcdb1_site2 set property LogXptMode='SYNC';

Property "logxptmode" updated

DGMGRL> edit database toolcdb1_site1 set property FastStartFailoverTarget='toolcdb1_site2';

Property "faststartfailovertarget" updated

DGMGRL> edit database toolcdb1_site2 set property FastStartFailoverTarget='toolcdb1_site1';

Property "faststartfailovertarget" updated

DGMGRL> edit configuration set protection mode as maxavailability;

Succeeded.

DGMGRL> enable fast_start failover;

Enabled in Zero Data Loss Mode.

With just two databases, FastStartFailoverTarget is not explicitly needed, but I usually do it as other databases might be added to the configuration in the future.
After that, the broker complains that FSFO is enabled but there is no observer yet:

DGMGRL> show fast_start failover;

Fast-Start Failover: Enabled in Zero Data Loss Mode

  Protection Mode:    MaxAvailability
  Lag Limit:          0 seconds

  Threshold:          180 seconds
  Active Target:      toolcdb1_site2
  Potential Targets:  "toolcdb1_site2"
    toolcdb1_site2 valid
  Observer:           (none)
  Shutdown Primary:   TRUE
  Auto-reinstate:     TRUE
  Observer Reconnect: 180 seconds
  Observer Override:  FALSE

Configurable Failover Conditions
  Health Conditions:
    Corrupted Controlfile          YES
    Corrupted Dictionary           YES
    Inaccessible Logfile            NO
    Stuck Archiver                  NO
    Datafile Write Errors          YES

  Oracle Error Conditions:
    (none)


DGMGRL> show configuration;

Configuration - toolcdb1

  Protection Mode: MaxAvailability
  Members:
  toolcdb1_site1 - Primary database
    Warning: ORA-16819: fast-start failover observer not started

    toolcdb1_site2 - (*) Physical standby database

Fast-Start Failover: Enabled in Zero Data Loss Mode

Configuration Status:
WARNING   (status updated 39 seconds ago)

DGMGRL> show fast_start failover;

Fast-Start Failover: Enabled in Zero Data Loss Mode

Protection Mode: MaxAvailability

Lag Limit: 0 seconds

Threshold: 180 seconds

Active Target: toolcdb1_site2

Potential Targets: "toolcdb1_site2"

toolcdb1_site2 valid

Observer: (none)

Shutdown Primary: TRUE

Auto-reinstate: TRUE

Observer Reconnect: 180 seconds

Observer Override: FALSE

Configurable Failover Conditions

Health Conditions:

Corrupted Controlfile YES

Corrupted Dictionary YES

Inaccessible Logfile NO

Stuck Archiver NO

Datafile Write Errors YES

Oracle Error Conditions:

(none)

DGMGRL> show configuration;

Configuration - toolcdb1

Protection Mode: MaxAvailability

Members:

toolcdb1_site1 - Primary database

Warning: ORA-16819: fast-start failover observer not started

toolcdb1_site2 - (*) Physical standby database

Fast-Start Failover: Enabled in Zero Data Loss Mode

Configuration Status:

WARNING (status updated 39 seconds ago)

Observer for multiple configurations

This feature has been introduced in 12.2 but it is still not widely used.
Before 12.2, the Observer was a foreground process: the DBAs had to start it in a wrapper script executed with nohup in order to keep it live.
Since 12.2, the observer can run as a background process as far as there is a valid wallet for the connection to the databases.
Also, 12.2 introduced the capability of starting multiple configurations with a single dgmgrl command: “START OBSERVING”.

For more information about it, you can check the documentation here:
https://docs.oracle.com/en/database/oracle/oracle-database/19/dgbkr/using-data-guard-broker-to-manage-switchovers-failovers.html#GUID-BC513CDB-1E06-4EB3-9FE1-E1331E15E492

How to set it up with Easy Connect?

First, I need a wallet. And here comes the first compromise:
Having a single dgmgrl session to start all my configurations means that I have a single wallet for all the databases that I want to observe.
Fair enough, all the DBs (CDBs?) are managed by the same team in this case.
If I have only observers on my host I can easily point to the wallet from my central sqlnet.ora:

WALLET_LOCATION =
   (SOURCE =
      (METHOD = FILE)
      (METHOD_DATA = (DIRECTORY = /u01/app/oracle/admin/observers/wallet))
  )
SQLNET.WALLET_OVERRIDE = TRUE

WALLET_LOCATION =

(SOURCE =

(METHOD = FILE)

(METHOD_DATA = (DIRECTORY = /u01/app/oracle/admin/observers/wallet))

)

SQLNET.WALLET_OVERRIDE = TRUE

Otherwise I need to create a separate TNS_ADMIN for my observer management environment.
Then, I create the wallet:

$ WALLET_DIR=$ORACLE_BASE/admin/observers/wallet
$ mkdir -p $WALLET_DIR
$ orapki wallet create -wallet $WALLET_DIR -auto_login_local -pwd Password2020
Oracle PKI Tool Release 21.0.0.0.0 - Production
Version 21.0.0.0.0
Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.

Operation is successfully completed.

$ WALLET_DIR=$ORACLE_BASE/admin/observers/wallet

$ mkdir -p $WALLET_DIR

$ orapki wallet create -wallet $WALLET_DIR -auto_login_local -pwd Password2020

Oracle PKI Tool Release 21.0.0.0.0 - Production

Version 21.0.0.0.0

Operation is successfully completed.

Now I need to add the connection descriptors.

Which connection descriptors do I need?
The Observer uses the DGConnectIdentifier to keep observing the databases, but needs a connection to both of them using the TOOLCDB1_CFG service (unless I specify something different with the broker configuration property ConfigurationWideServiceName) to connect to the configuration and get the DGConnectIdentifier information. Again, you can check it in the doc. or the note Oracle 12.2 – Simplified OBSERVER Management for Multiple Fast-Start Failover Configurations (Doc ID 2285891.1)

So I need to specify three secrets for three connection descriptors:

$ mkstore -wrl "$TNS_ADMIN" -createCredential newbox01,newbox02:1521/TOOLCDB1_CFG sysdg
Oracle Secret Store Tool Release 21.0.0.0.0 - Production
Version 21.0.0.0.0
Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.

Your secret/Password is missing in the command line
Enter your secret/Password:
Re-enter your secret/Password:
Enter wallet password:

$ mkstore -wrl "$TNS_ADMIN" -createCredential newbox01:1521/TOOLCDB1_SITE1 sysdg
Oracle Secret Store Tool Release 21.0.0.0.0 - Production
Version 21.0.0.0.0
Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.

Your secret/Password is missing in the command line
Enter your secret/Password:
Re-enter your secret/Password:
Enter wallet password:


$ mkstore -wrl "$TNS_ADMIN" -createCredential newbox02:1521/TOOLCDB1_SITE2 sysdg
Oracle Secret Store Tool Release 21.0.0.0.0 - Production
Version 21.0.0.0.0
Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.

Your secret/Password is missing in the command line
Enter your secret/Password:
Re-enter your secret/Password:
Enter wallet password:

$ mkstore -wrl "$TNS_ADMIN" -createCredential newbox01,newbox02:1521/TOOLCDB1_CFG sysdg

Oracle Secret Store Tool Release 21.0.0.0.0 - Production

Version 21.0.0.0.0

Your secret/Password is missing in the command line

Enter your secret/Password:

Re-enter your secret/Password:

Enter wallet password:

$ mkstore -wrl "$TNS_ADMIN" -createCredential newbox01:1521/TOOLCDB1_SITE1 sysdg

Oracle Secret Store Tool Release 21.0.0.0.0 - Production

Version 21.0.0.0.0

Your secret/Password is missing in the command line

Enter your secret/Password:

Re-enter your secret/Password:

Enter wallet password:

$ mkstore -wrl "$TNS_ADMIN" -createCredential newbox02:1521/TOOLCDB1_SITE2 sysdg

Oracle Secret Store Tool Release 21.0.0.0.0 - Production

Version 21.0.0.0.0

Your secret/Password is missing in the command line

Enter your secret/Password:

Re-enter your secret/Password:

Enter wallet password:

The first one will be used for the initial connection. The other two to observe the Primary and Standby.
I need to be careful that the first EZConnect descriptor matches EXACTLY what I put in observer.ora (see next step) and the last two match my DGConnectIdentifier (unless I specify something different with ObserverConnectIdentifier), otherwise I will get some errors and the observer will not observe correctly (or will not start at all).

The dgmgrl needs then a file named observer.ora.
$ORACLE_BASE/admin/observers or the central TNS_ADMIN would be good locations, but what if I have observers that must be started from multiple Oracle Homes?
In that case, having a observer.ora in $ORACLE_HOME/network/admin (or $ORACLE_BASE/homes/{OHNAME}/network/admin/ if Read-Only Oracle Home is enabled) would be a better solution: in this case I would need to start one session per Oracle Home

The content of my observer.ora must be something like:

BROKER_CONFIGS=
   (
     (CONFIG=
       (NAME=TOOLCDB1)
       (CONNECT_ID=newbox01,newbox02:1521/TOOLCDB1_CFG)
       (CONFIG_HOME=/export/soft/oracle/admin/TOOLCDB1/observer)
     )
   )

BROKER_CONFIGS=

(

(CONFIG=

(NAME=TOOLCDB1)

(CONNECT_ID=newbox01,newbox02:1521/TOOLCDB1_CFG)

(CONFIG_HOME=/export/soft/oracle/admin/TOOLCDB1/observer)

)

This is the example for my configuration, but I can put as many (CONFIG=…) as I want in order to observe multiple configurations.
Then, if everything is configured properly, I can start all the observers with a single command:

DGMGRL> SET OBSERVERCONFIGFILE=/u01/app/oracle/admin/observers/observer.ora
DGMGRL> START OBSERVING
ObserverConfigFile=observer.ora
observer configuration file parsing succeeded
Submitted command "START OBSERVER" using connect identifier "newbox01,newbox02:1521/TOOLCDB1_CFG"

Check superobserver.log, individual observer logs and Data Guard Broker logs for execution details.

DGMGRL> show observers
ObserverConfigFile=/u01/app/oracle/admin/observers/observer.ora
observer configuration file parsing succeeded
Submitted command "SHOW OBSERVER" using connect identifier "newbox01,newbox02:1521/TOOLCDB1_CFG"
Connected to "TOOLCDB1_SITE2"

Configuration - toolcdb1

  Primary:            toolcdb1_site1
  Active Target:      toolcdb1_site2

Observer "newbox03.trivadistraining.com1" - Master

  Host Name:                    newbox03.trivadistraining.com
  Last Ping to Primary:         1 second ago
  Last Ping to Target:          2 seconds ago

DGMGRL> SET OBSERVERCONFIGFILE=/u01/app/oracle/admin/observers/observer.ora

DGMGRL> START OBSERVING

ObserverConfigFile=observer.ora

observer configuration file parsing succeeded

Submitted command "START OBSERVER" using connect identifier "newbox01,newbox02:1521/TOOLCDB1_CFG"

Check superobserver.log, individual observer logs and Data Guard Broker logs for execution details.

DGMGRL> show observers

ObserverConfigFile=/u01/app/oracle/admin/observers/observer.ora

observer configuration file parsing succeeded

Submitted command "SHOW OBSERVER" using connect identifier "newbox01,newbox02:1521/TOOLCDB1_CFG"

Connected to "TOOLCDB1_SITE2"

Configuration - toolcdb1

Primary: toolcdb1_site1

Active Target: toolcdb1_site2

Observer "newbox03.trivadistraining.com1" - Master

Host Name: newbox03.trivadistraining.com

Last Ping to Primary: 1 second ago

Last Ping to Target: 2 seconds ago

Troubleshooting

If the observer does not work, sometimes it is not easy to understand the cause.

Has SYSDG been granted to SYSDG user? Is SYSDG account unlocked?
Does sqlnet.ora contain the correct wallet location?
Is the wallet accessible in autologin?
Are the entries in the wallet correct? (check with “sqlplus /@connstring as sysdg”)

Missing pieces

Here, a few features that I think would be a nice addition in the future:

Awareness for the ORACLE_HOME to be used for each observer
Possibility to specify a different TNS_ADMIN per observer (different wallets)
Integration with Grid Infrastructure (srvctl add observer…) and support for multiple observers

—

Ludovico

Real-Time Cascade Standby Container Databases without Oracle Managed Files

Posted on July 10, 2020 by Ludovico

OK, the title might not be the best… I just would like to add more detail to content you can already find in other blogs (E.g. this nice one from Philippe Fierens http://pfierens.blogspot.com/2020/04/19c-data-guard-series-part-iii-adding.html).

I have this Cascade Standby configuration:

DGMGRL> connect /
Connected to "TOOLCDB1_SITE1"
Connected as SYSDG.
DGMGRL> show configuration;

Configuration - toolcdb1

  Protection Mode: MaxPerformance
  Members:
  toolcdb1_site1 - Primary database
    toolcdb1_site2 - Physical standby database
      toolcdx1_site2 - Physical standby database (receiving current redo)

Fast-Start Failover:  Disabled

Configuration Status:
SUCCESS   (status updated 42 seconds ago)

DGMGRL> connect /

Connected to "TOOLCDB1_SITE1"

Connected as SYSDG.

DGMGRL> show configuration;

Configuration - toolcdb1

Protection Mode: MaxPerformance

Members:

toolcdb1_site1 - Primary database

toolcdb1_site2 - Physical standby database

toolcdx1_site2 - Physical standby database (receiving current redo)

Fast-Start Failover: Disabled

Configuration Status:

SUCCESS (status updated 42 seconds ago)

Years ago I wrote this whitepaper about cascaded standbys:
https://fr.slideshare.net/ludovicocaldara/2014-603-caldarappr
While it is still relevant for non-CDBs, things have changed with Multitenant architecture.

In my config, the Oracle Database version is 19.7 and the databases are actually CDBs. No Grid Infrastructure, non-OMF datafiles.
It is important to highlight that a lot of things have changed since 12.1. And because 19c is the LTS version now, it does not make sense to try anything older.

First, I just want to make sure that my standbys are aligned.

Primary:

alter system switch logfile;

1	alter system switch logfile;

1st Standby alert log:

2020-07-07T10:20:23.370868+02:00
 rfs (PID:6408): Archived Log entry 58 added for B-1044796516.T-1.S-39 ID 0xf15601c6 LAD:2
 rfs (PID:6408): No SRLs available for T-1
2020-07-07T10:20:23.386410+02:00
 rfs (PID:6408): Opened log for T-1.S-40 dbid 4048667172 branch 1044796516
2020-07-07T10:20:24.552766+02:00
PR00 (PID:6478): Media Recovery Log /u03/oradata/fra/TOOLCDB1_SITE2/archivelog/2020_07_07/o1_mf_1_39_hj8cs7vo_.arc
PR00 (PID:6478): Media Recovery Waiting for T-1.S-40 (in transit)

2020-07-07T10:20:23.370868+02:00

rfs (PID:6408): Archived Log entry 58 added for B-1044796516.T-1.S-39 ID 0xf15601c6 LAD:2

rfs (PID:6408): No SRLs available for T-1

2020-07-07T10:20:23.386410+02:00

rfs (PID:6408): Opened log for T-1.S-40 dbid 4048667172 branch 1044796516

2020-07-07T10:20:24.552766+02:00

PR00 (PID:6478): Media Recovery Log /u03/oradata/fra/TOOLCDB1_SITE2/archivelog/2020_07_07/o1_mf_1_39_hj8cs7vo_.arc

PR00 (PID:6478): Media Recovery Waiting for T-1.S-40 (in transit)

2nd Standby alert log:

2020-07-07T10:20:31.051281+02:00
 rfs (PID:6498): Opened log for T-1.S-39 dbid 4048667172 branch 1044796516
2020-07-07T10:20:31.150748+02:00
 rfs (PID:6498): Archived Log entry 38 added for B-1044796516.T-1.S-39 ID 0xf15601c6 LAD:2
2020-07-07T10:20:31.862337+02:00
PR00 (PID:6718): Media Recovery Log /u03/oradata/fra/TOOLCDX1_SITE2/archivelog/2020_07_07/o1_mf_1_39_hj8d2h1k_.arc
PR00 (PID:6718): Media Recovery Waiting for T-1.S-40

2020-07-07T10:20:31.051281+02:00

rfs (PID:6498): Opened log for T-1.S-39 dbid 4048667172 branch 1044796516

2020-07-07T10:20:31.150748+02:00

rfs (PID:6498): Archived Log entry 38 added for B-1044796516.T-1.S-39 ID 0xf15601c6 LAD:2

2020-07-07T10:20:31.862337+02:00

PR00 (PID:6718): Media Recovery Log /u03/oradata/fra/TOOLCDX1_SITE2/archivelog/2020_07_07/o1_mf_1_39_hj8d2h1k_.arc

PR00 (PID:6718): Media Recovery Waiting for T-1.S-40

Then, I create a pluggable database (from PDB$SEED):

SQL>         CREATE PLUGGABLE DATABASE LATERALUS ADMIN USER PDBADMIN IDENTIFIED BY "NfrwTgbjwq7MbPNT92cH"  ROLES=(DBA)
  2                  FILE_NAME_CONVERT=('/pdbseed/','/LATERALUS/')
  3                  DEFAULT TABLESPACE USERS DATAFILE '/u02/oradata/TOOLCDB1/data/LATERALUS/USERS01.dbf' SIZE 50M AUTOEXTEND ON NEXT 50M MAXSIZE 1G;

Pluggable database created.

SQL>         ALTER PLUGGABLE DATABASE LATERALUS OPEN;

Pluggable database altered.

SQL>         ALTER PLUGGABLE DATABASE LATERALUS SAVE STATE;

Pluggable database altered.

SQL> CREATE PLUGGABLE DATABASE LATERALUS ADMIN USER PDBADMIN IDENTIFIED BY "NfrwTgbjwq7MbPNT92cH" ROLES=(DBA)

2 FILE_NAME_CONVERT=('/pdbseed/','/LATERALUS/')

3 DEFAULT TABLESPACE USERS DATAFILE '/u02/oradata/TOOLCDB1/data/LATERALUS/USERS01.dbf' SIZE 50M AUTOEXTEND ON NEXT 50M MAXSIZE 1G;

Pluggable database created.

SQL> ALTER PLUGGABLE DATABASE LATERALUS OPEN;

Pluggable database altered.

SQL> ALTER PLUGGABLE DATABASE LATERALUS SAVE STATE;

Pluggable database altered.

On the first standby I get:

2020-07-07T10:23:33.148457+02:00
 rfs (PID:6408): Archived Log entry 60 added for B-1044796516.T-1.S-40 ID 0xf15601c6 LAD:2
 rfs (PID:6408): No SRLs available for T-1
2020-07-07T10:23:33.184335+02:00
 rfs (PID:6408): Opened log for T-1.S-41 dbid 4048667172 branch 1044796516
2020-07-07T10:23:33.887665+02:00
PR00 (PID:6478): Media Recovery Log /u03/oradata/fra/TOOLCDB1_SITE2/archivelog/2020_07_07/o1_mf_1_40_hj8d27d0_.arc
Recovery created pluggable database LATERALUS
Recovery copied files for tablespace SYSTEM
Recovery successfully copied file /u02/oradata/TOOLCDB1/data/LATERALUS/system01.dbf from /u02/oradata/TOOLCDB1/data/pdbseed/system01.dbf
LATERALUS(4):WARNING: File being created with same name as in Primary
LATERALUS(4):Existing file may be overwritten
LATERALUS(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/system01.dbf
LATERALUS(4):Successfully added datafile 16 to media recovery
LATERALUS(4):Datafile #16: '/u02/oradata/TOOLCDB1/data/LATERALUS/system01.dbf'
2020-07-07T10:23:35.846985+02:00
Recovery copied files for tablespace SYSAUX
Recovery successfully copied file /u02/oradata/TOOLCDB1/data/LATERALUS/sysaux01.dbf from /u02/oradata/TOOLCDB1/data/pdbseed/sysaux01.dbf
LATERALUS(4):WARNING: File being created with same name as in Primary
LATERALUS(4):Existing file may be overwritten
LATERALUS(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/sysaux01.dbf
LATERALUS(4):Successfully added datafile 17 to media recovery
LATERALUS(4):Datafile #17: '/u02/oradata/TOOLCDB1/data/LATERALUS/sysaux01.dbf'
2020-07-07T10:23:41.004383+02:00
Recovery copied files for tablespace UNDOTBS1
Recovery successfully copied file /u02/oradata/TOOLCDB1/data/LATERALUS/undotbs01.dbf from /u02/oradata/TOOLCDB1/data/pdbseed/undotbs01.dbf
LATERALUS(4):WARNING: File being created with same name as in Primary
LATERALUS(4):Existing file may be overwritten
LATERALUS(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/undotbs01.dbf
LATERALUS(4):Successfully added datafile 18 to media recovery
LATERALUS(4):Datafile #18: '/u02/oradata/TOOLCDB1/data/LATERALUS/undotbs01.dbf'
2020-07-07T10:23:42.191607+02:00
(4):WARNING: File being created with same name as in Primary
(4):Existing file may be overwritten
(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/USERS01.dbf
(4):Successfully added datafile 19 to media recovery
(4):Datafile #19: '/u02/oradata/TOOLCDB1/data/LATERALUS/USERS01.dbf'
PR00 (PID:6478): Media Recovery Waiting for T-1.S-41 (in transit)

2020-07-07T10:23:33.148457+02:00

rfs (PID:6408): Archived Log entry 60 added for B-1044796516.T-1.S-40 ID 0xf15601c6 LAD:2

rfs (PID:6408): No SRLs available for T-1

2020-07-07T10:23:33.184335+02:00

rfs (PID:6408): Opened log for T-1.S-41 dbid 4048667172 branch 1044796516

2020-07-07T10:23:33.887665+02:00

PR00 (PID:6478): Media Recovery Log /u03/oradata/fra/TOOLCDB1_SITE2/archivelog/2020_07_07/o1_mf_1_40_hj8d27d0_.arc

Recovery created pluggable database LATERALUS

Recovery copied files for tablespace SYSTEM

Recovery successfully copied file /u02/oradata/TOOLCDB1/data/LATERALUS/system01.dbf from /u02/oradata/TOOLCDB1/data/pdbseed/system01.dbf

LATERALUS(4):WARNING: File being created with same name as in Primary

LATERALUS(4):Existing file may be overwritten

LATERALUS(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/system01.dbf

LATERALUS(4):Successfully added datafile 16 to media recovery

LATERALUS(4):Datafile #16: '/u02/oradata/TOOLCDB1/data/LATERALUS/system01.dbf'

2020-07-07T10:23:35.846985+02:00

Recovery copied files for tablespace SYSAUX

Recovery successfully copied file /u02/oradata/TOOLCDB1/data/LATERALUS/sysaux01.dbf from /u02/oradata/TOOLCDB1/data/pdbseed/sysaux01.dbf

LATERALUS(4):WARNING: File being created with same name as in Primary

LATERALUS(4):Existing file may be overwritten

LATERALUS(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/sysaux01.dbf

LATERALUS(4):Successfully added datafile 17 to media recovery

LATERALUS(4):Datafile #17: '/u02/oradata/TOOLCDB1/data/LATERALUS/sysaux01.dbf'

2020-07-07T10:23:41.004383+02:00

Recovery copied files for tablespace UNDOTBS1

Recovery successfully copied file /u02/oradata/TOOLCDB1/data/LATERALUS/undotbs01.dbf from /u02/oradata/TOOLCDB1/data/pdbseed/undotbs01.dbf

LATERALUS(4):WARNING: File being created with same name as in Primary

LATERALUS(4):Existing file may be overwritten

LATERALUS(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/undotbs01.dbf

LATERALUS(4):Successfully added datafile 18 to media recovery

LATERALUS(4):Datafile #18: '/u02/oradata/TOOLCDB1/data/LATERALUS/undotbs01.dbf'

2020-07-07T10:23:42.191607+02:00

(4):WARNING: File being created with same name as in Primary

(4):Existing file may be overwritten

(4):Recovery created file /u02/oradata/TOOLCDB1/data/LATERALUS/USERS01.dbf

(4):Successfully added datafile 19 to media recovery

(4):Datafile #19: '/u02/oradata/TOOLCDB1/data/LATERALUS/USERS01.dbf'

PR00 (PID:6478): Media Recovery Waiting for T-1.S-41 (in transit)

On the second:

2020-07-07T10:24:31.393410+02:00
 rfs (PID:6500): Opened log for T-1.S-40 dbid 4048667172 branch 1044796516
2020-07-07T10:24:31.460391+02:00
 rfs (PID:6500): Archived Log entry 39 added for B-1044796516.T-1.S-40 ID 0xf15601c6 LAD:2
2020-07-07T10:24:32.360726+02:00
PR00 (PID:6718): Media Recovery Log /u03/oradata/fra/TOOLCDX1_SITE2/archivelog/2020_07_07/o1_mf_1_40_hj8d9zd7_.arc
Recovery created pluggable database LATERALUS
2020-07-07T10:24:36.000250+02:00
Recovery copied files for tablespace SYSTEM
Recovery successfully copied file /u02/oradata/TOOLCDX1/data/LATERALUS/system01.dbf from /u02/oradata/TOOLCDX1/data/pdbseed/system01.dbf
LATERALUS(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/system01.dbf
LATERALUS(4):Successfully added datafile 16 to media recovery
LATERALUS(4):Datafile #16: '/u02/oradata/TOOLCDX1/data/LATERALUS/system01.dbf'
2020-07-07T10:24:40.657596+02:00
Recovery copied files for tablespace SYSAUX
Recovery successfully copied file /u02/oradata/TOOLCDX1/data/LATERALUS/sysaux01.dbf from /u02/oradata/TOOLCDX1/data/pdbseed/sysaux01.dbf
LATERALUS(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/sysaux01.dbf
LATERALUS(4):Successfully added datafile 17 to media recovery
LATERALUS(4):Datafile #17: '/u02/oradata/TOOLCDX1/data/LATERALUS/sysaux01.dbf'
2020-07-07T10:24:47.688298+02:00
Recovery copied files for tablespace UNDOTBS1
Recovery successfully copied file /u02/oradata/TOOLCDX1/data/LATERALUS/undotbs01.dbf from /u02/oradata/TOOLCDX1/data/pdbseed/undotbs01.dbf
LATERALUS(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/undotbs01.dbf
LATERALUS(4):Successfully added datafile 18 to media recovery
LATERALUS(4):Datafile #18: '/u02/oradata/TOOLCDX1/data/LATERALUS/undotbs01.dbf'
(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/USERS01.dbf
(4):Successfully added datafile 19 to media recovery
(4):Datafile #19: '/u02/oradata/TOOLCDX1/data/LATERALUS/USERS01.dbf'
2020-07-07T10:24:48.924510+02:00
PR00 (PID:6718): Media Recovery Waiting for T-1.S-41

2020-07-07T10:24:31.393410+02:00

rfs (PID:6500): Opened log for T-1.S-40 dbid 4048667172 branch 1044796516

2020-07-07T10:24:31.460391+02:00

rfs (PID:6500): Archived Log entry 39 added for B-1044796516.T-1.S-40 ID 0xf15601c6 LAD:2

2020-07-07T10:24:32.360726+02:00

PR00 (PID:6718): Media Recovery Log /u03/oradata/fra/TOOLCDX1_SITE2/archivelog/2020_07_07/o1_mf_1_40_hj8d9zd7_.arc

Recovery created pluggable database LATERALUS

2020-07-07T10:24:36.000250+02:00

Recovery copied files for tablespace SYSTEM

Recovery successfully copied file /u02/oradata/TOOLCDX1/data/LATERALUS/system01.dbf from /u02/oradata/TOOLCDX1/data/pdbseed/system01.dbf

LATERALUS(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/system01.dbf

LATERALUS(4):Successfully added datafile 16 to media recovery

LATERALUS(4):Datafile #16: '/u02/oradata/TOOLCDX1/data/LATERALUS/system01.dbf'

2020-07-07T10:24:40.657596+02:00

Recovery copied files for tablespace SYSAUX

Recovery successfully copied file /u02/oradata/TOOLCDX1/data/LATERALUS/sysaux01.dbf from /u02/oradata/TOOLCDX1/data/pdbseed/sysaux01.dbf

LATERALUS(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/sysaux01.dbf

LATERALUS(4):Successfully added datafile 17 to media recovery

LATERALUS(4):Datafile #17: '/u02/oradata/TOOLCDX1/data/LATERALUS/sysaux01.dbf'

2020-07-07T10:24:47.688298+02:00

Recovery copied files for tablespace UNDOTBS1

Recovery successfully copied file /u02/oradata/TOOLCDX1/data/LATERALUS/undotbs01.dbf from /u02/oradata/TOOLCDX1/data/pdbseed/undotbs01.dbf

LATERALUS(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/undotbs01.dbf

LATERALUS(4):Successfully added datafile 18 to media recovery

LATERALUS(4):Datafile #18: '/u02/oradata/TOOLCDX1/data/LATERALUS/undotbs01.dbf'

(4):Recovery created file /u02/oradata/TOOLCDX1/data/LATERALUS/USERS01.dbf

(4):Successfully added datafile 19 to media recovery

(4):Datafile #19: '/u02/oradata/TOOLCDX1/data/LATERALUS/USERS01.dbf'

2020-07-07T10:24:48.924510+02:00

PR00 (PID:6718): Media Recovery Waiting for T-1.S-41

So, yeah, not having OMF might get you some warnings like: WARNING: File being created with same name as in Primary
But it is good to know that the cascade standby deals well with new PDBs.

Of course, this is not of big interest as I know that the problem with Multitenant comes from CLONING PDBs from either local or remote PDBs in read-write mode.

So let’s try a relocate from another CDB:

 CREATE PLUGGABLE DATABASE PNEUMA FROM PNEUMA@LUDOCDB1_PNEUMA_tempclone
         RELOCATE AVAILABILITY NORMAL
         file_name_convert=('/LUDOCDB1/data/PNEUMA/','/TOOLCDB1/data/PNEUMA/')
         PARALLEL 2;

Pluggable database created.

SQL>         ALTER PLUGGABLE DATABASE PNEUMA OPEN;

Pluggable database altered.

SQL>         ALTER PLUGGABLE DATABASE PNEUMA SAVE STATE;

Pluggable database altered.

CREATE PLUGGABLE DATABASE PNEUMA FROM PNEUMA@LUDOCDB1_PNEUMA_tempclone

RELOCATE AVAILABILITY NORMAL

file_name_convert=('/LUDOCDB1/data/PNEUMA/','/TOOLCDB1/data/PNEUMA/')

PARALLEL 2;

Pluggable database created.

SQL> ALTER PLUGGABLE DATABASE PNEUMA OPEN;

Pluggable database altered.

SQL> ALTER PLUGGABLE DATABASE PNEUMA SAVE STATE;

Pluggable database altered.

This is what I get on the first standby:

2020-07-07T12:03:02.364271+02:00
Recovery created pluggable database PNEUMA
PNEUMA(5):Tablespace-SYSTEM during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #20 added to control file as 'UNNAMED00020'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.
PNEUMA(5):Tablespace-SYSAUX during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #21 added to control file as 'UNNAMED00021'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.
PNEUMA(5):Tablespace-UNDOTBS1 during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #22 added to control file as 'UNNAMED00022'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.
PNEUMA(5):Tablespace-TEMP during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):Tablespace-USERS during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #23 added to control file as 'UNNAMED00023'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.

2020-07-07T12:03:02.364271+02:00

Recovery created pluggable database PNEUMA

PNEUMA(5):Tablespace-SYSTEM during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #20 added to control file as 'UNNAMED00020'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

PNEUMA(5):Tablespace-SYSAUX during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #21 added to control file as 'UNNAMED00021'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

PNEUMA(5):Tablespace-UNDOTBS1 during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #22 added to control file as 'UNNAMED00022'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

PNEUMA(5):Tablespace-TEMP during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):Tablespace-USERS during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #23 added to control file as 'UNNAMED00023'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

and this is on the cascaded standby:

2020-07-07T12:03:02.368014+02:00
Recovery created pluggable database PNEUMA
PNEUMA(5):Tablespace-SYSTEM during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #20 added to control file as 'UNNAMED00020'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.
PNEUMA(5):Tablespace-SYSAUX during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #21 added to control file as 'UNNAMED00021'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.
PNEUMA(5):Tablespace-UNDOTBS1 during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #22 added to control file as 'UNNAMED00022'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.
PNEUMA(5):Tablespace-TEMP during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):Tablespace-USERS during PDB create skipped since source is in            r/w mode or this is a refresh clone
PNEUMA(5):File #23 added to control file as 'UNNAMED00023'. Originally created as:
PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf'
PNEUMA(5):because the pluggable database was created with nostandby
PNEUMA(5):or the tablespace belonging to the pluggable database is
PNEUMA(5):offline.

2020-07-07T12:03:02.368014+02:00

Recovery created pluggable database PNEUMA

PNEUMA(5):Tablespace-SYSTEM during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #20 added to control file as 'UNNAMED00020'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

PNEUMA(5):Tablespace-SYSAUX during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #21 added to control file as 'UNNAMED00021'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

PNEUMA(5):Tablespace-UNDOTBS1 during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #22 added to control file as 'UNNAMED00022'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

PNEUMA(5):Tablespace-TEMP during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):Tablespace-USERS during PDB create skipped since source is in r/w mode or this is a refresh clone

PNEUMA(5):File #23 added to control file as 'UNNAMED00023'. Originally created as:

PNEUMA(5):'/u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf'

PNEUMA(5):because the pluggable database was created with nostandby

PNEUMA(5):or the tablespace belonging to the pluggable database is

PNEUMA(5):offline.

So absolutely the same behavior between the two levels of standby.
According to the documentation: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/CREATE-PLUGGABLE-DATABASE.html#GUID-F2DBA8DD-EEA8-4BB7-A07F-78DC04DB1FFC
I quote what is specified for the parameter STANDBYS={ALL|NONE|…}:
“If you include a PDB in a standby CDB, then during standby recovery the standby CDB will search for the data files for the PDB. If the data files are not found, then standby recovery will stop and you must copy the data files to the correct location before you can restart recovery.”

“Specify ALL to include the new PDB in all standby CDBs. This is the default.”

“Specify NONE to exclude the new PDB from all standby CDBs. When a PDB is excluded from all standby CDBs, the PDB’s data files are unnamed and marked offline on all of the standby CDBs. Standby recovery will not stop if the data files for the PDB are not found on the standby. […]”

So, in order to avoid the MRP to crash, I should have included STANDBYS=NONE
But the documentation is not up to date, because in my case the PDB is skipped automatically and the recovery process DOES NOT STOP:

SQL> r
  1* select process, status, sequence#, client_process from v$managed_standby

PROCESS   STATUS        SEQUENCE# CLIENT_P
--------- ------------ ---------- --------
ARCH      CONNECTED             0 ARCH
DGRD      ALLOCATED             0 N/A
DGRD      ALLOCATED             0 N/A
ARCH      CLOSING              43 ARCH
ARCH      CLOSING              40 ARCH
ARCH      CLOSING              42 ARCH
RFS       IDLE                  0 Archival
RFS       IDLE                  0 UNKNOWN
RFS       IDLE                 44 LGWR
RFS       IDLE                  0 UNKNOWN
MRP0      APPLYING_LOG         44 N/A
LNS       WRITING              44 LNS
DGRD      ALLOCATED             0 N/A

13 rows selected.

SQL> r

1* select process, status, sequence#, client_process from v$managed_standby

PROCESS STATUS SEQUENCE# CLIENT_P

--------- ------------ ---------- --------

ARCH CONNECTED 0 ARCH

DGRD ALLOCATED 0 N/A

ARCH CLOSING 43 ARCH

ARCH CLOSING 40 ARCH

ARCH CLOSING 42 ARCH

RFS IDLE 0 Archival

RFS IDLE 0 UNKNOWN

RFS IDLE 44 LGWR

RFS IDLE 0 UNKNOWN

MRP0 APPLYING_LOG 44 N/A

LNS WRITING 44 LNS

DGRD ALLOCATED 0 N/A

13 rows selected.

However, the recovery is marked ENABLED for the PDB on the standby, while usind STANDBYS=NONE it would have been DISABLED.

  1* select name, recovery_status from v$pdbs

NAME                           RECOVERY
------------------------------ --------
PDB$SEED                       ENABLED
LATERALUS                      ENABLED
PNEUMA                         ENABLED

1* select name, recovery_status from v$pdbs

NAME RECOVERY

------------------------------ --------

PDB$SEED ENABLED

LATERALUS ENABLED

PNEUMA ENABLED

So, another difference with the doc who states:
“You can enable a PDB on a standby CDB after it was excluded on that standby CDB by copying the data files to the correct location, bringing the PDB online, and marking it as enabled for recovery.”

This reflects the findings of Philippe Fierens in his blog (http://pfierens.blogspot.com/2020/04/19c-data-guard-series-part-iii-adding.html).

This behavior has been introduced probably between 12.2 and 19c, but I could not manage to find exactly when, as it is not explicitly stated in the documentation.
However, I remember well that in 12.1.0.2, the MRP process was crashing.

In my configuration, not on purpose, but interesting for this article, the first standby has the very same directory structure, while the cascaded standby has not.

In any case, there is a potentially big problem for all the customers implementing Multitenant on Data Guard:

With the old behaviour (MRP crashing), it was easy to spot when a PDB was cloned online into a primary database, because a simple dgmgrl “show configuration” whould have displayed a warning because of the increasing lag (following the MRP crash).

With the current behavior, the MRP keeps recovering and the “show configuration” displays “SUCCESS” despite there is a PDB not copied on the standby (thus not protected).

Indeed, this is what I get after the clone:

DGMGRL> show configuration;

Configuration - toolcdb1

  Protection Mode: MaxPerformance
  Members:
  toolcdb1_site1 - Primary database
    toolcdb1_site2 - Physical standby database
      toolcdx1_site2 - Physical standby database (receiving current redo)

Fast-Start Failover:  Disabled

Configuration Status:
SUCCESS   (status updated 21 seconds ago)

DGMGRL> show database  toolcdb1_site2;

Database - toolcdb1_site2

  Role:               PHYSICAL STANDBY
  Intended State:     APPLY-ON
  Transport Lag:      0 seconds (computed 1 second ago)
  Apply Lag:          0 seconds (computed 1 second ago)
  Average Apply Rate: 8.00 KByte/s
  Real Time Query:    ON
  Instance(s):
    TOOLCDB1

Database Status:
SUCCESS

DGMGRL> show configuration;

Configuration - toolcdb1

Protection Mode: MaxPerformance

Members:

toolcdb1_site1 - Primary database

toolcdb1_site2 - Physical standby database

toolcdx1_site2 - Physical standby database (receiving current redo)

Fast-Start Failover: Disabled

Configuration Status:

SUCCESS (status updated 21 seconds ago)

DGMGRL> show database toolcdb1_site2;

Database - toolcdb1_site2

Role: PHYSICAL STANDBY

Intended State: APPLY-ON

Transport Lag: 0 seconds (computed 1 second ago)

Apply Lag: 0 seconds (computed 1 second ago)

Average Apply Rate: 8.00 KByte/s

Real Time Query: ON

Instance(s):

TOOLCDB1

Database Status:

SUCCESS

I can see that the Data Guard Broker is completely silent about the missing PDB. So I might think my PDB is protected while it is not!

I actually have to add a check on the standby DBs to check if I have any missing datafiles:

1* select con_id, name, status from v$datafile where status not in ('SYSTEM','ONLINE');

    CON_ID NAME                                                  STATUS
---------- ----------------------------------------------------- -------
         5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00020    SYSOFF
         5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00021    RECOVER
         5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00022    RECOVER
         5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00023    RECOVER

Although this first query seems OK to get the missing datafiles, actually the next one is the correct one to use:

SQL> select * from v$recover_file where online_status='OFFLINE';

     FILE# ONLINE  ONLINE_ ERROR               CHANGE# TIME             CON_ID
---------- ------- ------- ---------------- ---------- ------------ ----------
        20 OFFLINE OFFLINE FILE MISSING              0                       5
        21 OFFLINE OFFLINE FILE MISSING              0                       5
        22 OFFLINE OFFLINE FILE MISSING              0                       5
        23 OFFLINE OFFLINE FILE MISSING              0                       5

1* select con_id, name, status from v$datafile where status not in ('SYSTEM','ONLINE');

CON_ID NAME STATUS

---------- ----------------------------------------------------- -------

5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00020 SYSOFF

5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00021 RECOVER

5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00022 RECOVER

5 /u01/app/oracle/product/db_19_7_0/dbs/UNNAMED00023 RECOVER

Although this first query seems OK to get the missing datafiles, actually the next one is the correct one to use:

SQL> select * from v$recover_file where online_status='OFFLINE';

FILE# ONLINE ONLINE_ ERROR CHANGE# TIME CON_ID

---------- ------- ------- ---------------- ---------- ------------ ----------

20 OFFLINE OFFLINE FILE MISSING 0 5

21 OFFLINE OFFLINE FILE MISSING 0 5

22 OFFLINE OFFLINE FILE MISSING 0 5

23 OFFLINE OFFLINE FILE MISSING 0 5

This check should be implemented and put under monitoring (custom metrics in OEM?)

SQL> select 'ERROR: CON_ID '||con_id||' has '||count(*)||' datafiles offline!' from v$recover_file where online_status='OFFLINE' group by con_id;

'ERROR:CON_ID'||CON_ID||'HAS'||COUNT(*)||'DATAFILESOFFLINE!'
--------------------------------------------------------------------------------
ERROR: CON_ID 5 has 4 datafiles offline!

SQL> select 'ERROR: CON_ID '||con_id||' has '||count(*)||' datafiles offline!' from v$recover_file where online_status='OFFLINE' group by con_id;

'ERROR:CON_ID'||CON_ID||'HAS'||COUNT(*)||'DATAFILESOFFLINE!'

--------------------------------------------------------------------------------

ERROR: CON_ID 5 has 4 datafiles offline!

The missing PDB is easy to spot once I know that I have to do it. However, for each PDB to recover (I might have many!), I have to prepare the rename of datafiles and creation of directory (do not forget I am using non-OMF here).

Now, the datafile names on the standby got changed to …/UNNAMEDnnnnn.

So I have to get the original ones from the primary database and do the same replace that db_file_name_convert would do:

set trim on
col rename_file for a300
set lines 400
select 'set newname for datafile '||file#||' to '''||replace(name,'/TOOLCDB1/','/TOOLCDX1/')||''';' as rename_file  from v$datafile where con_id=6;

set trim on

col rename_file for a300

set lines 400

select 'set newname for datafile '||file#||' to '''||replace(name,'/TOOLCDB1/','/TOOLCDX1/')||''';' as rename_file from v$datafile where con_id=6;

and put this in a rman script (this will be for the second standby, the first has the same name so same PATH):

run {
set newname for datafile 20 to '/u02/oradata/TOOLCDX1/data/PNEUMA/system01.dbf';
set newname for datafile 21 to '/u02/oradata/TOOLCDX1/data/PNEUMA/sysaux01.dbf';
set newname for datafile 22 to '/u02/oradata/TOOLCDX1/data/PNEUMA/undotbs01.dbf';
set newname for datafile 23 to '/u02/oradata/TOOLCDX1/data/PNEUMA/USERS01.dbf';
restore pluggable database PNEUMA from service 'newbox01:1521/TOOLCDB1_SITE1_DGMGRL' ;
}
switch pluggable database PNEUMA to copy;

executing command: SET NEWNAME

executing command: SET NEWNAME

executing command: SET NEWNAME

executing command: SET NEWNAME

Starting restore at 07-JUL-2020 14:19:22
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: SID=1530 device type=DISK

channel ORA_DISK_1: starting datafile backup set restore
channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL
channel ORA_DISK_1: specifying datafile(s) to restore from backup set
channel ORA_DISK_1: restoring datafile 00020 to /u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf
channel ORA_DISK_1: restore complete, elapsed time: 00:00:03
channel ORA_DISK_1: starting datafile backup set restore
channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL
channel ORA_DISK_1: specifying datafile(s) to restore from backup set
channel ORA_DISK_1: restoring datafile 00021 to /u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf
channel ORA_DISK_1: restore complete, elapsed time: 00:00:07
channel ORA_DISK_1: starting datafile backup set restore
channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL
channel ORA_DISK_1: specifying datafile(s) to restore from backup set
channel ORA_DISK_1: restoring datafile 00022 to /u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf
channel ORA_DISK_1: restore complete, elapsed time: 00:00:03
channel ORA_DISK_1: starting datafile backup set restore
channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL
channel ORA_DISK_1: specifying datafile(s) to restore from backup set
channel ORA_DISK_1: restoring datafile 00023 to /u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf
channel ORA_DISK_1: restore complete, elapsed time: 00:00:07
Finished restore at 07-JUL-2020 14:19:43

datafile 20 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf"
datafile 21 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf"
datafile 22 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf"
datafile 23 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf"

run {

set newname for datafile 20 to '/u02/oradata/TOOLCDX1/data/PNEUMA/system01.dbf';

set newname for datafile 21 to '/u02/oradata/TOOLCDX1/data/PNEUMA/sysaux01.dbf';

set newname for datafile 22 to '/u02/oradata/TOOLCDX1/data/PNEUMA/undotbs01.dbf';

set newname for datafile 23 to '/u02/oradata/TOOLCDX1/data/PNEUMA/USERS01.dbf';

restore pluggable database PNEUMA from service 'newbox01:1521/TOOLCDB1_SITE1_DGMGRL' ;

}

switch pluggable database PNEUMA to copy;

executing command: SET NEWNAME

Starting restore at 07-JUL-2020 14:19:22

using target database control file instead of recovery catalog

allocated channel: ORA_DISK_1

channel ORA_DISK_1: SID=1530 device type=DISK

channel ORA_DISK_1: starting datafile backup set restore

channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL

channel ORA_DISK_1: specifying datafile(s) to restore from backup set

channel ORA_DISK_1: restoring datafile 00020 to /u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf

channel ORA_DISK_1: restore complete, elapsed time: 00:00:03

channel ORA_DISK_1: starting datafile backup set restore

channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL

channel ORA_DISK_1: specifying datafile(s) to restore from backup set

channel ORA_DISK_1: restoring datafile 00021 to /u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf

channel ORA_DISK_1: restore complete, elapsed time: 00:00:07

channel ORA_DISK_1: starting datafile backup set restore

channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL

channel ORA_DISK_1: specifying datafile(s) to restore from backup set

channel ORA_DISK_1: restoring datafile 00022 to /u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf

channel ORA_DISK_1: restore complete, elapsed time: 00:00:03

channel ORA_DISK_1: starting datafile backup set restore

channel ORA_DISK_1: using network backup set from service newbox01:1521/TOOLCDB1_SITE1_DGMGRL

channel ORA_DISK_1: specifying datafile(s) to restore from backup set

channel ORA_DISK_1: restoring datafile 00023 to /u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf

channel ORA_DISK_1: restore complete, elapsed time: 00:00:07

Finished restore at 07-JUL-2020 14:19:43

datafile 20 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/system01.dbf"

datafile 21 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/sysaux01.dbf"

datafile 22 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/undotbs01.dbf"

datafile 23 switched to datafile copy "/u02/oradata/TOOLCDB1/data/PNEUMA/USERS01.dbf"

Then, I need to stop the recovery, start it and stopping again, put the datafiles online and finally restart the recover.
These are the same steps used my Philippe in his blog post, just adapted to my taste 🙂

DGMGRL> edit database "TOOLCDB1_SITE2" set state='APPLY-OFF';

1	DGMGRL> edit database "TOOLCDB1_SITE2" set state='APPLY-OFF';

For the second part, I use this HEREDOC to online all offline datafiles:

$ sqlplus / as sysdba <<EOF
RECOVER STANDBY DATABASE UNTIL CANCEL;
CANCEL
ALTER SESSION SET CONTAINER=PNEUMA;
DECLARE
        CURSOR c_fileids IS
                SELECT  file#  FROM v\$recover_file where online_STATUS='OFFLINE';

		r_fileid c_fileids%ROWTYPE;
BEGIN
        OPEN c_fileids;
        LOOP
                FETCH  c_fileids  INTO r_fileid;
                EXIT WHEN c_fileids%NOTFOUND;
                BEGIN
					EXECUTE IMMEDIATE 'ALTER DATABASE DATAFILE '||to_char(r_fileid.file#)||' ONLINE';
                END;
        END LOOP;
END;
/
exit
EOF

$ sqlplus / as sysdba <<EOF

RECOVER STANDBY DATABASE UNTIL CANCEL;

CANCEL

ALTER SESSION SET CONTAINER=PNEUMA;

DECLARE

CURSOR c_fileids IS

SELECT file# FROM v\$recover_file where online_STATUS='OFFLINE';

r_fileid c_fileids%ROWTYPE;

BEGIN

OPEN c_fileids;

LOOP

FETCH c_fileids INTO r_fileid;

EXIT WHEN c_fileids%NOTFOUND;

BEGIN

EXECUTE IMMEDIATE 'ALTER DATABASE DATAFILE '||to_char(r_fileid.file#)||' ONLINE';

END;

END LOOP;

END;

exit

EOF

and finally:

DGMGRL> edit database "TOOLCDB1_SITE2" set state='APPLY-ON';

1	DGMGRL> edit database "TOOLCDB1_SITE2" set state='APPLY-ON';

Now, I do not have anymore any datafiles offline on the standby:

SQL> select 'ERROR: CON_ID '||con_id||' has '||count(*)||' datafiles offline!' from v$recover_file where online_status='OFFLINE' group by con_id;

no rows selected

SQL> select 'ERROR: CON_ID '||con_id||' has '||count(*)||' datafiles offline!' from v$recover_file where online_status='OFFLINE' group by con_id;

no rows selected

I will not publish the steps for the second standby, they are exactly the same (same output as well).

At the end, for me it is important to highlight that monitoring the OFFLINE datafiles on the standby becomes a crucial point to guarantee the health of Data Guard in Multitenant. Relying on the Broker status or “PDB recovery disabled” is not enough.

On the bright side, it is nice to see that Cascade Standby configurations do not introduce any variation, so cascaded standbys can be threated the same as “direct” standby databases.

HTH

—

Ludovico