DBA survival BLOG

DBA stuff and Oracle Data Guard

Where are Extended Data Types stored? (12c)

Posted on May 23, 2014 by Ludovico

Oracle has introduced Extended Data Types in 12c. If you still don’t know what they are and how enable them, refer to the awesome oracle-base blog or the official documentation.

Here, I’ll show where they are stored, and when.

Tables created with extended varchar2 (length >4000)

If the table is created with a varchar2 length above 4000, a LOBSEGMENT and a LOBINDEX segments are also created automatically. But actually, the rows inserted may be short enough to be stored inline.

SQL>  create table ludovico.t32k2 (a number, b varchar2(20000));

Table created.

SQL>  DECLARE
  j number;
BEGIN
  FOR j IN 1..1000
  LOOP
    INSERT INTO ludovico.t32k2 values (j, RPAD('LONGSTRING',100,'*') );
  END LOOP;
END;
/   2    3    4    5    6    7    8    9

PL/SQL procedure successfully completed.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME                   SEGMENT_TYPE            BYTES     BLOCKS
------------------------------ ------------------ ---------- ----------
T32K2                          TABLE                  196608         24
SYS_IL0000096543C00002$$       LOBINDEX                65536          8
SYS_LOB0000096543C00002$$      LOBSEGMENT             131072         16

SQL> create table ludovico.t32k2 (a number, b varchar2(20000));

Table created.

SQL> DECLARE

j number;

BEGIN

FOR j IN 1..1000

LOOP

INSERT INTO ludovico.t32k2 values (j, RPAD('LONGSTRING',100,'*') );

END LOOP;

END;

/ 2 3 4 5 6 7 8 9

PL/SQL procedure successfully completed.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

------------------------------ ------------------ ---------- ----------

T32K2 TABLE 196608 24

SYS_IL0000096543C00002$$ LOBINDEX 65536 8

SYS_LOB0000096543C00002$$ LOBSEGMENT 131072 16

As you can see from the previous output, the LOB segments are almost empty after 1000 rows inserted, whereas the table has 24 blocks. Not enough to be a proof. Let’s try to update the rows with a column size of 1000:

SQL> update ludovico.t32k2 set b=RPAD('LONGSTRING',1000,'*');

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME                   SEGMENT_TYPE            BYTES     BLOCKS
------------------------------ ------------------ ---------- ----------
T32K2                          TABLE                 2097152        256
SYS_IL0000096537C00002$$       LOBINDEX                65536          8
SYS_LOB0000096537C00002$$      LOBSEGMENT             131072         16

SQL> update ludovico.t32k2 set b=RPAD('LONGSTRING',1000,'*');

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

------------------------------ ------------------ ---------- ----------

T32K2 TABLE 2097152 256

SYS_IL0000096537C00002$$ LOBINDEX 65536 8

SYS_LOB0000096537C00002$$ LOBSEGMENT 131072 16

A-ah! The row size is increasing but the blocks are actually allocated in the table segment, this prove that the rows are stored inline!

Where’s the break point? When the rows will be migrated to the out-of-line storage?

SQL>  update ludovico.t32k2 set b=RPAD('LONGSTRING',3950,'*');

1000 rows updated.

SQL>  commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME                  SEGMENT_TYPE            BYTES     BLOCKS
----------------------------- ------------------ ---------- ----------
SYS_LOB0000096549C00002$$     LOBSEGMENT             131072         16
SYS_IL0000096549C00002$$      LOBINDEX                65536          8
T32K2                         TABLE                 8388608       1024

SQL>  update ludovico.t32k2 set b=RPAD('LONGSTRING',3960,'*');

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME                  SEGMENT_TYPE            BYTES     BLOCKS
----------------------------- ------------------ ---------- ----------
SYS_LOB0000096549C00002$$     LOBSEGMENT             131072         16
SYS_IL0000096549C00002$$      LOBINDEX                65536          8
T32K2                         TABLE                 8388608       1024

SQL>  update ludovico.t32k2 set b=RPAD('LONGSTRING',3970,'*');

1000 rows updated.

SQL>  commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME                 SEGMENT_TYPE            BYTES     BLOCKS
---------------------------- ------------------ ---------- ----------
SYS_LOB0000096549C00002$$    LOBSEGMENT           10682368       1304 <<<<<<<<< HERE!
SYS_IL0000096549C00002$$     LOBINDEX                65536          8
T32K2                        TABLE                 8388608       1024

SQL>

SQL> update ludovico.t32k2 set b=RPAD('LONGSTRING',3950,'*');

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

----------------------------- ------------------ ---------- ----------

SYS_LOB0000096549C00002$$ LOBSEGMENT 131072 16

SYS_IL0000096549C00002$$ LOBINDEX 65536 8

T32K2 TABLE 8388608 1024

SQL> update ludovico.t32k2 set b=RPAD('LONGSTRING',3960,'*');

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

----------------------------- ------------------ ---------- ----------

SYS_LOB0000096549C00002$$ LOBSEGMENT 131072 16

SYS_IL0000096549C00002$$ LOBINDEX 65536 8

T32K2 TABLE 8388608 1024

SQL> update ludovico.t32k2 set b=RPAD('LONGSTRING',3970,'*');

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

---------------------------- ------------------ ---------- ----------

SYS_LOB0000096549C00002$$ LOBSEGMENT 10682368 1304 <<<<<<<<< HERE!

SYS_IL0000096549C00002$$ LOBINDEX 65536 8

T32K2 TABLE 8388608 1024

SQL>

Actually, it’s somewhere between 3960 and 3970 bytes, but it may depend on other factors (I havn’t tested it deeply).

Lesson learned: when you design your table with extended types, you should plan how many rows you expect with a size below 4000, because they can make your row access slower and chained rows higher than expected.

Tables created with standard varchar2 and altered afterward

Let’s try almost the same excercise, but this time starting with a table created with a small length (lower than 4000) and altered afterward to make use of the extended size.

 SQL> create table ludovico.t32k (a number, b varchar2(500));

Table created.

SQL>

SQL> DECLARE  
2  j number;  
3 BEGIN  
4   FOR j IN 1..1000  
5  LOOP  
6    INSERT INTO ludovico.t32k values (j, RPAD('LONGSTRING',100,'*') );  
7  END LOOP;  
8 END;  
9 /  
  
PL/SQL procedure successfully completed.  
  
SQL> select count(*) from ludovico.t32k;  
  
  COUNT(*)  
----------  
      1000  

SQL>  exec dbms_stats.gather_table_stats('LUDOVICO','T32K');  
  
PL/SQL procedure successfully completed.  

SQL> select TABLE_NAME, OBJECT_TYPE, NUM_ROWS, BLOCKS, 
2 EMPTY_BLOCKS, AVG_ROW_LEN from dba_tab_statistics 
3 where owner='LUDOVICO';

TABLE_NAME OBJECT_TYPE    NUM_ROWS     BLOCKS EMPTY_BLOCKS AVG_ROW_LEN
---------- ------------ ---------- ---------- ------------ -----------
T32K       TABLE              1000         24            0         105

SQL> create table ludovico.t32k (a number, b varchar2(500));

Table created.

SQL>

SQL> DECLARE

2 j number;

3 BEGIN

4 FOR j IN 1..1000

5 LOOP

6 INSERT INTO ludovico.t32k values (j, RPAD('LONGSTRING',100,'*') );

7 END LOOP;

8 END;

9 /

PL/SQL procedure successfully completed.

SQL> select count(*) from ludovico.t32k;

COUNT(*)

----------

1000

SQL> exec dbms_stats.gather_table_stats('LUDOVICO','T32K');

PL/SQL procedure successfully completed.

SQL> select TABLE_NAME, OBJECT_TYPE, NUM_ROWS, BLOCKS,

2 EMPTY_BLOCKS, AVG_ROW_LEN from dba_tab_statistics

3 where owner='LUDOVICO';

TABLE_NAME OBJECT_TYPE NUM_ROWS BLOCKS EMPTY_BLOCKS AVG_ROW_LEN

---------- ------------ ---------- ---------- ------------ -----------

T32K TABLE 1000 24 0 105

So far, so good. Let’s try to extend the column:

SQL>  alter table ludovico.t32k  modify (b varchar2(20000));

Table altered.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME    SEGMENT_TYPE            BYTES     BLOCKS
--------------- ------------------ ---------- ----------
T32K            TABLE                  196608         24

SQL> alter table ludovico.t32k modify (b varchar2(20000));

Table altered.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

--------------- ------------------ ---------- ----------

T32K TABLE 196608 24

The LOB segments have not been created. Meaning that the rows are still stored inline. Let’s try then to update all the rows to a size above 4000:

SQL> update ludovico.t32k set b=RPAD('LONGSTRING',15000 ,'*') ;

1000 rows updated.

SQL> commit;

Commit complete.

SQL>  select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME    SEGMENT_TYPE            BYTES     BLOCKS
--------------- ------------------ ---------- ----------
T32K            TABLE                16777216       2048

SQL> update ludovico.t32k set b=RPAD('LONGSTRING',15000 ,'*') ;

1000 rows updated.

SQL> commit;

Commit complete.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

--------------- ------------------ ---------- ----------

T32K TABLE 16777216 2048

The blocks have grown in the table segment, the rows are still inline!!

SQL> analyze table ludovico.t32k list chained rows into chained_rows;  
  
Table analyzed.  
  
SQL> select count(*) from chained_rows;  
  
  COUNT(*)  
----------  
      1000

SQL> analyze table ludovico.t32k list chained rows into chained_rows;

Table analyzed.

SQL> select count(*) from chained_rows;

COUNT(*)

----------

1000

This lead to a huge amount of chained rows!! In this example, 2 blocks per row, but it can be as high as 5 blocks for a single column varchar2(32767) with db_block_size=8192.

So we need to rebuild the table, right?

SQL> alter table  ludovico.t32k move;

Table altered.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME    SEGMENT_TYPE            BYTES     BLOCKS
--------------- ------------------ ---------- ----------
T32K            TABLE                17825792       2176

SQL> alter table ludovico.t32k move;

Table altered.

SQL> select segment_name, segment_type, bytes, blocks from dba_segments where owner='LUDOVICO';

SEGMENT_NAME SEGMENT_TYPE BYTES BLOCKS

--------------- ------------------ ---------- ----------

T32K TABLE 17825792 2176

WRONG! Even after a rebuild the extended varchars are stored INLINE if they have been created as standard varchars. Actually you need to recreate the table and migrate with dbms_redefinition.

Migrating to extended datatypes instead of converting your application to use secure files can be a disaster for the physical layout of your tables, thus for the performance of your application. Be careful!

Update:

the limit is actually 3964 and it’s an internal mechanism of lobs since longtime: http://www.juliandyke.com/Presentations/LOBInternals.ppt
there is a hidden parameter: _scalar_type_lob_storage_threshold that controls the threshold of the varchar size specified in the definition before creating a segment for the lobs (default 4000). see this post: http://orasql.org/2013/07/13/oracle-12c-extended-varchars/
using extended datatypes instead of clobs may save tons of roundtrips when retrieving many rows: http://www.dbi-services.com/index.php/blog/entry/12c-extended-datatypes-better-than-clob

Many thanks to Sayan Malakshinov (@xtner) and Franck Pachot (@FranckPachot) for the information 🙂

Oracle Instances and real memory consumption on Linux and Solaris

Posted on December 13, 2013 by Ludovico

There’s a way to know the REAL memory usage by Oracle Instance, including all connecting processes and using the shell rather than a connection to oracle?

The short answer is “I think so” 🙂

Summing up RSS column from ps output, is not reliable because Linux uses a copy-on-write on process forks and also doesn’t take into account correctly the shared memory and other shared allocations.

I’ve come across this post on Pythian’s Blog from Marc Billette.

While it seems good I’ve had discording results depending on platform and release.

Instead, I’ve tried to create a shell snippet that always uses pmap but works differently and SEEMS to work correctly on Linux ans Solaris.

Basically, using the pmap script I get a lot of information about the different memory areas allocated to the process:

21010:  ora_d000_db1p

0000000000400000     208908K r-x--  /ccv/app/oracle/product/11.2.0.3/bin/oracle

000000000D012000       1536K rw---  /ccv/app/oracle/product/11.2.0.3/bin/oracle

000000000D192000       1040K rw---    [ heap ]

0000000060000000      12288K rwxs-    [ dism shmid=0x4300000e ]

0000000080000000    1036288K rwxs-    [ dism shmid=0x7600000f ]

00000000C0000000         12K rwxs-    [ dism shmid=0x4f000011 ]

FFFFFD7FFC7A0000         64K rwx--    [ anon ]

FFFFFD7FFC7BD000        704K rw---    [ anon ]

FFFFFD7FFC86E000        200K rw---    [ anon ]

FFFFFD7FFC8A0000        312K rw---    [ anon ]

FFFFFD7FFC8EF000       1280K rw---    [ anon ]

FFFFFD7FFCA30000         64K rwx--    [ anon ]

FFFFFD7FFCA4F000        256K rw---    [ anon ]

FFFFFD7FFCA90000         64K rwx--    [ anon ]

FFFFFD7FFCAB0000         36K r-x--  /lib/amd64/libuutil.so.1
...

21010: ora_d000_db1p

0000000000400000 208908K r-x-- /ccv/app/oracle/product/11.2.0.3/bin/oracle

000000000D012000 1536K rw--- /ccv/app/oracle/product/11.2.0.3/bin/oracle

000000000D192000 1040K rw--- [ heap ]

0000000060000000 12288K rwxs- [ dism shmid=0x4300000e ]

0000000080000000 1036288K rwxs- [ dism shmid=0x7600000f ]

00000000C0000000 12K rwxs- [ dism shmid=0x4f000011 ]

FFFFFD7FFC7A0000 64K rwx-- [ anon ]

FFFFFD7FFC7BD000 704K rw--- [ anon ]

FFFFFD7FFC86E000 200K rw--- [ anon ]

FFFFFD7FFC8A0000 312K rw--- [ anon ]

FFFFFD7FFC8EF000 1280K rw--- [ anon ]

FFFFFD7FFCA30000 64K rwx-- [ anon ]

FFFFFD7FFCA4F000 256K rw--- [ anon ]

FFFFFD7FFCA90000 64K rwx-- [ anon ]

FFFFFD7FFCAB0000 36K r-x-- /lib/amd64/libuutil.so.1

...

Initially I’ve tried to decode correctly the different kinds of memory the same way other scripts I’ve found online do:

rwxs- = shared memory

rw--- = private heap

rwx-- = private code stack

r-x-- = shared code stack (?)

etc...

rwxs- = shared memory

rw--- = private heap

rwx-- = private code stack

r-x-- = shared code stack (?)

etc...

but finally the ADDRESS is the same from different processes when the memory area is shared, so my script now just get a unique line for each address and sums up the memory size (not the rss one!):

username=`whoami`

sids=`ps -eaf | grep "^$username" | grep pmon | grep -v " grep "  | awk '{print substr($NF,10)}'`


total=0
for sid in $sids ; do
        pids=`ps -eaf | grep "^$username" | grep -- "$sid" | grep -v " grep " | awk '{print $2}'`
        mem=`pmap $pids 2>&1 | grep "K " | sort | awk '{print $1 " " substr($2,1,length($2)-1)}' | uniq | awk ' BEGIN { sum=0 } { sum+=$2} END {print sum}' `

        echo "$sid : $mem"
        total=`expr $total + $mem`
done

echo "total :  $total"

username=`whoami`

sids=`ps -eaf | grep "^$username" | grep pmon | grep -v " grep " | awk '{print substr($NF,10)}'`

total=0

for sid in $sids ; do

pids=`ps -eaf | grep "^$username" | grep -- "$sid" | grep -v " grep " | awk '{print $2}'`

mem=`pmap $pids 2>&1 | grep "K " | sort | awk '{print $1 " " substr($2,1,length($2)-1)}' | uniq | awk ' BEGIN { sum=0 } { sum+=$2} END {print sum}' `

echo "$sid : $mem"

total=`expr $total + $mem`

done

echo "total : $total"

This should give the total virtual memory allocated by the different Oracle instances.

The results I get are plausible both on Linux and Solaris.

Example:

$ ./test_mem.ksh

db1p: 3334852

db2p: 2052048

db3p: 6765280

db4p: 2687928

db5p: 4385616

total :  19225724

$ ./test_mem.ksh

db1p: 3334852

db2p: 2052048

db3p: 6765280

db4p: 2687928

db5p: 4385616

total : 19225724

If you find any error let me know and I’ll fix the script!

—

Ludovico

A good news to start November in a good mood

Posted on November 1, 2013 by Ludovico

I’ve just noticed that my room mate at #OOW13 has been recognised as ACE Director.

It is my honour to announce that @ChrisAntognini have been finally recognised as Oracle ACE Director! Please join me to congratulate him 🙂

— Yury (@yvelik) November 1, 2013

Even if he was already a world-known performance specialist, this announcement makes me one more time proud to know him. Well deserved Chris! 🙂

Generating graphs massively from Windows Performance Counters logs

Posted on April 23, 2013 by Ludovico

Windows Performance Monitor is an invaluable tool when you don’t have external enterprise monitoring tools and you need to face performance problems, whether you have a web/application server, a mail server or a database server.

But what I don’t personally like of it is what you get in terms of graphing. If you schedule and collect a big amount of performance metrics you will likely get lost in adding/removing such metrics from the graphical interface.

What I’ve done long time ago (and I’ve done again recently after my old laptop has been stolen 🙁 ) is to prepare a PHP script that parse the resulting CSV file and generate automatically one graph for each metric that could be found.

Unfortunately, most of Windows Sysadmin between you will disagree that I’ve done this using a Linux Box. But I guess you can use my script if you install php inside cygwin. The other tool you need, is rrdtool, again I use it massively to resolve my graphing needs.

How to collect your data

Basically you need to create any Data Collector within the Performance Monitor that generates a log file. You can specify directly a CSV file (Log format: Comma separated) or generate a BLG file and convert it later (Log format: Binary). System dumps are not used, so if you use the standard Performace template, you can delete it from your collection.

Remember that the more counters you take, the more the graph generation will take. The script does not run in parallel, so it will use only one core. Generally:

(Time to complete) = (Num Counters) * (Num Samples) * (Speed factor)

1	(Time to complete) = (Num Counters) * (Num Samples) * (Speed factor)

Where (Speed factor) is depending on both the CPU speed and the disk speed because of the huge number of syncs required to update several thousands of files. I’ve tried to reduce the number of rrdupdates by queuing several update values in a single command line and I’ve noticed an important increase of performances, but I know it’s not enough.

Converting a BLG (binary) log into a CSV log

Just use the relog tool:

C:\PerfLogs\Admin\Perftest\LUDO_20130423-000002> <strong>relog "Performance Counter.blg" -f csv -o "Performance Counter.csv"</strong>

Input
----------------
File(s):
     Performance Counter.blg (Binary)

Begin:    23.4.2013 14:56:02
End:      23.4.2013 15:33:37
Samples:  452

100.00%

Output
----------------
File:     Performance Counter.csv

Begin:    23.4.2013 14:56:02
End:      23.4.2013 15:33:37
Samples:  452

The command completed successfully.

C:\PerfLogs\Admin\Perftest\LUDO_20130423-000002> <strong>relog "Performance Counter.blg" -f csv -o "Performance Counter.csv"</strong>

Input

----------------

File(s):

Performance Counter.blg (Binary)

Begin: 23.4.2013 14:56:02

End: 23.4.2013 15:33:37

Samples: 452

100.00%

Output

----------------

File: Performance Counter.csv

Begin: 23.4.2013 14:56:02

End: 23.4.2013 15:33:37

Samples: 452

The command completed successfully.

Generating the graphs

Transfer the CSV on the box where you have the php and rrdtool configured, then run:

[root@lucrac01 temp]# php process_l.php PerformanceCounter.csv

453--Creating rrd /root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.rrd

Creating rrd /root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.rrd
Creating rrd /root/temp/LUDO/IPv4/Fragmented_Datagrams_sec.rrd
Creating rrd /root/temp/LUDO/IPv4/Datagrams_sec.rrd

...

Creating rrd /root/temp/LUDO/Memory/Pages_Input_sec.rrd
Creating rrd /root/temp/LUDO/Memory/Pool_Paged_Resident_Bytes.rrd
Creating rrd /root/temp/LUDO/Memory/Write_Copies_sec.rrd

...

Creating rrd /root/temp/LUDO/PhysicalDisk_2_E__/Avg._Disk_sec_Transfer.rrd
Creating rrd /root/temp/LUDO/PhysicalDisk_1_D__/Avg._Disk_sec_Transfer.rrd
Creating rrd /root/temp/LUDO/PhysicalDisk_0_C__/Avg._Disk_sec_Transfer.rrd
----......

1.Generating Graph: /root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.png
rrdtool graph /root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.png --start "1366721762" --end "1366724017" --width 453 DEF:ds0=/root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.rrd:value:LAST:step=5 LINE1:ds0#0000FF:"IPv4\Datagrams Received Delivered/sec" VDEF:ds0max=ds0,MAXIMUM VDEF:ds0avg=ds0,AVERAGE VDEF:ds0min=ds0,MINIMUM COMMENT:" " COMMENT:" Maximum " GPRINT:ds0max:"%6.2lf" COMMENT:" Average " GPRINT:ds0avg:"%6.2lf" COMMENT:" Minimum " GPRINT:ds0min:"%6.2lf"
534x177
2.Generating Graph: /root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.png
rrdtool graph /root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.png --start "1366721762" --end "1366724017" --width 453 DEF:ds0=/root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.rrd:value:LAST:step=5 LINE1:ds0#0000FF:"IPv4\Datagrams Received Unknown Protocol" VDEF:ds0max=ds0,MAXIMUM VDEF:ds0avg=ds0,AVERAGE VDEF:ds0min=ds0,MINIMUM COMMENT:" " COMMENT:" Maximum " GPRINT:ds0max:"%6.2lf" COMMENT:" Average " GPRINT:ds0avg:"%6.2lf" COMMENT:" Minimum " GPRINT:ds0min:"%6.2lf"
534x177
...

[root@lucrac01 temp]# php process_l.php PerformanceCounter.csv

453--Creating rrd /root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.rrd

Creating rrd /root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.rrd

Creating rrd /root/temp/LUDO/IPv4/Fragmented_Datagrams_sec.rrd

Creating rrd /root/temp/LUDO/IPv4/Datagrams_sec.rrd

...

Creating rrd /root/temp/LUDO/Memory/Pages_Input_sec.rrd

Creating rrd /root/temp/LUDO/Memory/Pool_Paged_Resident_Bytes.rrd

Creating rrd /root/temp/LUDO/Memory/Write_Copies_sec.rrd

...

Creating rrd /root/temp/LUDO/PhysicalDisk_2_E__/Avg._Disk_sec_Transfer.rrd

Creating rrd /root/temp/LUDO/PhysicalDisk_1_D__/Avg._Disk_sec_Transfer.rrd

Creating rrd /root/temp/LUDO/PhysicalDisk_0_C__/Avg._Disk_sec_Transfer.rrd

----......

1.Generating Graph: /root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.png

rrdtool graph /root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.png --start "1366721762" --end "1366724017" --width 453 DEF:ds0=/root/temp/LUDO/IPv4/Datagrams_Received_Delivered_sec.rrd:value:LAST:step=5 LINE1:ds0#0000FF:"IPv4\Datagrams Received Delivered/sec" VDEF:ds0max=ds0,MAXIMUM VDEF:ds0avg=ds0,AVERAGE VDEF:ds0min=ds0,MINIMUM COMMENT:" " COMMENT:" Maximum " GPRINT:ds0max:"%6.2lf" COMMENT:" Average " GPRINT:ds0avg:"%6.2lf" COMMENT:" Minimum " GPRINT:ds0min:"%6.2lf"

534x177

2.Generating Graph: /root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.png

rrdtool graph /root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.png --start "1366721762" --end "1366724017" --width 453 DEF:ds0=/root/temp/LUDO/IPv4/Datagrams_Received_Unknown_Protocol.rrd:value:LAST:step=5 LINE1:ds0#0000FF:"IPv4\Datagrams Received Unknown Protocol" VDEF:ds0max=ds0,MAXIMUM VDEF:ds0avg=ds0,AVERAGE VDEF:ds0min=ds0,MINIMUM COMMENT:" " COMMENT:" Maximum " GPRINT:ds0max:"%6.2lf" COMMENT:" Average " GPRINT:ds0avg:"%6.2lf" COMMENT:" Minimum " GPRINT:ds0min:"%6.2lf"

534x177

...

Now it’s done!

The script generate a folder with the name of the server (LUDO in my example) and a subfolder for each class of counters (as you see in Performance Monitor).

Inside each folder you will have a PNG (and an rrd) for each metric.

Important: The RRD are generated with a single round-robin archive with a size equal to the number of samples. If you want to have the rrd to store your historical data you’ll need to modify the script. Also, the size of the graph will be the same as the number of samples (for best reading), but limited to 1000 to avoid huge images.

Future Improvements

Would be nice to have a prepared set of graphs for standard graphs with multiple metrics (e.g. CPU user, system and idle together) and additional lines like regressions…

Download the script: process_l_php.txt and rename it with a .php extension.

Hope you’ll find it useful!

Cheers

Ludo

Oracle capacity planning with RRDTOOL

Posted on May 25, 2009 by Ludovico

RRDize everything, chapter 2

Oracle Database Server has the most powerful system catalog that allows to query almost any aspect inside an oracle instance.
You can query many v$ fixed views at regular intervals and populate many RRD files through rrdtool: space usage, wait events. system statistics and so on…

Since release 10.1 Oracle has introduced Automatic Workload Repository, a finer version of old good Statspack.
No matter if you are using AWR or statspack, you can rely on their views to collect data for your RRDs.

If you are administering a new instance and you haven’t collected its statistics so far, you can query (as example) the DBA_HIST_BG_EVENT_SUMMARY view to gather all AWR data about wait events. Historical views could be useful also to collect historical data once a week rather than query the fixed views every few minutes doing the hard work twice (you and AWR).

The whole process of gathering performance data and update rrd files can be resumed into the following steps:

– connect to the database
– query the AWR’s views
– build and execute an rrdtool update command
– check if rrd file exists or create it
– update the rrd file

The less rrdtool update commands you will execute, the better the whole process will perform.
Do it in a language you are comfortable with and that supports easily connection descriptors.

Since I’m very comfortable with php, I did it this way.

This is a very basilar script that works greatly for me with good performances:

#!/usr/bin/php -f
< ?php                                         

define('WD','/opt/oracle/awr');
$cs         = $_SERVER['argv'][1];
$user       = 'mymonitoruser';
$pass       = 'mystrongpassword'; 

/* open a new connection */
$ds = oci_connect($user, $pass, $cs)
        or die ("Cannot connect to Oracle Database ".$cs."\n");

/* setting client nls environment */
$sql = "alter session set nls_timestamp_format='MM/DD/YY HH24:MI'";
$stmt = oci_parse($ds, $sql);
oci_execute($stmt);
oci_free_statement($stmt);                                         

/* create directory that will contain rrds (if not exists) */
if(!file_exists(WD.'/'.$cs))
                mkdir(WD.'/'.$cs);
if(!file_exists(WD.'/'.$cs.'/wait'))
                mkdir(WD.'/'.$cs.'/wait');                   

/* function to create new RRDs */
function createRRD($name, $interval, $cs) {
        $hb = $interval*5; //heartbeat
        $cmd="rrdtool create ".WD."/".$cs."/wait/${name}.rrd -s ".$interval." \
                -b \"now -3month\" DS:waits:DERIVE:$hb:0:U \
                DS:mswaited:DERIVE:$hb:0:U \
                RRA:AVERAGE:0.5:1:1440 RRA:AVERAGE:0.5:30:336 \
                RRA:AVERAGE:0.5:120:372 RRA:AVERAGE:0.5:720:730 \
                RRA:MIN:0.5:1:1440 RRA:MIN:0.5:30:336 \
                RRA:MIN:0.5:120:372 RRA:MIN:0.5:720:730 \
                RRA:MAX:0.5:1:1440 RRA:MAX:0.5:30:336 \
                RRA:MAX:0.5:120:372 RRA:MAX:0.5:720:730 \
                RRA:LAST:0.5:1:1440";
        //print $cmd."\n";
        return passthru($cmd);
}                                                                              

/* take the snapshot frequency from dba_hist_wr_control
 to create the RDD with correct heartbeat value */
$sql = 'select extract(hour from snap_interval)*3600 +
extract(minute from snap_interval)*60 as SEED from DBA_HIST_WR_CONTROL';
$stmt = oci_parse($ds, $sql);
oci_execute($stmt);
$row = oci_fetch_assoc($stmt);
$interval = $row['SEED'];
unset($row);
oci_free_statement($stmt);                                              

/* statement definition that will collect
 all snapshots for a certain wait event with more than
 a certain amonut of time waited.
 Gathering ALL EVENTS could be time consuming and useless.
 I fetch rows ordered by event_name rather
 then by date because I can update many values
 into the same rrd with very few rrdupdate commands
*/
$sql = 'select s.END_INTERVAL_TIME END_INTERVAL_TIME,
    g.EVENT_NAME, g.WAIT_CLASS, g.TOTAL_WAITS,
    round(g.TIME_WAITED_MICRO/1000) MS
  from DBA_HIST_SNAPSHOT s,
   dba_hist_bg_event_summary g,
   v$instance i
 where s.SNAP_ID=g.SNAP_ID and g.wait_class!=\'Idle\'
  and g.TIME_WAITED_MICRO&gt;100000
  and s.instance_number=i.instance_number
  and s.instance_number=g.instance_number
 order by 2,1';                                      

/* default prefetch size (148) matches default snapshot retention (24hx7dd) */
$stmt = oci_parse($ds, $sql);
oci_set_prefetch($stmt, 148);
oci_execute($stmt);

$i=0;
$oldevent="";
while ($row = oci_fetch_assoc($stmt)) {
        if ($oldevent != $row['EVENT_NAME']) {
                //NEW EVENT DETECTED: WILL START A NEW UPDATE CMD
                if ($i != 0 &amp;&amp; !empty($cmd)) {
                        /* not the first occurrence,
                         I bet there's something in my buffer */
                        passthru($cmd);
                }
                $cleanName = preg_replace ("([^[:alnum:]_-])","_",$row['EVENT_NAME']);
                // if there is no rrd for this event, I create a new one
                if (!file_exists(WD."/".$cs."/wait/${cleanName}.rrd")) {
                        createRRD($cleanName, $interval, $cs);
                }
                /*
                * I initialize a new update command. This string act as a buffer: I append many
                * values to be updated so I'll update many values in a single command line:
                * less forks of rrdtool and less file opens: the whole update process has an
                * enormous improvement.
                */
                $precmd="rrdtool update ".WD."/".$cs."/wait/${cleanName}.rrd ";
                $lastcmd="rrdtool info ".WD."/".$cs."/wait/${cleanName}.rrd".
                        "| grep last_update | awk '{print \$NF}'";
                $last=trim(`$lastcmd`);
                printf ("%s - %s - last: %d\n", $row['EVENT_NAME'], $cleanName, $last);
                $i=0;
                $cmd=$precmd;
                $oldevent=$row['EVENT_NAME'];
        }
        $time=strtotime($row['END_INTERVAL_TIME']);
        //print "time: ".$time."  last: ".$last."\n";
        if ( $time &gt; $last ) {
                $cmd.=" ".$time.":".$row['TOTAL_WAITS'].":".$row['MS'];
                $i++;
        }
        if ($i &gt;= 40) {
                // when I reach 40 values per commandline I force
                // the update: next loop will reinitialize a new commandline.
                passthru($cmd);
                $cmd=$precmd;
                $i=0;
        }
        unset($row);

}
if ($i != 0) {
        /* one more update pending in my buffer */
        passthru($cmd);
}
oci_free_statement($stmt);
oci_close($ds);
?>

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

#!/usr/bin/php -f

< ?php

define('WD','/opt/oracle/awr');

$cs = $_SERVER['argv'][1];

$user = 'mymonitoruser';

$pass = 'mystrongpassword';

/* open a new connection */

$ds = oci_connect($user, $pass, $cs)

or die ("Cannot connect to Oracle Database ".$cs."\n");

/* setting client nls environment */

$sql = "alter session set nls_timestamp_format='MM/DD/YY HH24:MI'";

$stmt = oci_parse($ds, $sql);

oci_execute($stmt);

oci_free_statement($stmt);

/* create directory that will contain rrds (if not exists) */

if(!file_exists(WD.'/'.$cs))

mkdir(WD.'/'.$cs);

if(!file_exists(WD.'/'.$cs.'/wait'))

mkdir(WD.'/'.$cs.'/wait');

/* function to create new RRDs */

function createRRD($name, $interval, $cs) {

$hb = $interval*5; //heartbeat

$cmd="rrdtool create ".WD."/".$cs."/wait/${name}.rrd -s ".$interval." \

-b \"now -3month\" DS:waits:DERIVE:$hb:0:U \

DS:mswaited:DERIVE:$hb:0:U \

RRA:AVERAGE:0.5:1:1440 RRA:AVERAGE:0.5:30:336 \

RRA:AVERAGE:0.5:120:372 RRA:AVERAGE:0.5:720:730 \

RRA:MIN:0.5:1:1440 RRA:MIN:0.5:30:336 \

RRA:MIN:0.5:120:372 RRA:MIN:0.5:720:730 \

RRA:MAX:0.5:1:1440 RRA:MAX:0.5:30:336 \

RRA:MAX:0.5:120:372 RRA:MAX:0.5:720:730 \

RRA:LAST:0.5:1:1440";

//print $cmd."\n";

return passthru($cmd);

}

/* take the snapshot frequency from dba_hist_wr_control

to create the RDD with correct heartbeat value */

$sql = 'select extract(hour from snap_interval)*3600 +

extract(minute from snap_interval)*60 as SEED from DBA_HIST_WR_CONTROL';

$stmt = oci_parse($ds, $sql);

oci_execute($stmt);

$row = oci_fetch_assoc($stmt);

$interval = $row['SEED'];

unset($row);

oci_free_statement($stmt);

/* statement definition that will collect

all snapshots for a certain wait event with more than

a certain amonut of time waited.

Gathering ALL EVENTS could be time consuming and useless.

I fetch rows ordered by event_name rather

then by date because I can update many values

into the same rrd with very few rrdupdate commands

$sql = 'select s.END_INTERVAL_TIME END_INTERVAL_TIME,

g.EVENT_NAME, g.WAIT_CLASS, g.TOTAL_WAITS,

round(g.TIME_WAITED_MICRO/1000) MS

from DBA_HIST_SNAPSHOT s,

dba_hist_bg_event_summary g,

v$instance i

where s.SNAP_ID=g.SNAP_ID and g.wait_class!=\'Idle\'

and g.TIME_WAITED_MICRO>100000

and s.instance_number=i.instance_number

and s.instance_number=g.instance_number

order by 2,1';

/* default prefetch size (148) matches default snapshot retention (24hx7dd) */

$stmt = oci_parse($ds, $sql);

oci_set_prefetch($stmt, 148);

oci_execute($stmt);

$i=0;

$oldevent="";

while ($row = oci_fetch_assoc($stmt)) {

if ($oldevent != $row['EVENT_NAME']) {

//NEW EVENT DETECTED: WILL START A NEW UPDATE CMD

if ($i != 0 && !empty($cmd)) {

/* not the first occurrence,

I bet there's something in my buffer */

passthru($cmd);

}

$cleanName = preg_replace ("([^[:alnum:]_-])","_",$row['EVENT_NAME']);

// if there is no rrd for this event, I create a new one

if (!file_exists(WD."/".$cs."/wait/${cleanName}.rrd")) {

createRRD($cleanName, $interval, $cs);

}

* I initialize a new update command. This string act as a buffer: I append many

* values to be updated so I'll update many values in a single command line:

* less forks of rrdtool and less file opens: the whole update process has an

* enormous improvement.

$precmd="rrdtool update ".WD."/".$cs."/wait/${cleanName}.rrd ";

$lastcmd="rrdtool info ".WD."/".$cs."/wait/${cleanName}.rrd".

"| grep last_update | awk '{print \$NF}'";

$last=trim(`$lastcmd`);

printf ("%s - %s - last: %d\n", $row['EVENT_NAME'], $cleanName, $last);

$i=0;

$cmd=$precmd;

$oldevent=$row['EVENT_NAME'];

}

$time=strtotime($row['END_INTERVAL_TIME']);

//print "time: ".$time." last: ".$last."\n";

if ( $time > $last ) {

$cmd.=" ".$time.":".$row['TOTAL_WAITS'].":".$row['MS'];

$i++;

}

if ($i >= 40) {

// when I reach 40 values per commandline I force

// the update: next loop will reinitialize a new commandline.

passthru($cmd);

$cmd=$precmd;

$i=0;

}

unset($row);

}

if ($i != 0) {

/* one more update pending in my buffer */

passthru($cmd);

}

oci_free_statement($stmt);

oci_close($ds);

Depending on how many different wait events you have, you’ll have a certain number of rrd files:

# ls -l
total 3864
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 Streams_AQ__enqueue_blocked_on_low_memory.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 20 08:18 buffer_busy_waits.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 control_file_parallel_write.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 control_file_sequential_read.rrd
-rw-r--r-- 1 ludovico ludovico 165304 Apr 30 10:12 cursor__pin_S_wait_on_X.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 db_file_scattered_read.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 db_file_sequential_read.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 events_in_waitclass_Other.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__cache_buffers_chains.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__library_cache.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 11 13:22 latch__library_cache_lock.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 20 08:18 latch__redo_writing.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__row_cache_objects.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__shared_pool.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 library_cache_load_lock.rrd
-rw-r--r-- 1 ludovico ludovico 165304 Apr 15 13:17 library_cache_lock.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_buffer_space.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_parallel_write.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_sequential_read.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_single_write.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_switch_completion.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 11 13:22 log_file_sync.rrd
-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 os_thread_startup.rrd

# ls -l

total 3864

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 Streams_AQ__enqueue_blocked_on_low_memory.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 20 08:18 buffer_busy_waits.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 control_file_parallel_write.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 control_file_sequential_read.rrd

-rw-r--r-- 1 ludovico ludovico 165304 Apr 30 10:12 cursor__pin_S_wait_on_X.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 db_file_scattered_read.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 db_file_sequential_read.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 events_in_waitclass_Other.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__cache_buffers_chains.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__library_cache.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 11 13:22 latch__library_cache_lock.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 20 08:18 latch__redo_writing.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__row_cache_objects.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 latch__shared_pool.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 library_cache_load_lock.rrd

-rw-r--r-- 1 ludovico ludovico 165304 Apr 15 13:17 library_cache_lock.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_buffer_space.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_parallel_write.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_sequential_read.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_single_write.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 log_file_switch_completion.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 11 13:22 log_file_sync.rrd

-rw-r--r-- 1 ludovico ludovico 165304 May 25 15:00 os_thread_startup.rrd

As you can see, they are not so big…

Once you have your data in rrd files, it’s quite simple to script even complex plots with several datasources. Everything depends on the results you want.
This script stack all my wait events for a certain instance: it takes the directory containing all the rrds as first argument and the number of hours we want to be plotted as second argument:

cs=$1
hours=${2:-148}

eventlist=`ls $cs/wait/*rrd`

colors[1]="#000000"
colors[2]="#000055"
colors[3]="#0000aa"
colors[4]="#0000ff"
colors[5]="#550055"
colors[6]="#aa00aa"
colors[7]="#ff00ff"
colors[8]="#550000"
colors[9]="#aa0000"
colors[10]="#ff0000"
colors[11]="#555500"
colors[12]="#aaaa00"
colors[13]="#ffff00"
colors[14]="#005500"
colors[15]="#00aa00"
colors[16]="#00ff00"
colors[17]="#005555"
colors[18]="#00aaaa"
colors[19]="#00ffff"
colors[20]="#555555"
colors[21]="#aaaaaa"

i=0

for event in $eventlist ; do
        if [ $i -eq 0 ] ; then
                end=`rrdtool info $event | grep last_update | awk '{print $NF}'`
                end=`rrdtool info $cs/wait/control_file_parallel_write.rrd | grep last_update | awk '{print $NF}'`
                cmd="rrdtool graph - -s end-${hours}hours -e $end  -v \"milliseconds waited\" -l 0 -w 640 -h 240 -t \"$cs WAIT PROFILE\""
                i=$(($i+1))
        fi
        color=${colors[$i]}
        echo $color
        evname=`basename $event | sed -e s/\.rrd\$//`
        cmd="$cmd  DEF:$evname=$event:mswaited:AVERAGE"
        cmd="$cmd  AREA:${evname}${color}:"$evname":STACK"
        i=$(($i+1))
        if [ $i -eq 20 ] ; then
                i=1
        fi
done
        cmd="$cmd  |display /dev/input"
        echo $cmd
        eval $cmd
exit

cs=$1

hours=${2:-148}

eventlist=`ls $cs/wait/*rrd`

colors[1]="#000000"

colors[2]="#000055"

colors[3]="#0000aa"

colors[4]="#0000ff"

colors[5]="#550055"

colors[6]="#aa00aa"

colors[7]="#ff00ff"

colors[8]="#550000"

colors[9]="#aa0000"

colors[10]="#ff0000"

colors[11]="#555500"

colors[12]="#aaaa00"

colors[13]="#ffff00"

colors[14]="#005500"

colors[15]="#00aa00"

colors[16]="#00ff00"

colors[17]="#005555"

colors[18]="#00aaaa"

colors[19]="#00ffff"

colors[20]="#555555"

colors[21]="#aaaaaa"

i=0

for event in $eventlist ; do

if [ $i -eq 0 ] ; then

end=`rrdtool info $event | grep last_update | awk '{print $NF}'`

end=`rrdtool info $cs/wait/control_file_parallel_write.rrd | grep last_update | awk '{print $NF}'`

cmd="rrdtool graph - -s end-${hours}hours -e $end -v \"milliseconds waited\" -l 0 -w 640 -h 240 -t \"$cs WAIT PROFILE\""

i=$(($i+1))

color=${colors[$i]}

echo $color

evname=`basename $event | sed -e s/\.rrd\$//`

cmd="$cmd DEF:$evname=$event:mswaited:AVERAGE"

cmd="$cmd AREA:${evname}${color}:"$evname":STACK"

i=$(($i+1))

if [ $i -eq 20 ] ; then

i=1

done

cmd="$cmd |display /dev/input"

echo $cmd

eval $cmd

exit

The resulting command is very long:

rrdtool graph - -s end-148hours -e 1243252800 \
 -v "milliseconds waited" -l 0 -w 640 -h 240 -t "mydb WAIT PROFILE"\
 DEF:Streams_AQ__enqueue_blocked_on_low_memory=mydb/wait/Streams_AQ__enqueue_blocked_on_low_memory.rrd:mswaited:AVERAGE \
 AREA:Streams_AQ__enqueue_blocked_on_low_memory#000000:Streams_AQ__enqueue_blocked_on_low_memory:STACK\
 DEF:buffer_busy_waits=mydb/wait/buffer_busy_waits.rrd:mswaited:AVERAGE \
 AREA:buffer_busy_waits#000055:buffer_busy_waits:STACK\
 DEF:control_file_parallel_write=mydb/wait/control_file_parallel_write.rrd:mswaited:AVERAGE \
 AREA:control_file_parallel_write#0000aa:control_file_parallel_write:STACK\
 DEF:control_file_sequential_read=mydb/wait/control_file_sequential_read.rrd:mswaited:AVERAGE \
 AREA:control_file_sequential_read#0000ff:control_file_sequential_read:STACK\
 DEF:cursor__pin_S_wait_on_X=mydb/wait/cursor__pin_S_wait_on_X.rrd:mswaited:AVERAGE \
 AREA:cursor__pin_S_wait_on_X#550055:cursor__pin_S_wait_on_X:STACK\
 DEF:db_file_scattered_read=mydb/wait/db_file_scattered_read.rrd:mswaited:AVERAGE \
 AREA:db_file_scattered_read#aa00aa:db_file_scattered_read:STACK\
 DEF:db_file_sequential_read=mydb/wait/db_file_sequential_read.rrd:mswaited:AVERAGE \
 AREA:db_file_sequential_read#ff00ff:db_file_sequential_read:STACK\
 DEF:events_in_waitclass_Other=mydb/wait/events_in_waitclass_Other.rrd:mswaited:AVERAGE \
 AREA:events_in_waitclass_Other#550000:events_in_waitclass_Other:STACK\
 DEF:latch__cache_buffers_chains=mydb/wait/latch__cache_buffers_chains.rrd:mswaited:AVERAGE \
 AREA:latch__cache_buffers_chains#aa0000:latch__cache_buffers_chains:STACK\
 DEF:latch__library_cache=mydb/wait/latch__library_cache.rrd:mswaited:AVERAGE \
 AREA:latch__library_cache#ff0000:latch__library_cache:STACK\
 DEF:latch__library_cache_lock=mydb/wait/latch__library_cache_lock.rrd:mswaited:AVERAGE \
 AREA:latch__library_cache_lock#555500:latch__library_cache_lock:STACK\
 DEF:latch__redo_writing=mydb/wait/latch__redo_writing.rrd:mswaited:AVERAGE \
 AREA:latch__redo_writing#aaaa00:latch__redo_writing:STACK\
 DEF:latch__row_cache_objects=mydb/wait/latch__row_cache_objects.rrd:mswaited:AVERAGE \
 AREA:latch__row_cache_objects#ffff00:latch__row_cache_objects:STACK\
 DEF:latch__shared_pool=mydb/wait/latch__shared_pool.rrd:mswaited:AVERAGE \
 AREA:latch__shared_pool#005500:latch__shared_pool:STACK\
 DEF:library_cache_load_lock=mydb/wait/library_cache_load_lock.rrd:mswaited:AVERAGE \
 AREA:library_cache_load_lock#00aa00:library_cache_load_lock:STACK\
 DEF:library_cache_lock=mydb/wait/library_cache_lock.rrd:mswaited:AVERAGE \
 AREA:library_cache_lock#00ff00:library_cache_lock:STACK\
 DEF:log_buffer_space=mydb/wait/log_buffer_space.rrd:mswaited:AVERAGE \
 AREA:log_buffer_space#005555:log_buffer_space:STACK\
 DEF:log_file_parallel_write=mydb/wait/log_file_parallel_write.rrd:mswaited:AVERAGE \
 AREA:log_file_parallel_write#00aaaa:log_file_parallel_write:STACK\
 DEF:log_file_sequential_read=mydb/wait/log_file_sequential_read.rrd:mswaited:AVERAGE \
 AREA:log_file_sequential_read#00ffff:log_file_sequential_read:STACK\
 DEF:log_file_single_write=mydb/wait/log_file_single_write.rrd:mswaited:AVERAGE \
 AREA:log_file_single_write#000000:log_file_single_write:STACK\
 DEF:log_file_switch_completion=mydb/wait/log_file_switch_completion.rrd:mswaited:AVERAGE \
 AREA:log_file_switch_completion#000055:log_file_switch_completion:STACK\
 DEF:log_file_sync=mydb/wait/log_file_sync.rrd:mswaited:AVERAGE \
 AREA:log_file_sync#0000aa:log_file_sync:STACK\
 DEF:os_thread_startup=mydb/wait/os_thread_startup.rrd:mswaited:AVERAGE \
 AREA:os_thread_startup#0000ff:os_thread_startup:STACK |display /dev/input

rrdtool graph - -s end-148hours -e 1243252800 \

-v "milliseconds waited" -l 0 -w 640 -h 240 -t "mydb WAIT PROFILE"\

DEF:Streams_AQ__enqueue_blocked_on_low_memory=mydb/wait/Streams_AQ__enqueue_blocked_on_low_memory.rrd:mswaited:AVERAGE \

AREA:Streams_AQ__enqueue_blocked_on_low_memory#000000:Streams_AQ__enqueue_blocked_on_low_memory:STACK\

DEF:buffer_busy_waits=mydb/wait/buffer_busy_waits.rrd:mswaited:AVERAGE \

AREA:buffer_busy_waits#000055:buffer_busy_waits:STACK\

DEF:control_file_parallel_write=mydb/wait/control_file_parallel_write.rrd:mswaited:AVERAGE \

AREA:control_file_parallel_write#0000aa:control_file_parallel_write:STACK\

DEF:control_file_sequential_read=mydb/wait/control_file_sequential_read.rrd:mswaited:AVERAGE \

AREA:control_file_sequential_read#0000ff:control_file_sequential_read:STACK\

DEF:cursor__pin_S_wait_on_X=mydb/wait/cursor__pin_S_wait_on_X.rrd:mswaited:AVERAGE \

AREA:cursor__pin_S_wait_on_X#550055:cursor__pin_S_wait_on_X:STACK\

DEF:db_file_scattered_read=mydb/wait/db_file_scattered_read.rrd:mswaited:AVERAGE \

AREA:db_file_scattered_read#aa00aa:db_file_scattered_read:STACK\

DEF:db_file_sequential_read=mydb/wait/db_file_sequential_read.rrd:mswaited:AVERAGE \

AREA:db_file_sequential_read#ff00ff:db_file_sequential_read:STACK\

DEF:events_in_waitclass_Other=mydb/wait/events_in_waitclass_Other.rrd:mswaited:AVERAGE \

AREA:events_in_waitclass_Other#550000:events_in_waitclass_Other:STACK\

DEF:latch__cache_buffers_chains=mydb/wait/latch__cache_buffers_chains.rrd:mswaited:AVERAGE \

AREA:latch__cache_buffers_chains#aa0000:latch__cache_buffers_chains:STACK\

DEF:latch__library_cache=mydb/wait/latch__library_cache.rrd:mswaited:AVERAGE \

AREA:latch__library_cache#ff0000:latch__library_cache:STACK\

DEF:latch__library_cache_lock=mydb/wait/latch__library_cache_lock.rrd:mswaited:AVERAGE \

AREA:latch__library_cache_lock#555500:latch__library_cache_lock:STACK\

DEF:latch__redo_writing=mydb/wait/latch__redo_writing.rrd:mswaited:AVERAGE \

AREA:latch__redo_writing#aaaa00:latch__redo_writing:STACK\

DEF:latch__row_cache_objects=mydb/wait/latch__row_cache_objects.rrd:mswaited:AVERAGE \

AREA:latch__row_cache_objects#ffff00:latch__row_cache_objects:STACK\

DEF:latch__shared_pool=mydb/wait/latch__shared_pool.rrd:mswaited:AVERAGE \

AREA:latch__shared_pool#005500:latch__shared_pool:STACK\

DEF:library_cache_load_lock=mydb/wait/library_cache_load_lock.rrd:mswaited:AVERAGE \

AREA:library_cache_load_lock#00aa00:library_cache_load_lock:STACK\

DEF:library_cache_lock=mydb/wait/library_cache_lock.rrd:mswaited:AVERAGE \

AREA:library_cache_lock#00ff00:library_cache_lock:STACK\

DEF:log_buffer_space=mydb/wait/log_buffer_space.rrd:mswaited:AVERAGE \

AREA:log_buffer_space#005555:log_buffer_space:STACK\

DEF:log_file_parallel_write=mydb/wait/log_file_parallel_write.rrd:mswaited:AVERAGE \

AREA:log_file_parallel_write#00aaaa:log_file_parallel_write:STACK\

DEF:log_file_sequential_read=mydb/wait/log_file_sequential_read.rrd:mswaited:AVERAGE \

AREA:log_file_sequential_read#00ffff:log_file_sequential_read:STACK\

DEF:log_file_single_write=mydb/wait/log_file_single_write.rrd:mswaited:AVERAGE \

AREA:log_file_single_write#000000:log_file_single_write:STACK\

DEF:log_file_switch_completion=mydb/wait/log_file_switch_completion.rrd:mswaited:AVERAGE \

AREA:log_file_switch_completion#000055:log_file_switch_completion:STACK\

DEF:log_file_sync=mydb/wait/log_file_sync.rrd:mswaited:AVERAGE \

AREA:log_file_sync#0000aa:log_file_sync:STACK\

DEF:os_thread_startup=mydb/wait/os_thread_startup.rrd:mswaited:AVERAGE \

AREA:os_thread_startup#0000ff:os_thread_startup:STACK |display /dev/input

This is the resulting graph:

OHHHHHHHHHHHH COOOOL!!!
😉

Any comment is appreciated! thanks

How to collect Oracle Application Server performance data with DMS and RRDtool

Posted on March 2, 2009 by Ludovico

RRDize everything, chapter 1

If you are managing some Application Server deployments you should have wondered how to check and collect performance data.
As stated in documentation, you can gather performance metrics with the dmstool utility.
AFAIK, this can be done from 9.0.2 release upwards, but i’m concerned DMS will not work on Weblogic.

Mainly, you should have an external server that acts as collector (it could be a server in the Oracle AS farm as well): copy the dms.jar library from an Oracle AS installation to your collector and use it as you would use dmstool:

java -jar dms.jar [dmstool options]

1	java -jar dms.jar [dmstool options]

There are three basilar methods to get data:

Get all metrics at once:

java -jar dms.jar -dump -a "youraddress://..." [format=xml]

1	java -jar dms.jar -dump -a "youraddress://..." [format=xml]

Get only the interesting metrics:

java -jar dms.jar -a "youraddress://..." metric metric ...

1	java -jar dms.jar -a "youraddress://..." metric metric ...

Get metrics included into specific DMS tables:

java -jar dms.jar -a "youraddress://..." -table table table ...

1	java -jar dms.jar -a "youraddress://..." -table table table ...

What youraddress:// is, it depends on the component you are trying to connect:

opmn://asserver:6003
http://asserver:7200/dms0/Spy
ajp13://asserver:3301/dmsoc4j/Spy

opmn://asserver:6003

http://asserver:7200/dms0/Spy

ajp13://asserver:3301/dmsoc4j/Spy

If you are trying to connect to the OHS (Apache), be careful to allow remote access from the collector by editing the dms.conf file.

Now that you can query dms data, you should store it somewhere.
Personally, I did a first attempt with dmstool -dump format=xml. I wrote a parser in PHP with SimpleXML extension and I did a lot of inserts into a MySQL database. After a few months the whole data collected from tens of servers was too much to be mantained…
To avoid the maintenance of a DWH-grade database I investigated and found RRDTool. Now I’m asking how could I live without it!

I then wrote a parser in awk that parse the output of the dms.jar call and invoke an rrdtool update command.
I always use dms.jar -table command. The output has always the same format:

###SOF

Mon Mar 02 17:01:19 CET 2009

---------------
TABLE1_Name
---------------

record1_metric1.name:     value       units
record1_metric2.name:     value       units
....

record2_metric1.name:     value       units
record2_metric2.name:     value       units
....

---
TABLE2_Name
---

record1_metric1.name:     value       units
record1_metric2.name:     value       units
....

record2_metric1.name:     value       units
record2_metric2.name:     value       units
....

##EOF

###SOF

Mon Mar 02 17:01:19 CET 2009

---------------

TABLE1_Name

---------------

record1_metric1.name: value units

record1_metric2.name: value units

....

record2_metric1.name: value units

record2_metric2.name: value units

....

---

TABLE2_Name

---

record1_metric1.name: value units

record1_metric2.name: value units

....

record2_metric1.name: value units

record2_metric2.name: value units

....

##EOF

So I written an awk file that works for me.
use it this way:

 java -jar dms.jar ... | awk -f parse_output.awk

1	java -jar dms.jar ... \| awk -f parse_output.awk

####################
# parse_output.awk #
####################

#function pl() replaces all non alphanumeric occurrences with an underscore
function pl(input) {
        return gensub("[^[:alnum:]_-]","_","G",input);
}

# function get_rrd_path() returns a path where the rrd files should be placed
# I should rewrite a new path for each dms table... I'll skip many of them
function get_rrd_path() {
        if (table == "mod_oc4j_destination_metrics")
                return sprintf("%s/%s/%s/%s.rrd", record["Host"],
                    pl(table), pl(record["Name.value"]), pl(var) );
        if (table == "mod_oc4j_mount_pt_metrics")
                return sprintf("%s/%s/%s/%s/%s.rrd", record["Host"],
                    pl(table), pl(record["Destination.value"]), pl(record["Name.value"]), pl(var) );
        if (table == "ohs_server")
                return sprintf("%s/%s/%s.rrd", record["Host"], pl(table), pl(var) );
        if (table == "JVM")
                return sprintf("%s/%s/%s/%s.rrd", record["Host"],
                    pl(table), pl(record["Process"]), pl(var) );
        if (table == "opmn_process")
                return sprintf("%s/%s/%s/%s/%s/%s/%s/%s.rrd", record["Host"], pl(table),
                  pl(record["iasInstance.value"]), pl(record["opmn_ias_component"]),
                  pl(record["opmn_process_type"]),pl(record["opmn_process_set"]),
                  pl(record["Name"]), pl(var) );

        return sprintf("%s/%s/%s.rrd", record["Host"], pl(table), pl(var) );
}
# function process_record actually does the dirty work of invoking the update script
function process_record() {
        #every record has a timeStamp.ts metric that I should use to update my rrd
        ts=substr(record["timeStamp.ts"],0,10);
        for ( var in record ) {
        if ( var != "timeStamp.ts" &amp;&amp; record[var] ~ /^[[:digit:]]+$/ ) {
            if ( var ~ /\.(count|completed|time)$/ ) {
                dstype="DERIVE";
            } else {
                if ( var == "responseSize.value" ) {
                    dstype="DERIVE";
                } else {
                    dstype="GAUGE";
                }
            }
            rrdFile=sprintf("/path_to_data/%s",get_rrd_path());
            #### update_metric_rrd is a shell script listed below!!!!!
            cmd=sprintf("/path_to_scripts/update_metric_rrd %s %s %d %d",
                rrdFile,dstype,ts,record[var]);
            system(cmd);
            }
        }
}

# parse_record() populates an hash array
# with all metrics belonging to the table record
function parse_record() {
    #print "RRRR -  START OF RECORD (table " table ")"
    delete record
    while ( ! /^$/ ) {
        # I'm parsing the record as far I'm in this while statement
        # the array hash is the name of the dms metric basename.
        # $1 is the metric name but I have to trim the final ":"
        key=substr($1,0,length($1)-1)
        record[key]=$2
        getline
    }
    # this function is included in funcions.awk:
    # I invoke it to process the record I've just parsed
    process_record();
}
BEGIN {
    # as far as started is 0, I've never reached the first table
    started=0
}

#MAIN
{
    # I jump over the first lines until I reach the first table
    if (started==0) {
        while ( ! /^---/ ) {
           getline
        }
        started=1
    }

    # looking for the next occurrence of a table
    # all tables start with:
    # ----------
    # table_name
    # ----------
    if ( /^---/ ) {
        # first table reached: the next row is my table name,
        # then I reach again a dashed line -----
        getline table
        getline trash
        #print ""
        #print "##########################"
        print "  TABELLA " table
        #print "##########################"
        next
    }

    if ( ! /^$/ ) {
        # reached an empty line: could be the end of a record or the and of a table
        # since a new table is threated in previous "if" statement, I'm starting a new record.
        parse_record()
    }

}

END {
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

####################

# parse_output.awk #

####################

#function pl() replaces all non alphanumeric occurrences with an underscore

function pl(input) {

return gensub("[^[:alnum:]_-]","_","G",input);

}

# function get_rrd_path() returns a path where the rrd files should be placed

# I should rewrite a new path for each dms table... I'll skip many of them

function get_rrd_path() {

if (table == "mod_oc4j_destination_metrics")

return sprintf("%s/%s/%s/%s.rrd", record["Host"],

pl(table), pl(record["Name.value"]), pl(var) );

if (table == "mod_oc4j_mount_pt_metrics")

return sprintf("%s/%s/%s/%s/%s.rrd", record["Host"],

pl(table), pl(record["Destination.value"]), pl(record["Name.value"]), pl(var) );

if (table == "ohs_server")

return sprintf("%s/%s/%s.rrd", record["Host"], pl(table), pl(var) );

if (table == "JVM")

return sprintf("%s/%s/%s/%s.rrd", record["Host"],

pl(table), pl(record["Process"]), pl(var) );

if (table == "opmn_process")

return sprintf("%s/%s/%s/%s/%s/%s/%s/%s.rrd", record["Host"], pl(table),

pl(record["iasInstance.value"]), pl(record["opmn_ias_component"]),

pl(record["opmn_process_type"]),pl(record["opmn_process_set"]),

pl(record["Name"]), pl(var) );

return sprintf("%s/%s/%s.rrd", record["Host"], pl(table), pl(var) );

}

# function process_record actually does the dirty work of invoking the update script

function process_record() {

#every record has a timeStamp.ts metric that I should use to update my rrd

ts=substr(record["timeStamp.ts"],0,10);

for ( var in record ) {

if ( var != "timeStamp.ts" && record[var] ~ /^[[:digit:]]+$/ ) {

if ( var ~ /\.(count|completed|time)$/ ) {

dstype="DERIVE";

} else {

if ( var == "responseSize.value" ) {

dstype="DERIVE";

} else {

dstype="GAUGE";

}

rrdFile=sprintf("/path_to_data/%s",get_rrd_path());

#### update_metric_rrd is a shell script listed below!!!!!

cmd=sprintf("/path_to_scripts/update_metric_rrd %s %s %d %d",

rrdFile,dstype,ts,record[var]);

system(cmd);

}

# parse_record() populates an hash array

# with all metrics belonging to the table record

function parse_record() {

#print "RRRR - START OF RECORD (table " table ")"

delete record

while ( ! /^$/ ) {

# I'm parsing the record as far I'm in this while statement

# the array hash is the name of the dms metric basename.

# $1 is the metric name but I have to trim the final ":"

key=substr($1,0,length($1)-1)

record[key]=$2

getline

}

# this function is included in funcions.awk:

# I invoke it to process the record I've just parsed

process_record();

}

BEGIN {

# as far as started is 0, I've never reached the first table

started=0

}

#MAIN

{

# I jump over the first lines until I reach the first table

if (started==0) {

while ( ! /^---/ ) {

getline

}

started=1

}

# looking for the next occurrence of a table

# all tables start with:

# ----------

# table_name

# ----------

if ( /^---/ ) {

# first table reached: the next row is my table name,

# then I reach again a dashed line -----

getline table

getline trash

#print ""

#print "##########################"

print " TABELLA " table

#print "##########################"

}

if ( ! /^$/ ) {

# reached an empty line: could be the end of a record or the and of a table

# since a new table is threated in previous "if" statement, I'm starting a new record.

parse_record()

}

END {

}

And this is the code for update_metric_rrd:

#!/bin/bash
RRDFILE=$1
DSTYPE=$2
TS=$3
VALUE=$4

rrdtool update $RRDFILE ${TS}:${VALUE}

if [ $? -ne 0 ] ; then
        DIR=`dirname $RRDFILE`

        [ -d $DIR ] || mkdir -p $DIR
        [ -f $RRDFILE ] || rrdtool create $RRDFILE -b "now-1month" -s 1800 \
                DS:metric:${DSTYPE}:7200:0:U \
                RRA:AVERAGE:0.5:1:672 \
                RRA:AVERAGE:0.5:4:1080 \
                RRA:AVERAGE:0.5:12:1460 \
                RRA:AVERAGE:0.5:48:1095 \
                RRA:MAX:0.5:4:1080 \
                RRA:MAX:0.5:12:1460 \
                RRA:MAX:0.5:48:1095 \
                RRA:LAST:0.5:1:672
        rrdtool update $RRDFILE ${TS}:${VALUE}
fi

#!/bin/bash

RRDFILE=$1

DSTYPE=$2

TS=$3

VALUE=$4

rrdtool update $RRDFILE ${TS}:${VALUE}

if [ $? -ne 0 ] ; then

DIR=`dirname $RRDFILE`

[ -d $DIR ] || mkdir -p $DIR

[ -f $RRDFILE ] || rrdtool create $RRDFILE -b "now-1month" -s 1800 \

DS:metric:${DSTYPE}:7200:0:U \

RRA:AVERAGE:0.5:1:672 \

RRA:AVERAGE:0.5:4:1080 \

RRA:AVERAGE:0.5:12:1460 \

RRA:AVERAGE:0.5:48:1095 \

RRA:MAX:0.5:4:1080 \

RRA:MAX:0.5:12:1460 \

RRA:MAX:0.5:48:1095 \

RRA:LAST:0.5:1:672

rrdtool update $RRDFILE ${TS}:${VALUE}

Once you have all your rrd files populated, it’s easy to script automatic reporting. You would probably want a graph with the request count served by your Apache cluster, along with its linear regression:

rrdtool graph - -s "end-${hours}hours" -e $end \
                -v "Requests Completed/sec" \
        -w 640 -h 240 --slope-mode \
                -t "HTTP Requests for www.ludovicocaldara.net" \
                DEF:1request_completed=/data/wwwserver1/ohs_server/request_completed.rrd:metric:AVERAGE \
                DEF:2request_completed=/data/wwwserver2/ohs_server/request_completed.rrd:metric:AVERAGE \
                CDEF:request_completed=1request_completed,2request_completed,+ \
                VDEF:slope=request_completed,LSLSLOPE \
                VDEF:lslint=request_completed,LSLINT \
                CDEF:reg=request_completed,POP,slope,COUNT,*,lslint,+ \
                LINE1:reg#666666:"Regression" \
                AREA:1request_completed#4040AA:"wwwserver1"  \
                AREA:2request_completed#6666FF:"wwwserver1":STACK  \
        &gt; mygraph.png

rrdtool graph - -s "end-${hours}hours" -e $end \

-v "Requests Completed/sec" \

-w 640 -h 240 --slope-mode \

-t "HTTP Requests for www.ludovicocaldara.net" \

DEF:1request_completed=/data/wwwserver1/ohs_server/request_completed.rrd:metric:AVERAGE \

DEF:2request_completed=/data/wwwserver2/ohs_server/request_completed.rrd:metric:AVERAGE \

CDEF:request_completed=1request_completed,2request_completed,+ \

VDEF:slope=request_completed,LSLSLOPE \

VDEF:lslint=request_completed,LSLINT \

CDEF:reg=request_completed,POP,slope,COUNT,*,lslint,+ \

LINE1:reg#666666:"Regression" \

AREA:1request_completed#4040AA:"wwwserver1" \

AREA:2request_completed#6666FF:"wwwserver1":STACK \

> mygraph.png

This is the result:
OHS request completed
OHHHHHHHHHHHH!!!! COOL!!!!

That’s all for DMS capacity planning. Stay tuned, more about rrdtool is coming!

JBoss Portal and MySQL scalability: What The…???

Posted on December 10, 2008 by Ludovico

I found several queries running on a MySQL 5.0 database like this one:

SELECT PATH, NAME FROM JBP_OBJECT_NODE  WHERE PK IN (
SELECT NODE_KEY FROM JBP_OBJECT_NODE_SEC WHERE ROLE IN (
SELECT jr.jbp_name from jbp_users ju, jbp_role_membership jrm,
jbp_roles jr
where jrm.jbp_uid = ju.jbp_uid
and jr.jbp_rid = jrm.jbp_rid
and ju.jbp_uname = 'LUDOVICO'
and ju.jbp_enabled = 1));

SELECT PATH, NAME FROM JBP_OBJECT_NODE WHERE PK IN (

SELECT NODE_KEY FROM JBP_OBJECT_NODE_SEC WHERE ROLE IN (

SELECT jr.jbp_name from jbp_users ju, jbp_role_membership jrm,

jbp_roles jr

where jrm.jbp_uid = ju.jbp_uid

and jr.jbp_rid = jrm.jbp_rid

and ju.jbp_uname = 'LUDOVICO'

and ju.jbp_enabled = 1));

This query is related to JBoss Portal and does a full scan on table JBP_OBJECT_NODE.

It has bad performances (>0.8 sec) with just a few records:

mysql> select count(*) from JBP_OBJECT_NODE;
+———-+
| count(*) |
+———-+
| 33461 |
+———-+

If I rewrite the query using an inner join (à la Oracle, please forgive me) instead of a subquery I get an index scan:

SELECT distinct a.PATH, a.NAME /* , b.NODE_KEY */ from  JBP_OBJECT_NODE
a, JBP_OBJECT_NODE_SEC b
where a.pk=b.NODE_KEY
and b.ROLE IN (
SELECT jr.jbp_name from jbp_users ju, jbp_role_membership jrm,
jbp_roles jr
where jrm.jbp_uid = ju.jbp_uid
and jr.jbp_rid = jrm.jbp_rid
and ju.jbp_uname = 'UTDEMO'
and ju.jbp_enabled = 1);

SELECT distinct a.PATH, a.NAME /* , b.NODE_KEY */ from JBP_OBJECT_NODE

a, JBP_OBJECT_NODE_SEC b

where a.pk=b.NODE_KEY

and b.ROLE IN (

SELECT jr.jbp_name from jbp_users ju, jbp_role_membership jrm,

jbp_roles jr

where jrm.jbp_uid = ju.jbp_uid

and jr.jbp_rid = jrm.jbp_rid

and ju.jbp_uname = 'UTDEMO'

and ju.jbp_enabled = 1);

With 30k records the execution time falls down from 0.8 secs to 0.01 secs…
That’s NOT all! I found this open bug:

https://jira.jboss.org/jira/browse/JBPORTAL-2040

With many users registered in, the JBoss Portal Admin console tooks over a minute to show a single page…

I don’t like portals…

Oracle RAC Standard Edition to achieve low cost and high performance

Posted on November 28, 2008 by Ludovico

I finished today to create a new production environment based on 2 Linux serverX86_64 and running Oracle RAC 10gR2. (I know, there is 11g right now, but I’m a conservative!)
Wheeew, I just spent a couple of hours applying all the recommended patches!
We choosed 2 nodes with a maximum of 2 multi-core processors each one so we can license Standard Edition instead of Enterprise Edition. 64bits addressing allow us to allocate many gigabytes of SGA. I’m starting with 5Gb but I think we’ll need more. And a set of 6x300Gb 15krpms disks (it can be expanded with more disks and more shelves).
This configuration keeps low the total cost of ownership but achieves best performance.
Due to disks layout, costs and needed usable storage, we had to configure one huge RAID5 on the SAN with multi-path. I decided anyway to create 2 ASM disk groups (ASM is mandatory for Standard Edition RAC), one for the DB, the second one for the recovery area. With spare disks we should have enough availability and even if it’s a RAID5 I saw good write performances (>150M/s).

Welcome new RAC, I hope we’ll feel good together!

System triggers, stats$user_log and side effects

Posted on October 9, 2008 by Ludovico

Sometimes people get advice from internet: both Metalink or well-known consulting sites.
If people need a fix or a feature, they use to trust advices.

Last week I heard a collegue about a 10g RAC database with performance problems and, since I never lay on my chair, I probed both AWR and ADDM . I suddenly recognized heavy enqueues and physical reads
over a segment named STATS$USER_LOG. “Strange”, I said, “I cannot remember this name in neither perfstat or catalog segments”.
Then I searched the Internet and the Metalink and found the same thing in BOTH metalink.oracle.com and www.dba-oracle.com: a trick to trace logon and logoffs into a table using system triggers.

Look at this code:

create or replace trigger logon_audit_trigger AFTER LOGON ON DATABASE BEGIN insert into stats$user_log values( user, sys_context('USERENV','SESSIONID'), sys_context('USERENV','HOST'), sysdate, to_char(sysdate, 'hh24:mi:ss'), [...] ); COMMIT; END; /

Cool, every single access is kept into stats$user_log.

Let’s see the logoff trigger:

create or replace trigger logoff_audit_trigger BEFORE LOGOFF ON DATABASE BEGIN -- *************************************************** -- Update the last action accessed -- *************************************************** update stats$user_log [...] --*************************************************** -- Update the last program accessed -- *************************************************** update stats$user_log [...] -- *************************************************** [ ... many, many updates ...] -- *************************************************** update stats$user_log [...] -- *************************************************** -- Compute the elapsed minutes -- *************************************************** update stats$user_log set elapsed_minutes = round((logoff_day - logon_day)*1440) where sys_context('USERENV','SESSIONID') = session_id; COMMIT; END; /
That’s all. It inserts a row when someone logons. It updates MANY rows when someone logoffs.
There is no match between the record inserted and the records updated (but the session_id).
Neither indexes or constraints.

What’s the matter?

What happens if we have many logons?

SQL> select num_rows from dba_tables where table_name='STATS$USER_LOG';

NUM_ROWS
———-
3053931

What happens if the execution plan does a full scan?

SQL> explain plan for update stats$user_log […]

Explained.

SQL> @?/rdbms/admin/utlxpls --------------------------------------------- | Id | Operation | Name | --------------------------------------------- | 0 | UPDATE STATEMENT | | | 1 | UPDATE | STATS$USER_LOG | | 2 | TABLE ACCESS FULL| STATS$USER_LOG | ---------------------------------------------

How many reads should it take?

SQL> select bytes/1024/1024 Mb from dba_Segments where segment_name='STATS$USER_LOG';

MB
———-
237

The database performace will decrease constantly and very slowly…..
Remember: never trust a solution if it involves a change on the system.

Plot Oracle historical statistics within SQL*Plus

Posted on September 24, 2008 by Ludovico

More than often I’m asked to investigate “what happened yesterday when performance problems appeared”.

Sometimes I have the Enterprise Manager DB Console licensed, sometimes not. Sometimes I have direct SQL*Net access to the database that I may use to produce custom reports with my LAMP self-developed application. But it may happen that only an ssh access is granted to the db server.

That’s why I started to develop some little scripts to plot the trends of database timed statistics.

Let’s see this one:
SQL> @sysstat.sql Enter a sysstat to search for: physical reads
STAT_ID STAT_NAME ----------- ------------------------------------------
2263124246physical reads 4171507801 physical reads cache 297908839 physical reads cache prefetch 2589616721 physical reads direct 2564935310 physical reads direct (lob) 2663793346 physical reads direct temporary tablespace 473165409 physical reads for flashback new 3102888545 physical reads prefetch warmup 531193461 physical reads retry corrupt

9 rows selected.

Enter the desired stat_id: 2263124246
Enter the start date (YYYYMMDD) [defaults today] : 20080922
Enter the end date date (YYYYMMDD) [defaults today] : 20080922
STAT_NAME START END ---------------- -------- -------- physical reads 20080922 20080922

BEGIN_INTERVAL_TIME VALORE PLOTTED_VALUE ------------------------- ---------- ------------------------- 22-SEP-08 12.00.12.122 AM 0 22-SEP-08 01.00.28.253 AM 120092 22-SEP-08 02.00.05.039 AM 35780 22-SEP-08 03.00.55.595 AM 4792 22-SEP-08 04.00.43.725 AM 4905 22-SEP-08 05.00.31.855 AM 7300 22-SEP-08 06.00.17.017 AM 234596 22-SEP-08 07.00.08.132 AM 24651 22-SEP-08 08.00.50.936 AM 481884 22-SEP-08 09.00.33.488 AM 130201 22-SEP-08 10.00.03.805 AM 1300306 ** 22-SEP-08 11.00.07.764 AM 491857 22-SEP-08 12.00.31.548 PM 304702 22-SEP-08 01.01.04.880 PM 1023664 * 22-SEP-08 02.00.17.822 PM 8588180 ************ 22-SEP-08 03.00.36.969 PM 2201615 *** 22-SEP-08 04.01.01.397 PM 17237098 ************************* 22-SEP-08 05.00.39.262 PM 1606300 ** 22-SEP-08 06.00.03.829 PM 451568 22-SEP-08 07.00.31.461 PM 137684 22-SEP-08 08.00.05.966 PM 203803 22-SEP-08 09.00.24.829 PM 536394 22-SEP-08 10.00.12.945 PM 10209783 ************** 22-SEP-08 11.00.35.123 PM 6151663 *********

24 rows selected.

Oh! At 4.00 PM we had a lot of physical reads. Nice.

This is the code:

-- display given statistics from DBA_HIST_SYSSTAT
col BEGIN_INTERVAL_TIME for a25
set pages 100 lines 130
set verify off term on

accept sysstat prompt 'Enter a sysstat to search for: '
select STAT_ID, STAT_NAME
  from DBA_HIST_STAT_NAME
   where lower(STAT_NAME) like lower('%&sysstat%')
  order by stat_name;

accept stat_id prompt 'Enter the desired stat_id: '
accept startdate prompt 'Start date (YYYYMMDD) [today] : '
accept enddate prompt 'End date date (YYYYMMDD) [today] : '

select STAT_NAME,
  nvl('&startdate',to_char(sysdate,'YYYYMMDD')) as "START",
  nvl('&enddate',to_char(sysdate,'YYYYMMDD')) as "END"
 from DBA_HIST_STAT_NAME
where STAT_ID = &stat_id;

select BEGIN_INTERVAL_TIME, VALORE,
  substr( rpad('*',40*round( VALORE/max(VALORE)over(),2),'*'),1,40) PLOTTED_VALORE
  from (
  select s.BEGIN_INTERVAL_TIME BEGIN_INTERVAL_TIME,
    nvl(decode(greatest(VALUE, nvl(lag(VALUE) over
      (partition by s.dbid, s.instance_number, g.stat_name order by s.snap_id),0)),
    VALUE,
    VALUE - lag(VALUE) over
      (partition by s.dbid, s.instance_number, g.stat_name order by s.snap_id),VALUE), 0) VALORE
  from DBA_HIST_SNAPSHOT s,
          DBA_HIST_SYSSTAT g,
          v$instance i
  where s.SNAP_ID=g.SNAP_ID
  and g.STAT_ID='&stat_id'
  and s.BEGIN_INTERVAL_TIME >=
    trunc(to_timestamp(nvl('&startdate',to_char(sysdate,'YYYYMMDD')),'YYYYMMDD'))
  and s.BEGIN_INTERVAL_TIME < =
   trunc(to_timestamp(nvl('&enddate',to_char(sysdate,'YYYYMMDD')),'YYYYMMDD')+1)
  and s.instance_number=i.instance_number
  and s.instance_number=g.instance_number
  order by 1
);

-- display given statistics from DBA_HIST_SYSSTAT

col BEGIN_INTERVAL_TIME for a25

set pages 100 lines 130

set verify off term on

accept sysstat prompt 'Enter a sysstat to search for: '

select STAT_ID, STAT_NAME

from DBA_HIST_STAT_NAME

where lower(STAT_NAME) like lower('%&sysstat%')

order by stat_name;

accept stat_id prompt 'Enter the desired stat_id: '

accept startdate prompt 'Start date (YYYYMMDD) [today] : '

accept enddate prompt 'End date date (YYYYMMDD) [today] : '

select STAT_NAME,

nvl('&startdate',to_char(sysdate,'YYYYMMDD')) as "START",

nvl('&enddate',to_char(sysdate,'YYYYMMDD')) as "END"

from DBA_HIST_STAT_NAME

where STAT_ID = &stat_id;

select BEGIN_INTERVAL_TIME, VALORE,

substr( rpad('*',40*round( VALORE/max(VALORE)over(),2),'*'),1,40) PLOTTED_VALORE

from (

select s.BEGIN_INTERVAL_TIME BEGIN_INTERVAL_TIME,

nvl(decode(greatest(VALUE, nvl(lag(VALUE) over

(partition by s.dbid, s.instance_number, g.stat_name order by s.snap_id),0)),

VALUE,

VALUE - lag(VALUE) over

(partition by s.dbid, s.instance_number, g.stat_name order by s.snap_id),VALUE), 0) VALORE

from DBA_HIST_SNAPSHOT s,

DBA_HIST_SYSSTAT g,

v$instance i

where s.SNAP_ID=g.SNAP_ID

and g.STAT_ID='&stat_id'

and s.BEGIN_INTERVAL_TIME >=

trunc(to_timestamp(nvl('&startdate',to_char(sysdate,'YYYYMMDD')),'YYYYMMDD'))

and s.BEGIN_INTERVAL_TIME < =

trunc(to_timestamp(nvl('&enddate',to_char(sysdate,'YYYYMMDD')),'YYYYMMDD')+1)

and s.instance_number=i.instance_number

and s.instance_number=g.instance_number

order by 1

);

Ciao
—
Ludovico