线上Oracle需要实现类似MySQL slow query的监控脚本:把执行时间超过设定阈值的SQL定时通过邮件告警。实现过程记录如下:
主要思路是通过DBA_HIST的几个视图来获取每小时快照中慢SQL的情况。为了不影响线上环境,这里把脚本部署在独立的监控端,通过DBLINK定期抓取线上生产库的数据到监控数据库,经简单处理后生成报表(以HTML标记输出的xls文件),并将报表发送至邮箱。
定时脚本 每小时查询一次
00 * * * * /opt/scripts/oracle/get_slow_query.sh
脚本内容如下
[oracle@59-Mysql-Test ~]$ cat /opt/scripts/oracle/get_slow_query.sh
#!/bin/bash
# Hourly slow-query report job: exports the Oracle client environment for the
# monitoring instance, changes to the script directory, and mails the report
# file when the mail flag file is non-empty.
# NOTE(review): no sqlplus invocation appears in this listing, so the step that
# runs main.sql / refreshes the data seems to have been omitted -- confirm
# against the deployed script.

# Working files, all under /opt/scripts/oracle/.
# NOTE(review): errlog and check_file are assigned but never referenced in the
# lines shown here.
errlog="/opt/scripts/oracle/sqlerror.log"
# Report file sent as the mail attachment; main.sql spools to the same file name.
sq_data="/opt/scripts/oracle/slow_query_data.xls"
check_file="/opt/scripts/oracle/slowsql_check.log"
# Flag file: mail is sent only when this file exists and has a size above zero.
send_mail_check="/opt/scripts/oracle/send_mail.chk"

# Oracle environment for the local monitoring instance (SID "oramon"),
# exported explicitly so the Oracle binaries and libraries can be located.
export ORACLE_BASE=/u01/app/oracle
export ORACLE_HOME=/u01/app/oracle/product/11.2.0/db_1
export ORACLE_SID=oramon
export PATH=/usr/sbin:$PATH
export PATH=/u01/app/oracle/product/11.2.0/db_1/bin:$PATH
export LD_LIBRARY_PATH=$ORACLE_HOME/lib:/lib:/usr/lib
export CLASSPATH=/u01/app/oracle/product/11.2.0/db_1/JRE:/u01/app/oracle/product/11.2.0/db_1/jlib:/u01/app/oracle/product/11.2.0/db_1/rdbms/jlib

# Work from the script directory; main.sql spools to a relative file name.
cd /opt/scripts/oracle/

# Send the report as an attachment when the flag file is non-empty ([ -s ]).
# NOTE(review): no message body is supplied on stdin, and the meaning of -a
# differs between mail implementations (attachment in Heirloom mailx) -- verify
# on the target host.
[ -s ${send_mail_check} ]&& /bin/mail -a ${sq_data} -s "OracleDB find slow query,please check" xxx@xxx.com,xxx@xxx.com
[oracle@59-Mysql-Test oracle]$ cat main.sql
-- SQL*Plus driver script: configures HTML-markup output and spools it to
-- slow_query_data.xls in the current directory, then exits.
-- Allow up to 5000 characters per output line.
set linesize 5000
-- Suppress terminal output, substitution-variable verification and row-count
-- feedback; 999 lines per page.
set term off verify off feedback off pagesize 999
-- Write results as HTML markup (with entity mapping) into the spool file.
set markup html on entmap ON spool on preformat off
-- NOTE(review): nothing is executed between "spool" and "spool off" in this
-- listing; presumably the report query (e.g. @get_tables.sql) belongs there --
-- confirm against the deployed script.
spool slow_query_data.xls
spool off
exit
[oracle@59-Mysql-Test oracle]$ cat get_tables.sql
-- Report query: lists the rows of DBA_ORA_SLOW_QUERY whose elpe value
-- (elapsed time per execution, as computed by pro_get_slow_query) is above 10,
-- skipping rows recorded for machines rac01 and rac02.
-- The SQL text is cut to its first 4000 characters via dbms_lob.substr.
SELECT sql_id,
       elapsed_time,
       cpu_time,
       iowait_time,
       gets,
       reads,
       rws,
       clwait_time,
       execs,
       elpe,
       machine,
       username,
       dbms_lob.substr(sqt,4000)
  FROM DBA_ORA_SLOW_QUERY
 WHERE elpe > 10
   AND machine NOT IN ('rac01','rac02');
存储过程pro_get_slow_query内容如下
CREATE OR REPLACE PROCEDURE SQMON.pro_get_slow_query
AS
BEGIN
/**********delete old data on sqltext*************/
delete from
commit;
insert into local_dba_hist_sqltextas select * from dba_hist_sqltext@dg2;
commit;
insert into select a.*,sysdate from DBA_ORA_SLOW_QUERY;
commit;
delete from
commit;
/*
select * from DBA_ORA_SLOW_QUERY;
select * from DBA_ORA_SLOW_QUERY_HISTORY;
*/
/************insert new date ********************/
insert into DBA_ORA_SLOW_QUERY
select v_1.sql_id,
v_1.elapsed_time,
v_1.cpu_time,
v_1.iowait_time,
v_1.gets,
v_1.reads,
v_1.rws,
v_1.clwait_time,
v_1.execs,
v_1.elpe,
v_2.machine,
v_2.username,
v_1.sqt
from (select s.sql_id,
elapsed_time / 1000000 elapsed_time,
cpu_time / 1000000 cpu_time,
iowait_time / 1000000 iowait_time,
gets,
reads,
rws,
clwait_time / 1000000 clwait_time,
execs,
st.sql_text sqt,
elapsed_time / 1000000 / decode(execs, 0, null, execs) elpe
from (select *
from (select sql_id,
sum(executions_delta) execs,
sum(buffer_gets_delta) gets,
sum(disk_reads_delta) reads,
sum(rows_processed_delta) rws,
sum(cpu_time_delta) cpu_time,
sum(elapsed_time_delta) elapsed_time,
sum(clwait_delta) clwait_time,
sum(iowait_delta) iowait_time
from dba_hist_sqlstat@HUBSDG2
where snap_id >=
(select max(snap_id) - 1
from dba_hist_snapshot@DG2)
and snap_id