@echo off
cls
::REM ------------------- Begin sample batch script ------------------------
::REM Sample batch programming script ClusterMonitor.cmd
::REM (C)2006 John D. Seaman, Copylefted under terms of the GNU/GPL
::REM by John D. Seaman, www.japan-page.net/batch
::REM Requires bmail, or another command line SMTP utility to send alert e.mails.
::REM
::REM This section prints the banner, defines the settings used by the rest of
::REM the script, verifies the host can run the checks, and starts the log file.
echo.
echo Cluster Healthchk utility by John D. Seaman
echo v.1.2 (2006.9.8)
::REM A bare "echo" prints the echo state ("ECHO is off."), so emit a blank line instead.
echo.
echo.
echo Now checking...
echo.

::REM --------- Set required values here --------------
::REM Put the virtual server name here
set _clusname=paexhs02cl
::REM Alert recipient, SMTP host and sender address handed to bmail
set _too=alerts@yourdomain.com
set _hst=smtp.yourdomain.com
set _frm=%computername%@yourdomain.com
::REM Default alert text; the node and group checks overwrite it with specifics
set _msg="Error condition detected on cluster host %computername%."
::REM Initialize the alert variable.
set _alert=0
::REM If you use a drive letter other than Q: for your Quorum drive, modify the script below.
::REM Set the first few chars of the node names to filter...
::REM In this example the cluster node names are mscssvr1 and mscssvr2
::REM Easier to set here manually than use a 3rd party text manipulator...
set _nodeNm=mscssvr
::REM --------- END required values here --------------

::REM Set debug level, (0 is normal, 1 saves log files)
set _debug=0

::REM Name and truncate the log file BEFORE the sanity checks: the :abort
::REM handler appends to %_log%, and an empty %_log% would turn that
::REM redirection into a syntax error.
set _log=healthchklog.txt
echo.>%_log%

::REM Make sure we are running on Windows 2k(x) with MSCS installed.
if /i not "%OS%" == "Windows_NT" (
    set _abortMsg=Script can only run on NT.
    goto :abort
)
if not exist "%systemroot%\cluster" (
    set _abortMsg=Cluster service not installed.
    goto :abort
)

::REM Write the log header
echo.>>%_log%
echo ^|------------------------ Starting hourly check ---------------------^| >>%_log%
echo.>>%_log%
echo Initialized at %time% on %date%... >>%_log%
echo.>>%_log%

::REM Same header on the console
echo.
echo.
echo ^|------------------------ Starting hourly check ---------------------^|
echo.
echo Initialized at %time% on %date% on %computername% ...
echo.
::REM ---- MODIFY HERE -----------
::REM Decide if I have the Quorum drive (Modify here if you don't use the Q: drive)
::REM _isquorum is recorded as 1/0; nothing visible in this script reads it back.
if exist q:\mscs (
    set /a _isquorum=1
    echo Node %computername% currently has the Quorum resource...
    echo Node %computername% currently has the Quorum resource.>>%_log%
) else (
    set /a _isquorum=0
    echo Node %computername% does not hold the Quorum resource.
    echo Node %computername% does not hold the Quorum resource.>>%_log%
)
echo.
echo.>>%_log%

::REM Get node names and check node status
echo.
echo.>>%_log%
echo Now checking cluster node status...
echo Now checking cluster node status...>>%_log%
::REM Get node output (raw).
::REM The cluster name setting is _clusname; the previous %clusname% was never
::REM defined, so it expanded to nothing and the configured name was ignored.
cluster %_clusname% Node /Status >_nodeStat.txt
::REM Clean it up, extract the node name based on first few characters of the node name...
type _nodeStat.txt | find "%_nodeNm%" >_nodeStat1.txt
::REM Hand the first three whitespace-separated columns of each matching line to :SUBNode
for /f "tokens=1,2,3" %%x IN (_nodeStat1.txt) do call :SUBNode %%x %%y %%z
echo.
echo.>>%_log%

::REM Check resource group status
echo.
echo.>>%_log%
echo Now checking cluster group status...
echo Now checking cluster group status...>>%_log%
CLUSTER %_clusname% GROUP /Status >_groupStat.txt
::REM Keep only the lines that mention a node name
type _groupStat.txt | find "%_nodeNm%" >_groupStat1.txt
::REM Hand the first four columns of each matching line to :SUBGroup
for /f "tokens=1,2,3,4*" %%i IN (_groupStat1.txt) do call :SUBGroup %%i %%j %%k %%l

::REM Skip optional code.
goto :checkStatus

:: ---------- OPTIONAL CODE --------------------
::REM If you run Exchange or another clustered application in active / passive mode
::REM and want to be alerted when node 1 isn't active for this service, use the following code.
::REM I prefer to run Exchange on node 1 and if it fails over to node 2 I want to be alerted...
::REM Make sure Node A is active with Exchange
echo.
echo.>>%_log%
echo Now checking the Active Exchange node...
echo Now checking the Active Exchange node...>>%_log%
type _groupStat1.txt | find "Exchange_" >_nodeCheck.txt
::Extract active node name for Exchange cluster resource
for /f "tokens=1,2,3" %%i IN (_nodeCheck.txt) do echo %%j >_exActiveNode.txt
::REM find sets errorlevel 0 when the active node name contains "A "
type _exActiveNode.txt | find "A "
if %errorlevel% EQU 0 (
    echo Node A is active for the Exchange Resource group...
    echo Node A is active for the Exchange Resource group...>>%_log%
    echo.
    echo.>>%_log%
    goto :checkStatus
)
::REM Error detected, node A is not active for the Exchange group
set _alert=1
echo Error! Node A is not active for the Exchange resource group...
echo Error! Node A is not active for the Exchange resource group...>>%_log%
:: ---------- END OPTIONAL CODE --------------------

::REM Check error status (last step)
:checkStatus
::REM An alert is still pending (flagged but not yet mailed): send it now.
if "%_alert%" == "1" (
    echo Error detected, now sending alert...
    echo Error detected, now sending alert...>>%_log%
    call :alert
    goto :END
)
::REM :alert resets _alert after mailing, so _alertSent is what remembers that
::REM a problem was already reported; do not claim "No errors found" then.
if defined _alertSent (
    echo.
    echo.>>%_log%
    echo Errors were detected and reported during this run.
    echo Errors were detected and reported during this run.>>%_log%
    goto :END
)
echo.
echo.>>%_log%
echo No errors found.
echo No errors found.>>%_log%
::REM The original ended these two lines with a dangling caret ("echo ^"),
::REM which cmd treats as line continuation and would swallow the next line.
::REM NOTE(review): possibly a stripped control character (bell) - confirm.
echo.
echo.>>%_log%
goto :END

::REM ------------------- F U N C T I O N S ------------------------------------------

:SUBNode
::REM ------------------------
::REM Detect if a node is down
::REM Called once per line of _nodeStat1.txt.
::REM   %1 = node name, %2 = second column (unused), %3 = node status
::REM Any non-empty status other than "Up" raises and mails an alert.
::REM ------------------------
echo Status is %3...
if "%3" == "" goto :SUBNodeOk
if "%3" == "Up" goto :SUBNodeOk
echo Node %1 is %3.
echo Node %1 is %3>>%_log%
set /a _alert=1
::REM Closing quote added: the unbalanced quote broke the later
::REM "echo ! %_msg%>>%_log%" redirection and the bmail arguments.
set _msg="ALERT! Node %1 is %3!"
::REM call (not goto) so control returns here instead of running the
::REM end-of-script cleanup while the checks are still in progress.
call :alert
goto :EOF
:SUBNodeOk
echo Node %1 is currently %3
echo Node %1 is currently %3>>%_log%
goto :EOF

:SUBGroup
::REM ----------------------------------
::REM Detect if resource groups are down
::REM Called once per line of _groupStat1.txt with its first four columns.
::REM   Ordinary group:  %1 = group name, %3 = status
::REM   "Cluster" group: the name takes two columns, so %3 is the third
::REM                    column and %4 = status
::REM ----------------------------------
echo.
echo %1 %2 %3 %4
set _desc=Virtual Server Resource Group
if "%1" == "Cluster" goto :SUBGroupLocal
if "%3" == "Online" goto :SUBGroupOnline
if "%3" == "Partially" goto :SUBGroupPartial

::REM Handle resource group not online and not partially down
echo %_desc% %1 is currently %3!!>>%_log%
set /a _alert=1
set _msg="Alert! %1 is currently %3!!"
call :alert
goto :EOF

:SUBGroupPartial
::REM Handle resource group partially down
echo %_desc% %1 is currently partially OFFLINE!!>>%_log%
set /a _alert=1
set _msg="Alert! %1 is currently partially OFFLINE!!"
call :alert
goto :EOF

:SUBGroupOnline
::REM Resource group is online: just report it
echo %_desc% %1 is currently %3
echo %_desc% %1 is currently %3>>%_log%
goto :EOF

:SUBGroupLocal
::REM Handle local cluster group. A problem only raises the alert flag here;
::REM the mail goes out with the next alert or from :checkStatus.
if not "%4" == "Online" (
    echo %1 %2 %3
    echo %1 %2 %3>>%_log%
    set /a _alert=1
)
echo Local Cluster Group %3 is %4
echo Local Cluster Group %3 is %4>>%_log%
echo.
echo.>>%_log%
goto :EOF

::REM ---------------------------------------------------------------------

:abort
::REM Process errors and bail out
::REM Sanity checks may jump here before the log name is set; an empty %_log%
::REM would make the redirection below a syntax error, so fall back to the default.
if not defined _log set _log=healthchklog.txt
echo.
echo An error occurred!
echo.
echo %_abortMsg%
echo %_abortMsg%>>%_log%
echo.
::REM As before, any pending alert is still processed on the way out.
call :alert
goto :END

:alert
::REM Subroutine (invoke with "call :alert").
::REM Check to see if an alert condition exists and react accordingly:
::REM when _alert is 1, append the banner and %_msg% to the log and mail
::REM the alert through bmail using the _frm/_hst/_too/_msg/_log settings.
set _warntxt=****** A L E R T ****** A L E R T ******
if "%_alert%" == "1" (
    echo.>>%_log%
    echo %_warntxt%>>%_log%
    echo.>>%_log%
    echo ! %_msg%>>%_log%
    bmail -f %_frm% -s %_hst% -t %_too% -a %_msg% -b %_msg% -m %_log% -d -h
    echo SMTP alert sent...
    echo SMTP alert sent...>>%_log%
    set _alertSent=1
)
::REM Reset alert status, prevents multiple SMTP error messages for the same issue
set /a _alert=0
goto :EOF

::REM -------------- C L E A N -- U P -------------------
:END
::REM Forget the per-run marker so a later run in the same console starts clean.
set _alertSent=
::REM Remove the temporary _*.txt work files unless debugging is enabled.
if not "%_debug%" == "1" if exist _*.txt del /q _*.txt
::REM Explicit end label kept from the original as the final "goto :EOF" target.
:EOF
::REM ------------------- End sample batch script ------------------------