#!/usr/bin/perl
#
# Copyright 2013 SPARTA, Inc.  All rights reserved.
# See the COPYING file distributed with this software for details.
#
# owl-rrecdata
#
#	This script retrieves the resource-record response data gathered by
#	an Owl sensor.  It runs on the Owl manager and provides data for use
#	by a Nagios monitoring environment.
#
#	File organization:
#		/owl/data/owldev1/data/
#		/owl/data/owldev1/history/
#		/owl/data/owldev1/history/rrdata
#
#
# Revision History
#	2.0	Initial version.				130315
#		This was adapted from the owl-dnstimer plugin.
#	2.0.1	Added history recording.			130401
#	2.0.2	Moved data to <data/dns> subdirectory.		130903
#		Fixed use of name server.
#

use strict;

use Getopt::Long qw(:config no_ignore_case_always);
use Fcntl ':flock';
use File::Path;

#######################################################################
#
# Version information.
#
#	$VERS and $DTVERS are the strings printed by version() and usage().
#
my $NAME   = "owl-rrecdata";
my $VERS   = "$NAME version: 2.0.2";
my $DTVERS = "DNSSEC-Tools version: 2.0";

#######################################################################
#
# Paths.
#
#	The installer must set the value of $OWLDIR to reflect the
#	desired file hierarchy.
#	The $SUBDATA and $SUBHIST values may be set, but the values
#	given below are sufficient.
#
#	Putting together the various values below will give:
#		/owl/data/<sensor>/data/rrec	(sensor data files)
#		/owl/data/<sensor>/history	(history files)
#

my $OWLDIR	= '/owl';		# Owl directory.
my $OWLDATA	= "$OWLDIR/data";	# Owl sensor data directory.

my $SUBDATA = 'data';			# Directory for sensor data.
my $SUBHIST = 'history';		# Subdirectory for history data.

my $FILEEXT = 'rrec';			# Data-file extension; also used as
					# the sensor-data subdirectory name.

#####

my $datadir;				# Data directory; set by getdirs().

my $hist_rrdata	= 'rrdata';		# Leading part of history-file names.

my $historydir;				# History data directory; set by getdirs().
my $histfile;				# History file for this triad; set
					# by gethistory().
my $writehist = 0;			# Write-history-file flag; set by
					# calcret(), read by updatehist().

my $nohist = 1;				# History file not-found flag.
					# (Not referenced by any routine
					# in this script.)

my @history = ();			# Contents of history file, one
					# line per element.

#######################################################################
#
# Nagios return codes.
#
my $RC_NORMAL	= 0;		# Normal return code.
my $RC_WARNING	= 1;		# Warning return code.
my $RC_CRITICAL	= 2;		# Critical return code.
my $RC_UNKNOWN	= 3;		# Unknown return code.


#
# Thresholds for changed DNS resource-record data.
#
# The default thresholds are expressed in days by way of $UNITS.  $UNITS
# may be set to one of the other *SECONDS values if a different unit is
# desired.  The greater the unit, the longer the changed condition will
# be visible on the Nagios display.
#
#
my $MINUTESECONDS = 60;				# Seconds in a minute.
my $HOURSECONDS	  = 3600;			# Seconds in an hour.
my $DAYSECONDS	  = 86400;			# Seconds in a day.
my $WEEKSECONDS	  = 604800;			# Seconds in a week.

my $UNITS = $DAYSECONDS;

my $WARNING_THRESHOLD  = (5 * $UNITS);		# Default warning threshold.
my $CRITICAL_THRESHOLD = (1 * $UNITS);		# Default critical threshold.

#######################################################################
#
# Data required for command line options.
#
my %options = ();			# Filled option array.
my @opts =
(
	'warning=i',			# Warning threshold.
	'critical=i',			# Critical threshold.
	'nohistory',			# Don't save history information.
	'paths',			# Display the paths.
	'Version',			# Display the version number.
	'help',				# Give a usage message and exit.
);

my $warning = $WARNING_THRESHOLD;	# Warning threshold (seconds).
my $critical = $CRITICAL_THRESHOLD;	# Critical threshold (seconds).
my $savehist = 1;			# Save history; cleared by -nohistory.

#######################################################################
#
# Data from command line.  These are set in doopts().

my $sensor = '';			# Owl sensor host.
my $target = '';			# Target host.
my $nsrvr  = '';			# Name server host.
my $query = '';				# Type of DNS query.

my $triad = '';				# Combination of arguments; this is
					# the tail of the data-file names.

#######################################################################

my $rc = $RC_UNKNOWN;			# Command's return code.
my $outstr = '';			# Performance data appended to the
					# "no data available" message.  No
					# routine currently fills this in.
my $rrdata = '';			# Accumulated resource-record data;
					# this is the text shown by Nagios.
my $querydata = '';			# Last valid entry; compared with and
					# saved to the history file.

################################################################################

#
# Run the plugin; main()'s return value is the Nagios exit code.
#
$rc = main();
exit($rc);

#-----------------------------------------------------------------------------
# Routine:	main()
#
# Purpose:	Main controller routine for owl-rrecdata.
#
#		The command line is parsed, the directories are located, the
#		last history entry is read, and the data files are searched
#		(most recent first) for entries newer than that history entry.
#		A line is then written for Nagios and the history file is
#		updated if needed.
#
# Returns:	The Nagios return code chosen by calcret().
#
sub main
{
	my $retcode = 0;			# Service's return code.

	my @fns = ();				# Filenames to check.
	my $lasttime = 0;			# Time of last history entry.
	my $lastdata = '';			# Data from last history entry.
	my $kronos;				# Time of newest entry found.

	#
	# Check our options.
	#
	doopts();

	#
	# Get the names of our directories, creating them if need be.
	#
	getdirs(1);

	#
	# Get the time and data of the last history entry for this
	# sensor/target/nameserver/query combination.
	#
	($lasttime, $lastdata) = gethistory();

	#
	# Get the names of the files to check for response lines.
	#
	@fns = getfns();

	#
	# Get the DNS resource-record data from the recent files.  We stop
	# at the first file that gives us a new entry.  rrecdata() returns
	# zero or an undefined value when a file has nothing new.
	#
	foreach my $fn (@fns)
	{
		$kronos = rrecdata($fn,$lasttime);

		last if($kronos);
	}

	#
	# Figure out what our return code for Nagios will be.
	#
	$retcode = calcret($lasttime,$lastdata,$kronos);

	#
	# Write a line of data to Nagios.
	#
	nagiout($kronos,$retcode);

	#
	# We'll move to now if no final time was returned.  This must catch
	# the zero returned for unreadable or empty files as well as the
	# undefined value returned when no new entries were found, so that
	# a bogus timestamp is never written to the history file.
	#
	$kronos = time() if(!$kronos);

	#
	# Update the history file.
	#
	updatehist("$kronos $querydata");

	#
	# Exit with the command's return code.
	#
	return($retcode);
}

#-----------------------------------------------------------------------------
# Routine:	doopts()
#
# Purpose:	Parse and validate the command line.  The option values are
#		stored in $warning, $critical, and $savehist.  The four
#		required arguments are stored in $sensor, $target, $nsrvr,
#		and $query, and are then combined into $triad.
#
#		The -Version, -help, and -paths options are handled here;
#		each of their routines exits.  Invalid usage or invalid
#		thresholds also cause an exit.
#
sub doopts
{
	#
	# Options that are handled immediately, in the order they are
	# checked, and the routines that handle them.
	#
	my @immediate =
	(
		[ 'Version', \&version	 ],
		[ 'help',    \&usage	 ],
		[ 'paths',   \&showpaths ],
	);

	#
	# Parse the command line.
	#
	GetOptions(\%options,@opts) or usage();

	#
	# Show the version number, usage, or paths if requested.
	#
	foreach my $pair (@immediate)
	{
		my ($optname,$handler) = @$pair;

		$handler->() if(defined($options{$optname}));
	}

	#
	# Pick up any thresholds given by the user.
	#
	if(defined($options{'warning'}))
	{
		$warning = $options{'warning'};
	}
	if(defined($options{'critical'}))
	{
		$critical = $options{'critical'};
	}

	#
	# Turn off history saving if requested.
	#
	if(defined($options{'nohistory'}))
	{
		$savehist = 0;
	}

	#
	# We need exactly four arguments.
	#
	usage() unless(@ARGV == 4);

	#
	# Ensure the thresholds are not negative.
	#
	unless(($warning >= 0) && ($critical >= 0))
	{
		print "thresholds must be zero or greater\n";
		exit($RC_UNKNOWN);
	}

	#
	# Ensure an enabled warning threshold is not less than the
	# critical threshold.
	#
	if(($warning > 0) && ($critical > $warning))
	{
		print "warning threshold ($warning) must be greater than critical threshold ($critical)\n";
		exit($RC_UNKNOWN);
	}

	#
	# Save the arguments and combine them into the data-file suffix.
	#
	($sensor, $target, $nsrvr, $query) = @ARGV;

	$triad = join(',', $sensor, $target, $nsrvr, "$query.$FILEEXT");
}

#-----------------------------------------------------------------------------
# Routine:	getdirs()
#
# Purpose:	Build the data and history directories' names and save them
#		in $datadir and $historydir.
#
#		If the flag argument is true and a directory doesn't exist,
#		we'll try to create it.  If we can't create a directory, we
#		exit with a critical error.  If the flag is false, only the
#		names are built.
#
sub getdirs
{
	my $mkflag = shift;			# Flag for creating directories.

	#
	# Build the directory names we'll need.
	#
	$datadir    = "$OWLDATA/$sensor/$SUBDATA";
	$historydir = "$OWLDATA/$sensor/$SUBHIST";

	#
	# Ensure the data directory exists.
	#
	mkowldir($datadir,'data directory') if($mkflag);

	#
	# And now for the sensor-data subdirectory.
	#
	$datadir = "$OWLDATA/$sensor/$SUBDATA/$FILEEXT";
	mkowldir($datadir,'rrec data subdirectory') if($mkflag);

	#
	# Ensure the history directory exists.
	#
	mkowldir($historydir,'history directory') if($mkflag);
}

#-----------------------------------------------------------------------------
# Routine:	mkowldir()
#
# Purpose:	Create a directory (and any missing parents) if nothing
#		exists at that path.  If the directory isn't there after
#		the attempt, a message is printed and we exit with a
#		critical error.  This is a helper for getdirs().
#
sub mkowldir
{
	my $dir	  = shift;			# Directory to create.
	my $label = shift;			# Description for error message.

	return if(-e $dir);

	#
	# mkpath() croaks on failure, so it's wrapped in an eval to let us
	# give Nagios a proper message and return code.
	#
	eval { mkpath($dir); };

	#
	# Judge success by the directory's presence, not by mkpath()'s
	# return value:  if someone else created the directory first,
	# mkpath() creates nothing but all is still well.
	#
	if(! -d $dir)
	{
		print "$label \"$dir\" does not exist\n";
		exit($RC_CRITICAL);
	}
}

#-----------------------------------------------------------------------------
# Routine:	gethistory()
#
# Purpose:	Get the history entries for this target/nameserver/query
#		combination.  The sensor is implied by the history file's
#		presence in that sensor's directory hierarchy.
#
#		The name of the history file is saved in $histfile and the
#		file's lines are saved in @history.  If the history file
#		doesn't exist, an empty one is created.
#
# Returns:	A (timestamp, data) pair taken from the last line of the
#		history file.  (0, '') is returned if there is no usable
#		history.
#
sub gethistory
{
	my $lastline;				# Last line in history file.
	my $hist;				# History file handle.

	#
	# Get the history file for this triad.
	#
	$histfile = "$historydir/$hist_rrdata,$target,$nsrvr,$query";

	#
	# If the history file doesn't exist, we'll create it and return.
	# Creation is best-effort; a failure here will show up when the
	# history file is updated.
	#
	if(! -e $histfile)
	{
		close($hist) if(open($hist,'>',$histfile));
		return(0,'');
	}

	#
	# Get the lines from this query's rrec history file.  We're only
	# reading, so a shared lock is all we need.  If the file can't be
	# read, we'll carry on as if there's no history.
	#
	if(open($hist,'<',$histfile))
	{
		flock($hist,LOCK_SH);
		@history = <$hist>;
		flock($hist,LOCK_UN);
		close($hist);
	}

	#
	# An empty history file is the same as no history file.
	#
	return(0,'') if(@history == 0);

	#
	# Get the time and data from the last line in the history file.
	#
	$lastline = $history[-1];
	if($lastline =~ /^(\S+)\s+(.*)$/)
	{
		return($1,$2);
	}

	#
	# The last line wasn't in the expected "<time> <data>" format.
	#
	return(0,'');
}

#-----------------------------------------------------------------------------
# Routine:	getfns()
#
# Purpose:	Get the names of the data files for this query triad.  The
#		names are returned in descending sorted order.  If there
#		are no matching files, a message is printed and we exit
#		with an unknown error.
#
sub getfns
{
	#
	# Find the extant files for this triad, ordering the names from
	# highest to lowest.
	#
	my @found = sort { $b cmp $a } glob("$datadir/*,$triad");

	#
	# Give an unknown error if there aren't any files.
	#
	unless(@found)
	{
		print "no data available for \"$triad\"|$outstr";
		exit($RC_UNKNOWN);
	}

	#
	# Return the whole list of matching files.
	#
	return(@found);
}

#-----------------------------------------------------------------------------
# Routine:	rrecdata()
#
# Purpose:	Get the DNS resource-record info from a sensor's data file.
#		Only successful entries newer than the given timestamp are
#		used.  The data from the last such entry are stored in
#		$querydata (for the history file) and $rrdata (for Nagios).
#
#		A data file holds header lines, each optionally followed by
#		resource-record data:
#
#    1349291568.73732 example.com ns.example.com A 0.011641979217529 0 14
#    <14 bytes of data follow>
#
#		The header fields are:  timestamp, target host, name server,
#		query type, response time, error value, and data length.
#
# Arguments:	- name of the data file to read
#		- timestamp of the last history entry (may be empty)
#
# Returns:	Zero if the file can't be opened or is empty.  Otherwise,
#		the (integer) timestamp of the last entry used; this is
#		undefined if no entry was used.
#
sub rrecdata
{
	my $fn = shift;				# Service's data file.
	my $lasttime = shift;			# Timestamp of last entry.

	my $sf;					# Data file handle.
	my $qarg;				# Query type from file.
	my @lines;				# Lines from the data file.
	my $tempus;				# Timestamp to return.
	my $rrtmp = '';				# Temporary rrec data.

	#
	# Open our data file.
	#
	return(0) if(!open($sf,'<',$fn));

	#
	# Get the contents of the data file.
	#
	@lines = <$sf>;
	close($sf);

	#
	# Return if the file is empty.  It isn't necessarily an error, so
	# we won't complain.
	#
	return(0) if(@lines == 0);

	$lasttime = 0 if((!defined($lasttime)) || ($lasttime eq ''));

	#
	# Handle any file entries more recent than the last time we ran.
	#
	for(my $ind=0; $ind < @lines; $ind++)
	{
		my $line = $lines[$ind];	# Line to examine.
		my $kronos;			# Timestamp from file.
		my $errval;			# Error value from file.
		my $datalen;			# Length of rrec data.
		my $rcnt = 0;			# Count of rrec data read.

		#
		# Split the line into its atoms.  Lines that aren't header
		# lines are skipped; without this check, the fields from a
		# previous header would be silently reused.  The fractional
		# part of the timestamp is dropped.
		#
		next unless($line =~ /^([0-9]+)(?:\.[0-9]+)?\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/);
		$kronos	 = $1;
		$qarg	 = $4;
		$errval	 = $6;
		$datalen = $7;

		#
		# Skip failed queries and entries we've already seen.
		# Timestamps must be compared as numbers.
		#
		next if($errval ne 0);
		next if($lasttime >= $kronos);

		#
		# If this entry has data to be read, we'll grab it and
		# pop it onto the end of our resource-record buffer.
		#
		if($datalen > 0)
		{
			#
			# We'll look at all the subsequent lines,
			# up to the end of the file.  However, we
			# won't go past the data sent by the sensor.
			#
			$querydata = '';
			for($ind++; $ind < @lines; $ind++)
			{
				$querydata .= $lines[$ind];
				$rcnt += length($lines[$ind]);
				last if($rcnt >= $datalen);
			}

			$querydata = datamunch($querydata);
			$rrtmp = "$qarg RRec Data:  $querydata";
		}

		#
		# Save a valid timestamp.
		#
		$tempus = $kronos;
	}

	#
	# Make sure there's *something* there...
	#
	$querydata = 'no resource-record data found' if($querydata eq '');

	#
	# Massage the query data a bit so multi-line data fit on one line.
	#
	chomp $rrtmp;
	$rrdata = $rrtmp;
	$rrdata =~ s/\n/, /g if(defined($qarg) && ($qarg =~ /^ns$/i));
	$querydata =~ s/\n/, /g;
	$querydata =~ s/, $//g;

	#
	# Return the timestamp of the last entry we used.
	#
	return($tempus);
}

#-----------------------------------------------------------------------------
# Routine:	datamunch()
#
# Purpose:	Manipulate the data, based on the resource record type
#		given on the command line.
#
#		NS	The hosts in NS records are converted to lowercase,
#			sorted, and joined into a space-separated string.
#
#		The data for all other record types are returned unchanged.
#
sub datamunch
{
	my $data = shift;			# Data to muck with.

	#
	# The record type is checked without regard to case, in keeping
	# with the NS check in rrecdata().
	#
	if(uc($query) eq 'NS')
	{
		my @servers;			# Array of nameservers.

		#
		# Splitting on ' ' breaks the data on any run of whitespace
		# and ignores leading whitespace.  This keeps empty fields
		# from turning into stray blanks in the joined string.
		#
		@servers = split(' ', lc($data));
		$data = join(' ', sort(@servers));
	}

	#
	# Return our masticated data.
	#
	return($data);
}

#-----------------------------------------------------------------------------
# Routine:	calcret()
#
# Purpose:	Figure out what exit code we're going to use.  This is based
#		on the time since the last time the sensor provided different
#		resource-record data for this query.
#
#		$writehist is set if the history file should be updated.
#
# Arguments:	- time of the last history entry
#		- data from the last history entry
#		- time of the current entry
#
# Returns:	$RC_CRITICAL, $RC_WARNING, or $RC_NORMAL.
#
sub calcret
{
	my $lasttime = shift;			# Time of last history entry.
	my $lastquery = shift;			# Data from last history entry.
	my $kronos = shift;			# Time of this entry.
	my $rc;					# Return code.
	my $timediff;				# Time since last data change.
						# Undefined if there's no
						# change to measure from.

	if(@history == 0)
	{
		#
		# If our history is a blank slate, then all is well.
		# We'll just start the history with this entry.
		#
		$writehist = 1;
	}
	elsif($querydata ne $lastquery)
	{
		#
		# If this current query response is different from the
		# last query response, we'll calculate the difference on
		# their times.
		#
		$timediff = $kronos - $lasttime;
		$writehist = 1;
	}
	elsif(@history > 1)
	{
		#
		# The current query response is the same as the last query
		# response and there are older responses still.  We'll
		# calculate the time based on this query response and the
		# second-last one.  If that entry can't be parsed, we have
		# nothing to measure from.
		#
		if($history[-2] =~ /^(\S+)\s+(.*)$/)
		{
			$timediff = $kronos - $1;
		}
	}

	#
	# If the current query response is the same as the only response
	# in the history, nothing was set above and all is well.
	#

	#
	# Figure out what exit code we're going to need.  When there is
	# no change to measure from, the thresholds don't apply.  (A zero
	# time difference can't be used for this, since zero falls within
	# any enabled threshold.)
	#
	if(!defined($timediff))
	{
		$rc = $RC_NORMAL;
	}
	elsif(($timediff <= $critical) && ($critical > 0))
	{
		$rc = $RC_CRITICAL;
	}
	elsif(($timediff <= $warning) && ($warning > 0))
	{
		$rc = $RC_WARNING;
	}
	else
	{
		$rc = $RC_NORMAL;
	}

	#
	# If we aren't saving history, we'll always give a normal return.
	#
	$rc = $RC_NORMAL if($savehist == 0);

	return($rc);
}

#-----------------------------------------------------------------------------
# Routine:	nagiout()
#
# Purpose:	Write the line of DNS resource-record output for Nagios.
#		The text in $rrdata is followed by a performance-data field
#		holding the entry's time and a graphing value derived from
#		the return code.
#
#		The graphing values are deliberately large and non-zero, so
#		that even a normal result rises above the axis and a spike
#		isn't averaged out so easily in the long-term graphs.
#
# Arguments:	- time of the entry being reported
#		- the command's return code
#
sub nagiout
{
	my $kronos = shift;			# Time of the entry.
	my $rc	   = shift;			# Command's return code.
	my $graphval;				# Value used for graphing.

	#
	# Translate the return code to its graphing value.  Anything
	# unrecognized gets the largest value.
	#
	$graphval = ($rc == $RC_CRITICAL) ? 80	:
		    ($rc == $RC_WARNING)  ? 40	:
		    ($rc == $RC_NORMAL)	  ? 10	:
					    120;

	print "$rrdata|owl-rrecdata=$kronos:$graphval;\n";
}

#-----------------------------------------------------------------------------
# Routine:	updatehist()
#
# Purpose:	Append a new entry to the history file for this execution.
#		Nothing is written unless calcret() has flagged a change
#		and history saving is enabled.
#
# Arguments:	- the new history entry, in "<time> <data>" format
#
sub updatehist
{
	my $newentry = shift;			# New entry for history file.
	my $hist;				# History file handle.

	#
	# Don't update the history file if nothing's changed.
	#
	return if(($writehist == 0) || ($savehist == 0));

	#
	# Open this triad's history file.  The line for Nagios has already
	# been written, so a failure is reported on stderr rather than
	# changing the plugin's results.
	#
	if(!open($hist,'>>',$histfile))
	{
		print STDERR "$NAME:  unable to open history file \"$histfile\":  $!\n";
		return;
	}

	#
	# Lock the history file.
	#
	flock($hist,LOCK_EX);

	#
	# Write the new entry.
	#
	print $hist "$newentry\n";

	#
	# Unlock and close the history file.
	#
	flock($hist,LOCK_UN);
	close($hist);
}

#-----------------------------------------------------------------------------
# Routine:	showpaths()
#
# Purpose:	Display the data and history directories' names, along with
#		the form of the history file's name, and then exit.
#
#		This is called while the options are being parsed, so the
#		command line arguments aren't available yet.  Placeholders
#		are shown in place of the actual argument values.
#
sub showpaths
{
	my $histpath;				# Form of history file's name.

	#
	# We'll use a dummy sensor name.  The directories are only named,
	# not created.
	#
	$sensor = '<sensor>';
	getdirs(0);

	#
	# $histfile hasn't been set at this point, so we'll build the
	# history file's name the same way gethistory() does.
	#
	$histpath = "$historydir/$hist_rrdata,<target>,<nameserver>,<rrec-type>";

	print "Owl directory		$OWLDATA\n";
	print "Owl data directory	$datadir\n";
	print "Owl history directory	$historydir\n";
	print "Owl history file     	$histpath\n";

	exit(0);
}

#----------------------------------------------------------------------
# Routine:	version()
#
# Purpose:	Write the program's version and the DNSSEC-Tools version
#		to stderr, and then exit with a return code of 1.
#
sub version
{
	print STDERR "$VERS\n$DTVERS\n";

	exit(1);
}

#-----------------------------------------------------------------------------
# Routine:	usage()
#
# Purpose:	Write the version information and a usage message to stderr,
#		and then exit with a return code of 1.
#
sub usage
{
	#
	# The lines of the usage message, without their newlines.
	#
	my @msg =
	(
		$VERS,
		$DTVERS,
		'Copyright 2013 SPARTA, Inc.  All rights reserved.',
		'',
		'This script retrieves the DNS resource-record data gathered by a Owl sensor.',
		'',
		'usage:  owl-rrecdata [options] <sensor> <target> <nameserver> <rrec-type>',
		"\toptions:",
		"\t\t-warning <num>     warning threshold (in seconds)",
		"\t\t-critical <num>    critical threshold (in seconds)",
		"\t\t-nohistory         do not save history",
		"\t\t-paths             display paths to be used",
		"\t\t-Version           display program version",
		"\t\t-help              display this message",
		'',
	);

	print STDERR join("\n", @msg) . "\n";

	exit(1);
}

1;

###############################################################################

=pod

=head1 NAME

owl-rrecdata - Nagios plugin to display DNS resource-record data from an Owl sensor

=head1 SYNOPSIS

  owl-rrecdata [options] <sensor> <target> <nameserver> <rrec-type>

=head1 DESCRIPTION

B<owl-rrecdata> displays DNS resource-record data collected by an Owl
sensor node.  The sensor data are read from files already transferred
to the manager and B<owl-rrecdata> does not transfer the files itself.
The data are formatted for use in Nagios' own display.

The current set of Nagios objects and configuration files are B<not>
set up to provide time-based graphing of DNS resource-record data.

To improve performance, the data directories are hard-coded in B<owl-rrecdata>.
DNS resource-record data are in B<< /owl/data/<sensor>/data/rrec >>.
History data are in B<< /owl/data/<sensor>/history >>.

The specified service name determines which file will be selected from
the appropriate data directory.  The file names in the data directory
have this format:

    timestamp,sensor,target,nameserver,query.rrec

For example, if the sensor "sensor3" was monitoring NS records for the
target domain "example.com" from the name server "ns.example.com", then
a datafile would be named something like this:

    130401.0215,sensor3,example.com,ns.example.com,NS.rrec

The data files whose sensor, target, name server, and query type match the
arguments given on the command line will be consulted, starting with the most
recent file.  The DNS response data will be taken from the last new entry found.

Thresholds for the critical state and the warning state may be specified.
These are only triggered if the current DNS resource record data has changed
from the previous execution of B<owl-rrecdata>.  These thresholds are measured
from the time of the last change in the specified DNS resource record data to
the time of the current changed record.  If the difference in times is
between zero and the critical threshold, that entry is in a critical state.
If the difference in times is between the critical and warning thresholds,
then the entry is in a warning state.  If the time difference exceeds the
warning threshold, then the entry reverts to the normal state.  This
automatic reduction in states assumes that as time passes, the administrators
will handle the condition as required.  Graphs can be defined in Nagios that
show the occurrence and frequency of state changes in DNS data.

Both the warning threshold and the critical threshold can be disabled, in
case the local installation only wants one particular type of state for
tracking changes in DNS resource records.  If both thresholds are disabled, 
then these entries will always appear to be normal; graphing such data will
never show a change.

B<owl-rrecdata> is expected to only be run by the Nagios monitoring system.

=head1 RECORD-SPECIFIC PROCESSING

Before passing the resource-record data to Nagios, B<owl-rrecdata> can modify
the data as required.  The following data manipulations are performed:

    resource-record type          data manipulation
            NS                    nameservers are converted to lowercase,
                                  then sorted alphabetically

All other resource-record data are left as is.

=head1 NAGIOS USE

B<owl-rrecdata> is run from a Nagios I<command> object.  These are examples
of how the objects should be defined:

    define command {
         command_name    owl-rrecdata
         command_line    $USER1$/owl-rrecdata $ARG1$ $ARG2$ $ARG3$ $ARG4$
    }

    define service {
         service_description     example.com NS          
         host_name               sensor3
         check_command		 owl-rrecdata!sensor3!example.com!ns.example.com!NS
         active_checks_enabled   1 
         ...
    }

=head1 OPTIONS

The following options are recognized by B<owl-rrecdata>:

=over 4

=item I<-critical critical-threshold>

The threshold for entering a critical state.  This is the number of
seconds between the most recent change in data and the current data
entry's timestamp.  This must be less than the warning threshold.
If this is zero, then the critical state will not be entered.
The default critical threshold is one day.
This critical threshold is specified in seconds.

=item I<-warning warning-threshold>

The threshold for entering a warning state.  This is the number of
seconds between the most recent change in data and the current data
entry's timestamp.  This must be greater than the critical threshold.
If this is zero, then the warning state will not be entered.
The default warning threshold is five days.
This warning threshold is specified in seconds.

=item I<-nohistory>

This option prevents history data for this plugin from being saved.
This removes the possibility of tracking and graphing changes in DNS
resource-record data over time.

=item I<-paths>

Display the paths B<owl-rrecdata> will use.

=item I<-Version>

Display the program version and exit.

=item I<-help>

Display a usage message and exit.

=back

=head1 COPYRIGHT

Copyright 2013 SPARTA, Inc.  All rights reserved.
See the COPYING file included with the DNSSEC-Tools package for details.

=head1 AUTHOR

Wayne Morrison, tewok@tislabs.com

=cut

