#!/usr/bin/perl
###########################################################################
#
# Program : Log Analyzer for DansGuardian
# Author : Jimmy Myrick (jmyrick@cherokeek12.org)
# Django (django@nausch.org)
# Version : 2.1
# Released : November 21, 2011
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# If you like it and want to send me something, that's ok too.
# How about a gift certificate to amazon.com or a donation to DansGuardian
# on my behalf?
#
###########################################################################
###########################################################################
#
# Change to point to your DansGuardian log directory
# NOTE: The trailing / IS REQUIRED!!
#
###########################################################################
$logdir = '/var/log/dansguardian/';
###########################################################################
#
# Log filename. Change this to match the prefix of your log files
# This defaults to access.log and should not have to be modified.
#
# Any logfiles in $logdir that match the prefix $logfile and are gzip'ed
# with a .gz extension will also be read. The results will be printed in
# reverse chronological filename order.
#
# Example:
# If you have the files: access.log access.log.0.gz access.log.1.gz
# where they are newest to oldest, then any matches in
# access.log.1.gz will be printed first, followed by access.log.0.gz
# and then access.log
#
# The log entries themselves are not sorted by the program: the files are
# read in reverse filename order and matches are displayed in the order they
# appear in each file. If your results are out of sequence, check the
# filename/dates to be sure they are compressed and rotated properly. If you
# use the FreeBSD newsyslog.conf to rotate your logs, this will not be a
# problem.
#
###########################################################################
$logfile = 'access.log';
###########################################################################
#
# Log Format. Change to indicate what format the log files are. This
# should match what is in dansguardian.conf. Setting it to the wrong type
# will cause strange results.
#
# 1 = DansGuardian format 2 = CSV-style format
#
###########################################################################
$logformat = 1;
###########################################################################
#
# If you need the perl modules below, download and untar them to a directory.
# Then cd to the directory and enter the commands:
# perl Makefile.PL; make; make test; make install
#
# If you need more instructions,
# go here: http://www.cpan.org/modules/INSTALL.html
#
# Get it here: http://www.cpan.org/authors/id/LDS/CGI.pm-2.81.tar.gz
#
###########################################################################
use CGI;
###########################################################################
#
# This is needed to do gzip'ed log files on the fly
# Get it here: http://www.cpan.org/authors/id/PMQS/Compress-Zlib-1.16.tar.gz
#
###########################################################################
use Compress::Zlib;
###########################################################################
#
# This should determine where the program is called from automagically.
# If not, uncomment the first line, change to your server name/path and
# comment the second line. You can use Apache restrictions to block
# access to this file if desired.
#
###########################################################################
#$cgipath = 'http://your.server.com/cgi-bin/dglog/dglog2.pl';
$cgipath = $ENV{SCRIPT_NAME};
###########################################################################
#
# SHOULDN'T HAVE TO MODIFY ANYTHING BELOW THIS LINE
#
###########################################################################
# Main request dispatch: build the CGI object, record today's date, then
# route on the 'a' request parameter:
#   'i'           - run a log inquiry (searchLog)
#   'h'           - show the help page (displayHelp)
#   anything else - show the menu (printMenu)
$q = CGI->new;
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
$mon      = $mon + 1;      # mon starts at 0
$year     = $year + 1900;  # year needs 1900 added
$pagename = 'Log Analyzer for DansGuardian';
$a        = $q->param('a');

if ($a eq 'i') {    # Inquiry into logs
    # These are the values that can be sent by the user through the browser.
    # They are package globals on purpose: the subs below read them directly.
    $sIP     = "ALL";   # IP address
    $sUN     = "ALL";   # Username
    $sURL    = "ALL";   # URL to show or trace a denied site - this is the URL to trace
    $sSD     = "ALL";   # Complete start date
    $sSDY    = "ALL";   # Start date year
    $sSDM    = "ALL";   # Start date month
    $sSDD    = "ALL";   # Start date day
    $sED     = "ALL";   # Complete end date
    $sEDY    = "ALL";   # End date year
    $sEDM    = "ALL";   # End date month
    $sEDD    = "ALL";   # End date day
    $sA      = "ALL";   # Action
    $sSumCnt = "20";    # Number of summary sites to show
    $sSumDen = "off";   # Show denied summary? on/off
    $sSumAlw = "off";   # Show allowed summary? on/off
    $sSumOrd = "URL";   # Default to showing url for summary denied/allowed
    $sL      = "off";   # Turn URL's into links? on/off
    $sZ      = "off";   # Examine gzipped files? on/off

    # Read one request parameter in scalar context. Calling param() in list
    # context inside an argument list lets a repeated parameter inject extra
    # arguments; scalar context always yields a single value. A missing
    # parameter is mapped to '' so the comparisons below stay simple.
    my $param = sub {
        my ($name) = @_;
        my $value = $q->param($name);
        return defined $value ? $value : '';
    };

    $sIP  = validateIP($param->('sIP')) if $param->('sIP')  ne "";
    $sUN  = $param->('sUN')             if $param->('sUN')  ne "";
    $sURL = $param->('sURL')            if $param->('sURL') ne "";

    # A date range is only applied when all six date fields were supplied
    # and none of them is the 'ALL' placeholder.
    my @dateFields = qw(sSDY sSDM sSDD sEDY sEDM sEDD);
    my %dateValue  = map { $_ => $param->($_) } @dateFields;
    my @unset      = grep { $dateValue{$_} eq "" || $dateValue{$_} eq 'ALL' }
                     @dateFields;
    if (!@unset) {
        ($sSDY, $sSDM, $sSDD, $sEDY, $sEDM, $sEDD) = @dateValue{@dateFields};
        $sSD = convertDate($sSDY.'.'.$sSDM.'.'.$sSDD);
        $sED = convertDate($sEDY.'.'.$sEDM.'.'.$sEDD);
        # Both values are YYYYMMDD strings, so a numeric comparison orders
        # them chronologically.
        if ($sSD > $sED) {
            # The old text said the opposite of what this condition detects.
            $msg = "Start Date is greater than End Date";
            # NOTE(review): if printMenu returns instead of exiting, the
            # search below still runs with the bad range - confirm.
            printMenu();
        }
    }

    $sA      = validateAction($param->('sA'))       if $param->('sA')      ne "";
    $sSumCnt = validateSummary($param->('sSumCnt')) if $param->('sSumCnt') ne "";
    $sSumDen = $param->('sSumDen') if $param->('sSumDen') eq 'on';
    $sSumAlw = $param->('sSumAlw') if $param->('sSumAlw') eq 'on';
    $sSumOrd = $param->('sSumOrd') if $param->('sSumOrd') ne '';
    $sL      = $param->('sL')      if $param->('sL')      eq 'on';
    $sZ      = $param->('sZ')      if $param->('sZ')      eq 'on';

    # Need a few global variables to keep from passing back and forth a bunch.
    # Chained assignment is required here: the comma-separated form
    # "$a, $b, $c, $d = 0" only initialises the last variable.
    $linesRead = $allowTotal = $blockTotal = $grandTotal = 0;
    searchLog();
}
elsif ($a eq 'h') {
    displayHelp();
}
else {
    printMenu();
}
#############
sub searchLog
#############
# Prints the report heading, then walks the files in $logdir whose names
# match the $logfile prefix. Reads the search globals set by the request
# handler ($sSD, $sED, $sUN, $sIP, $sA, $sURL, $sZ) and sets $msg when a
# gzip'ed log cannot be opened.
# NOTE(review): only the first part of this routine is visible here; the
# handling of opened files and the closing braces are not shown.
{
# Becomes 1 once the "Ignoring gzip logfile(s)" notice has been printed,
# so the notice is emitted only once.
my $first = 0;
&printHeader;
print "";
print "Report information for:
Start Date: $sSD | End Date: $sED |
Username : $sUN | IP: $sIP |
Action: $sA | URL: $sURL\n";
print "";
# Collect the candidate log files. $logfile is interpolated into the regex
# unescaped, so its '.' matches any character.
# NOTE(review): the result of opendir is not checked.
opendir(D, $logdir);
@files = grep {/^$logfile/} readdir(D);
# Reverse string order of the filenames.
@files = sort {$b cmp $a} @files;
closedir(D);
foreach $file (@files) {
# Any filename containing ".gz" is treated as a gzip'ed log.
if ($file =~ /\.gz/) {
# gzip'ed logs are skipped (and listed) unless $sZ is something other
# than 'off'.
if ($sZ eq 'off') {
if ($first == 0) {
print "Ignoring gzip logfile(s) in $logdir: ";
$first = 1;
}
print "$file | ";
next;
}
# NOTE(review): the mode argument is the bareword r; it is only treated
# as the string 'r' while "strict subs" is off.
$gz = gzopen($logdir.$file,r);
if (!$gz) {
$msg = "Cannot open $logdir$file. Check Permissions.
";
}
#############
# checkLine - test one raw log line against the active search criteria.
#
# Parameters:
#   $line - one line of a DansGuardian log ($logformat 1 = native format,
#           2 = CSV-style format).
#
# Reads the search globals $sIP, $sUN, $sURL, $sSD, $sED, $sA, $sSumAlw,
# $sSumDen, $sSumOrd and $sL. Updates $linesRead, $allowTotal, $blockTotal,
# $grandTotal, %topSites and %blockSites. A matching line is either counted
# (when a summary was requested) or printed.
#
# Returns nothing useful; it returns early as soon as a filter rejects the
# line.
#############
sub checkLine {
    my ($line) = @_;

    # If a line doesn't start with a digit, throw it out.
    return if $line !~ /^\d/;
    $linesRead++;

    # Emit some output every 1000 log file lines read to keep the browser
    # connection alive.
    if (($linesRead % 1000) == 0) {
        print " ";
    }

    my ($date, $time, $user, $ip, $url, $toeol);
    if ($logformat == 2) {
        # If CSV format, then convert to dg format.
        # $c1=date+time, $c5=action, $c6=method, $c7=size
        my ($c1, $c5, $c6, $c7);
        ($c1,$user,$ip,$url,$c5,$c6,$c7) = split(/","/,$line,7);
        ($date,$time) = split(/ /,$c1);
        # Clean up the extra quotes - this is dirty but does the trick. Also,
        # by doing the split above it would be possible for a line to be
        # misread if a strange URL contained such a sequence...but it gets
        # the job done in most cases.
        $date =~ s/\"//;
        $c7   =~ s/\"//;
        $toeol = $c5 . ' ' . $c6. ' ' . $c7;
    }
    else {
        ($date,$time,$user,$ip,$url,$toeol) = split(/ /,$line,6);
    }

    # Rule out the easy matches first
    return if ($sIP ne "ALL" && $sIP ne $ip);
    return if ($sUN ne "ALL" && $sUN ne $user);

    # Don't do a date comparison unless we are told to
    if ($sSD ne "ALL" || $sED ne "ALL") {
        my $dgDate = convertDate($date);
        return if (!($dgDate ge $sSD && $dgDate le $sED));
    }

    # Capture in list context so a failed match yields empty values rather
    # than whatever $1/$2 held from an earlier successful match.
    # First capture is the protocol (HTTP, FTP); second is the domain part
    # without http:// or ftp://.
    my (undef, $baseurl) = $url =~ /(\w+):\/\/([\w\.-]+)\/?(\S*)/;
    $baseurl = '' unless defined $baseurl;
    return if ($sURL ne "ALL" && $sURL ne $baseurl);

    # Remainder of the line:
    #   1 = *DENIED* or *EXCEPTION* etc., if exists
    #   2 = reason for #1, if exists
    #   3 = method (GET POST)
    #   4 = size
    #   5 = everything after size (unknown, unknown, status, type)
    # When this match fails the fields must be empty. Reading $1..$4 directly
    # would return the URL match's captures, reporting the protocol and host
    # as the action and reason.
    my ($action, $reason, $method, $size) =
        $toeol =~ /(\*.+\*)? ?(.*)? ([A-Z]+) ([0-9]+) (.*)?/;
    for my $field ($action, $reason, $method, $size) {
        $field = '' unless defined $field;
    }

    if ($sA ne "ALL") {
        return if ($sA eq "denAll" &&
                   $action ne "*DENIED*");
        return if ($sA eq "excAll" &&
                   $action ne "*EXCEPTION*");
        return if ($sA eq "denSite" &&
                   !($reason =~ /^Banned site/));
        return if ($sA eq "denRegURL" &&
                   !($reason =~ /^Banned Regular Expression URL/));
        return if ($sA eq "denPhrase" &&
                   !($reason =~ /^Banned Phrase/));
        return if ($sA eq "denCombPhrase" &&
                   !($reason =~ /^Banned combination phrase/));
        return if ($sA eq "denWeightPhrase" &&
                   !($reason =~ /^Weighted phrase limit/));
        return if ($sA eq "denExt" &&
                   !($reason =~ /^Banned extension/));
        return if ($sA eq "denMIME" &&
                   !($reason =~ /^Banned MIME Type/));
        return if ($sA eq "denICRA" &&
                   !($reason =~ /^ICRA/));
        return if ($sA eq "denBlanketIP" &&
                   !($reason =~ /^Blanket IP Block/));
        return if ($sA eq "excSite" &&
                   !($reason =~ /^Exception site/));
        return if ($sA eq "excPhrase" &&
                   !($reason =~ /^Exception phrase/));
        return if ($sA eq "excCombPhrase" &&
                   !($reason =~ /^Combination exception phrase/));
    }

    # Need to do a count for grandTotal if allowed OR denied summary selected
    if ($sSumAlw eq "on" || $sSumDen eq "on") {
        if ($action ne '*DENIED*') {
            $allowTotal++;
            $grandTotal++;
            # Don't waste memory if didn't want this, but need to count for
            # grandTotal
            $topSites{$baseurl}++ if ($sSumAlw eq "on" && $sSumOrd eq "URL");
            $topSites{$ip}++      if ($sSumAlw eq "on" && $sSumOrd eq "IP");
            $topSites{$user}++    if ($sSumAlw eq "on" && $sSumOrd eq "User");
        }
        else {
            $blockTotal++;
            $grandTotal++;
            # Don't waste memory if didn't want this, but need to count for
            # grandTotal
            $blockSites{$baseurl}++ if ($sSumDen eq "on" && $sSumOrd eq "URL");
            $blockSites{$ip}++      if ($sSumDen eq "on" && $sSumOrd eq "IP");
            $blockSites{$user}++    if ($sSumDen eq "on" && $sSumOrd eq "User");
        }
    }
    else {
        # No summary requested: print the matching line itself.
        print "$date $time ";
        print "$ip\n$user ";
        # NOTE(review): both branches currently emit identical text;
        # presumably the 'on' branch is meant to render the URL as a link -
        # confirm against the intended output.
        if ($sL eq 'on') {
            print "$url $method $size ";
        }
        else {
            print "$url $method $size ";
        }
        if ($action ne "" && $reason ne "") {
            print "$action : $reason\n";
        }
        else {
            print "\n";
        }
        $grandTotal++;
    }
}
####################
# showSummarySites - print the heading line for a summary section.
#
# Parameters (positional):
#   1 - total number of requests in this category
#   2 - category label inserted into the heading
#   3 - how many top entries the heading announces
#   4 - summary ordering selector (not used by the visible body)
#   5.. - key/count pairs for the summary (not used by the visible body)
####################
sub showSummarySites {
    my ($requestTotal, $label, $limit, $ordering, %hitsFor) = @_;
    my $rank = 1;    # not used by the visible body

    my $heading = "Total $label Requests (only top $limit sites shown) : $requestTotal";
    print "\n" . $heading . "\n";
    print "\n";
}
###################
# validateSummary - sanitise the requested number of summary entries.
#
# Parameters:
#   $count - value supplied by the user.
#
# Returns $count when it is a whole number from 0 to 100, otherwise the
# default of 20. A purely numeric range check is not enough here: strings
# such as "abc" or "5;x" would pass it and be returned verbatim.
###################
sub validateSummary {
    my ($count) = @_;
    my $default = 20;

    return $default unless defined $count && $count =~ /\A[0-9]+\z/;
    return $default if $count > 100;
    return $count;
}
##############
# validateIP - check a user-supplied IP filter.
#
# Parameters:
#   $checkIP - the literal 'ALL' or a dotted-quad IPv4 address.
#
# Returns $checkIP unchanged when it is 'ALL' or exactly four dot-separated
# octets in the range 0-255. Otherwise sets $msg, calls printMenu and
# returns whatever printMenu returns.
##############
sub validateIP {
    my ($checkIP) = @_;

    return 'ALL' if $checkIP eq 'ALL';

    # Anchor both ends: without \z, input such as "1.2.3.4.5" or
    # "1.2.3.4.<anything>" would be accepted.
    my $octet = qr/(?:2(?:[0-4]\d|5[0-5])|1?\d{1,2})/;
    return $checkIP if $checkIP =~ /\A$octet(?:\.$octet){3}\z/;

    $msg = "Invalid IP address entered.";
    return printMenu();
}
##################
# validateAction - normalise the action filter chosen by the user.
#
# Parameters:
#   the action name sent by the browser.
#
# Returns "ALL" when the action is "none"; any other value is returned
# unchanged.
#
# TODO: keep the known actions in a hash so they are easier to add/modify
# and can be validated against it.
##################
sub validateAction {
    my ($requested) = @_;
    return $requested eq "none" ? "ALL" : $requested;
}
###############
# convertDate - turn a "YYYY.M.D" date into a sortable "YYYYMMDD" string.
#
# Parameters:
#   $workDate - year, month and day separated by dots; month and day may be
#               one or two digits.
#
# Returns the zero-padded YYYYMMDD string when the month is 1-12, the day is
# 1-31 and the year is 2000-2035. Otherwise sets $msg, calls printMenu and
# returns whatever printMenu returns.
#
# The date parts are lexical on purpose: assigning them to $year and $mon
# would overwrite the script-wide "current date" globals of the same name.
# Fields are checked numerically so values such as "10a" are rejected.
###############
sub convertDate {
    my ($workDate) = @_;

    if (defined $workDate
        && $workDate =~ /\A([0-9]{4})\.([0-9]{1,2})\.([0-9]{1,2})\z/) {
        my ($year, $mon, $day) = ($1, $2, $3);
        # NOTE(review): the 2035 upper bound is carried over unchanged.
        if (   $mon  >= 1    && $mon  <= 12
            && $day  >= 1    && $day  <= 31
            && $year >= 2000 && $year <= 2035) {
            return sprintf('%04d%02d%02d', $year, $mon, $day);
        }
    }

    $msg = "Invalid Date Detected - $workDate -
Be sure logformat is set to the correct format.";
    return printMenu();
}
###############
sub buildSelect
###############
{
my ($start, $end, $type) = @_;
my $x = 0;
## print "