From d8018cf9916569f280d44651f190ccae598e64dc Mon Sep 17 00:00:00 2001
From: daneel
Date: Thu, 7 Jan 2016 22:00:08 +0100
Subject: [PATCH 1/3] Managed cases with two or three source IP addresses

---
Review notes (commentary between the "---" marker and the diff is not part
of the commit):
 * The line structure of this series was restored from a whitespace-collapsed
   copy. Indentation and the position of blank context lines are
   reconstructed so that each hunk satisfies the counts in its "@@" header;
   apply with whitespace tolerance (for example "git am --ignore-whitespace").
 * This patch replaces the plain "Invalid Line" fallback with two further
   attempts: a pattern with three comma-separated leading addresses, then a
   pattern with two. A matching line emits one CSV row per captured address.
 * As submitted in this patch, the two-address branch also prints a row for
   "$host3", which that branch does not assign. PATCH 2/3 removes that row.

 accesslog2csv.pl | 63 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/accesslog2csv.pl b/accesslog2csv.pl
index df01df7..e1f1c0a 100755
--- a/accesslog2csv.pl
+++ b/accesslog2csv.pl
@@ -47,6 +47,65 @@

         print STDOUT "\"$host\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
     } else {
-        print STDERR "Invalid Line at $line_no: $_";
-    }
+        if (/^([\w\.:-]+),\s*([\w\.:-]+),\s*([\w\.:-]+)\s*([\w\.:-]+)\s+([\w\.-]+)\s+\[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)\s?([\w:\+-]+)]\s+"(\w+)\s+(\S+)\s+HTTP\/1\.\d"\s+(\d+)\s+([\d-]+)((\s+"([^"]+)"\s+")?([^"]+)")?$/) {
+            $host1 = $1;
+            $host2 = $2;
+            $host3 = $3;
+            $other = $4;
+            $logname = $5;
+            $day = $6;
+            $month = $MONTHS{$7};
+            $year = $8;
+            $hour = $9;
+            $min = $10;
+            $sec = $11;
+            $tz = $12;
+            $method = $13;
+            $url = $14;
+            $code = $15;
+            if ($16 eq '-') {
+                $bytesd = 0;
+            } else {
+                $bytesd = $16;
+            }
+            $referer = $19;
+            $ua = $20;
+
+            print STDOUT "\"$host1\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
+            print STDOUT "\"$host2\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
+            print STDOUT "\"$host3\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
+        } else {
+            if (/^([\w\.:-]+),\s*([\w\.:-]+)\s*([\w\.:-]+)\s+([\w\.-]+)\s+\[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)\s?([\w:\+-]+)]\s+"(\w+)\s+(\S+)\s+HTTP\/1\.\d"\s+(\d+)\s+([\d-]+)((\s+"([^"]+)"\s+")?([^"]+)")?$/) {
+                $host1 = $1;
+                $host2 = $2;
+                $other = $3;
+                $logname = $4;
+                $day = $5;
+                $month = $MONTHS{$6};
+                $year = $7;
+                $hour = $8;
+                $min = $9;
+                $sec = $10;
+                $tz = $11;
+                $method = $12;
+                $url = $13;
+                $code = $14;
+                if ($15 eq '-') {
+                    $bytesd = 0;
+                } else {
+                    $bytesd = $15;
+                }
+                $referer = $18;
+                $ua = $19;
+
+                print STDOUT "\"$host1\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
+                print STDOUT "\"$host2\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
+                print STDOUT "\"$host3\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
+            } else {
+                print STDERR "Invalid Line at $line_no: $_";
+            }
+        }
+    }
 }
+
+

From 50e3c63e66a46f56a8534b4eb714841c7d40eb46 Mon Sep 17 00:00:00 2001
From: daneel
Date: Thu, 7 Jan 2016 22:04:48 +0100
Subject: [PATCH 2/3] Fixed bug in 2 IP branch

---
Review notes (commentary between the "---" marker and the diff is not part
of the commit):
 * Removes the "$host3" output row from the two-address branch; that branch
   only assigns "$host1" and "$host2".
 * Indentation and the leading blank context line are reconstructed from the
   hunk header counts; apply with whitespace tolerance.

 accesslog2csv.pl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/accesslog2csv.pl b/accesslog2csv.pl
index e1f1c0a..cb4f584 100755
--- a/accesslog2csv.pl
+++ b/accesslog2csv.pl
@@ -100,7 +100,6 @@

                 print STDOUT "\"$host1\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
                 print STDOUT "\"$host2\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
-                print STDOUT "\"$host3\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
             } else {
                 print STDERR "Invalid Line at $line_no: $_";
             }

From 69477e88165deb36b317f2dd5ded0b6e823eeff5 Mon Sep 17 00:00:00 2001
From: daneel
Date: Fri, 12 Feb 2016 22:04:07 +0100
Subject: [PATCH 3/3] Added comment to regexps

---
Review notes (commentary between the "---" marker and the diff is not part
of the commit):
 * Comment-only change: labels the three patterns in the order they are
   tried (1 host, then 3 hosts, then 2 hosts).
 * Indentation and blank context lines are reconstructed from the hunk
   header counts. In the first hunk the blank line is assumed to precede
   "while (<>) {"; confirm against the target file or apply with whitespace
   tolerance.

 accesslog2csv.pl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/accesslog2csv.pl b/accesslog2csv.pl
index cb4f584..47ea848 100755
--- a/accesslog2csv.pl
+++ b/accesslog2csv.pl
@@ -23,6 +23,7 @@

 while (<>) {
     ++$line_no;
+    #regex for 1 host
     if (/^([\w\.:-]+)\s+([\w\.:-]+)\s+([\w\.-]+)\s+\[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)\s?([\w:\+-]+)]\s+"(\w+)\s+(\S+)\s+HTTP\/1\.\d"\s+(\d+)\s+([\d-]+)((\s+"([^"]+)"\s+")?([^"]+)")?$/) {
         $host = $1;
         $other = $2;
@@ -47,6 +48,7 @@

         print STDOUT "\"$host\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
     } else {
+        #regex for 3 hosts
         if (/^([\w\.:-]+),\s*([\w\.:-]+),\s*([\w\.:-]+)\s*([\w\.:-]+)\s+([\w\.-]+)\s+\[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)\s?([\w:\+-]+)]\s+"(\w+)\s+(\S+)\s+HTTP\/1\.\d"\s+(\d+)\s+([\d-]+)((\s+"([^"]+)"\s+")?([^"]+)")?$/) {
             $host1 = $1;
             $host2 = $2;
@@ -75,6 +77,7 @@
             print STDOUT "\"$host2\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
             print STDOUT "\"$host3\",\"$logname\",\"$year-$month-$day $hour:$min:$sec\",\"GMT$tz\",\"$method\",\"$url\",$code,$bytesd,\"$referer\"\,\"$ua\"\n";
         } else {
+            #regex for 2 hosts
             if (/^([\w\.:-]+),\s*([\w\.:-]+)\s*([\w\.:-]+)\s+([\w\.-]+)\s+\[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)\s?([\w:\+-]+)]\s+"(\w+)\s+(\S+)\s+HTTP\/1\.\d"\s+(\d+)\s+([\d-]+)((\s+"([^"]+)"\s+")?([^"]+)")?$/) {
                 $host1 = $1;
                 $host2 = $2;