291 lines
7.2 KiB
Perl
291 lines
7.2 KiB
Perl
#!/usr/bin/perl
|
|
|
|
use v5.32;
|
|
use strict;
|
|
use warnings;
|
|
#use diagnostics;
|
|
use feature qw(signatures refaliasing);
|
|
no warnings qw(experimental::signatures);
|
|
use Data::Dumper;
|
|
|
|
package Str {
|
|
sub contains ($x, $y) { -1 != index $x, $y }
|
|
sub starts_with ($x, $y) { 0 == index $x, $y }
|
|
sub ends_with ($x, $y) { length($x) - length($y) == index($x, $y) }
|
|
}
|
|
|
|
package Foostats::Logreader {
|
|
use Digest::SHA3 'sha3_512_base64';
|
|
use File::stat;
|
|
use PerlIO::gzip;
|
|
use Time::Piece;
|
|
|
|
use constant {
|
|
GEMINI_LOGS_GLOB => '/var/log/daemon*',
|
|
WWW_LOGS_GLOB => '/var/www/logs/access.log*',
|
|
};
|
|
|
|
sub anonymize_ip ($ip) {
|
|
my $ip_proto = (Str::contains $ip, ':') ? 'IPv6' : 'IPv4';
|
|
my $ip_hash = sha3_512_base64 $ip;
|
|
return ($ip_hash, $ip_proto);
|
|
}
|
|
|
|
sub read_lines ($glob, $callback) {
|
|
my sub year ($path) {
|
|
localtime( (stat $path)->mtime )->strftime('%Y')
|
|
}
|
|
|
|
my sub open_file ($path) {
|
|
my $flag = $path =~ /\.gz$/ ? '<:gzip' : '<';
|
|
open my $file, $flag, $path or die $!;
|
|
return $file;
|
|
}
|
|
|
|
for my $path (glob $glob) {
|
|
say "Opening $path";
|
|
my $file = open_file $path;
|
|
my $year = year $file;
|
|
while (<$file>) {
|
|
$callback->($year, split / +/) unless Str::contains $_, 'logfile turned over';
|
|
}
|
|
say "Closing $path";
|
|
close $file;
|
|
# last; # DEBUGGING ONLY TODO UNDO THIS;
|
|
}
|
|
}
|
|
|
|
sub parse_www_logs ($callback) {
|
|
my sub parse_date ($date) {
|
|
my $t = Time::Piece->strptime($date, '[%d/%b/%Y:%H:%M:%S');
|
|
($t->strftime('%Y-%m-%d'), $t->strftime('%H:%M:%S'));
|
|
}
|
|
|
|
my sub parse_line (@line) {
|
|
my ($ip_hash, $ip_proto) = anonymize_ip $line[1];
|
|
my ($date, $time) = parse_date $line[4];
|
|
{
|
|
proto => 'http/s',
|
|
host => $line[0],
|
|
ip_hash => $ip_hash,
|
|
ip_proto => $ip_proto,
|
|
date => $date,
|
|
time => $time,
|
|
uri_path => $line[7],
|
|
status => $line[9],
|
|
}
|
|
}
|
|
|
|
read_lines WWW_LOGS_GLOB, sub ($year, @line) {
|
|
$callback->(parse_line @line);
|
|
};
|
|
}
|
|
|
|
sub parse_gemini_logs ($callback) {
|
|
my sub parse_date ($year, @line) {
|
|
my $timestr = "$line[0] $line[1]";
|
|
Time::Piece->strptime($timestr, '%b %d')->strftime("$year-%m-%d");
|
|
}
|
|
|
|
my sub parse_vger_line ($year, @line) {
|
|
my $full_path = $line[5];
|
|
$full_path =~ s/"//g;
|
|
my ($proto, undef, $host, $uri_path) = split '/', $full_path, 4;
|
|
$uri_path = '' unless defined $uri_path;
|
|
{
|
|
proto => 'gemini',
|
|
host => $host,
|
|
uri_path => "/$uri_path",
|
|
status => $line[6],
|
|
date => parse_date($year, @line),
|
|
time => $line[2],
|
|
}
|
|
}
|
|
|
|
my sub parse_relayd_line ($year, @line) {
|
|
my ($ip_hash, $ip_proto) = anonymize_ip $line[12];
|
|
{
|
|
ip_hash => $ip_hash,
|
|
ip_proto => $ip_proto,
|
|
date => parse_date($year, @line),
|
|
time => $line[2],
|
|
}
|
|
}
|
|
|
|
# Expect one vger and one relayd log line per event! So collect
|
|
# both events (one from one log line each) and then merge the result hash!
|
|
my ($vger, $relayd);
|
|
read_lines GEMINI_LOGS_GLOB, sub ($year, @line) {
|
|
if ($line[4] eq 'vger:') {
|
|
$vger = parse_vger_line $year, @line;
|
|
} elsif ($line[5] eq 'relay' and Str::starts_with $line[6], 'gemini') {
|
|
$relayd = parse_relayd_line $year, @line;
|
|
}
|
|
|
|
if (defined $vger and defined $relayd and $vger->{time} eq $relayd->{time}) {
|
|
$callback->({ %$vger, %$relayd });
|
|
$vger = $relayd = undef;
|
|
}
|
|
};
|
|
}
|
|
|
|
sub parse_logs {
|
|
my $agg = Foostats::Aggregator->new;
|
|
|
|
my sub foo ($event) { $agg->add($event); }
|
|
|
|
say 'Parsing www logs';
|
|
parse_www_logs \&foo;
|
|
say 'Parsing gemini logs';
|
|
parse_gemini_logs \&foo;
|
|
|
|
return $agg->{stats};
|
|
}
|
|
}
|
|
|
|
package Foostats::Filter {
|
|
sub new ($class) {
|
|
bless {
|
|
odds => [qw(
|
|
.php wordpress /wp .asp .. robots.txt .env + % HNAP1 /admin
|
|
.git microsoft.exchange .lua /owa/
|
|
)]
|
|
}, $class;
|
|
}
|
|
|
|
sub ok ($self, $event) {
|
|
state %blocked = ();
|
|
return 0 if exists $blocked{$event->{ip_hash}};
|
|
|
|
if ($self->odd($event) or $self->excessive($event)) {
|
|
($blocked{$event->{ip_hash}} //= 0)++;
|
|
return 0;
|
|
|
|
} else {
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
sub odd ($self, $event) {
|
|
\my $uri_path = \$event->{uri_path};
|
|
|
|
for ($self->{odds}->@*) {
|
|
if (Str::contains $uri_path, $_) {
|
|
say STDERR "Warn: $uri_path contains $_ and is odd and will therefore be blocked!";
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub excessive ($self, $event) {
|
|
\my $time = \$event->{time};
|
|
\my $ip_hash = \$event->{ip_hash};
|
|
|
|
state $last_time = $time; # Time with second: 'HH:MM:SS'
|
|
state %count = (); # IPs accessing within the same second!
|
|
|
|
if ($last_time ne $time) {
|
|
$last_time = $time;
|
|
%count = ();
|
|
return 0;
|
|
}
|
|
|
|
# IP requested site more than once within the same second!?
|
|
if (1 < ++($count{$ip_hash} //= 0)) {
|
|
say STDERR "Warn: $ip_hash blocked due to excessive requesting...";
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
package Foostats::Aggregator {
|
|
use constant {
|
|
ATOM_FEED_URI => '/gemfeed/atom.xml',
|
|
GEMFEED_URI => '/gemfeed/index.gmi',
|
|
GEMFEED_URI_2 => '/gemfeed/',
|
|
};
|
|
|
|
sub new ($class) {
|
|
bless {
|
|
filter => Foostats::Filter->new,
|
|
stats => { by_date => {}, global => { notyetimplemented => 0 } },
|
|
}, $class;
|
|
}
|
|
|
|
sub add ($self, $event) {
|
|
my $date = $event->{date};
|
|
$self->add_count_by_date($event, $date);
|
|
}
|
|
|
|
sub add_count_by_date ($self, $event, $date) {
|
|
$self->{stats}{by_date}{$date} //= {
|
|
count => { filtered => 0 },
|
|
feed_ips => { atom_feed => {}, gemfeed => {} },
|
|
};
|
|
|
|
\my $s = \$self->{stats}{by_date}{$date};
|
|
\my $e = \$event;
|
|
|
|
unless ($self->{filter}->ok($event)) {
|
|
$s->{count}{filtered}++;
|
|
return;
|
|
}
|
|
|
|
\my $c = \$s->{count};
|
|
\my $f = \$s->{feed_ips};
|
|
|
|
($c->{$e->{proto}} //= 0)++;
|
|
($c->{$e->{ip_proto}} //= 0)++;
|
|
($c->{$e->{proto}.' '.$e->{ip_proto}} //= 0)++;
|
|
|
|
if (Str::contains $e->{uri_path}, ATOM_FEED_URI) {
|
|
($f->{atom_feed}->{$e->{ip_hash}} //= 0)++;
|
|
} elsif (Str::contains $e->{uri_path}, GEMFEED_URI) {
|
|
($f->{gemfeed}->{$e->{ip_hash}} //= 0)++;
|
|
} elsif (Str::ends_with $e->{uri_path}, GEMFEED_URI_2) {
|
|
($f->{gemfeed}->{$e->{ip_hash}} //= 0)++;
|
|
}
|
|
|
|
return $s;
|
|
}
|
|
}
|
|
|
|
package Foostats::Outputter {
|
|
sub new ($class, %args) {
|
|
bless \%args, $class;
|
|
}
|
|
|
|
sub write ($self) {
|
|
my $outfile = $self->{outdir} . '/stats.txt';
|
|
say "Writing $outfile";
|
|
|
|
say 'Unique feed subscribers:';
|
|
say $self->for_dates(\&_feed_ips);
|
|
say '';
|
|
}
|
|
|
|
sub for_dates ($self, $callback) {
|
|
say "$_: " . $callback->($self->{stats}{by_date}{$_})
|
|
for sort keys $self->{stats}->{by_date}->%*;
|
|
}
|
|
|
|
sub _feed_ips ($stats) {
|
|
my $atom_feed = scalar keys $stats->{feed_ips}->{atom_feed}->%*;
|
|
my $gemfeed = scalar keys $stats->{feed_ips}->{gemfeed}->%*;
|
|
sprintf "Atom: %2d, Gemfeed: %2d, Total: %2d",
|
|
$atom_feed, $gemfeed, $atom_feed + $gemfeed;
|
|
}
|
|
}
|
|
|
|
package main {
|
|
my $out = Foostats::Outputter->new(
|
|
stats => Foostats::Logreader::parse_logs,
|
|
outdir => '/tmp/',
|
|
);
|
|
|
|
#say Dumper $out;
|
|
$out->write;
|
|
}
|