reslog/t/_helper.pm

829 lines
27 KiB
Perl
Raw Normal View History

2021-02-01 15:06:51 +08:00
# _helper.pm - A simple test suite helper
# Copyright (c) 2005-2021 imacat
#
2021-02-01 15:06:51 +08:00
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
2021-02-01 15:06:51 +08:00
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
2021-02-01 15:06:51 +08:00
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package _helper;
use 5.005;
use strict;
use warnings;
use base qw(Exporter);
our ($VERSION, @EXPORT);
2021-02-01 15:06:51 +08:00
$VERSION = "0.05";
@EXPORT = qw(
read_file read_raw_file write_file write_raw_file
run_cmd where_is file_type list_files preserve_source clean_up
has_no_file has_no_gzip has_no_bzip2
make_log_file make_resolved_log_file make_empty_log_file
random_word
TYPE_TEXT TYPE_GZIP TYPE_BZIP2
@CONTENT_TYPES @SOURCE_TYPES @KEEP_MODES @OVERRIDE_MODES @SUFFICES @TRIM_SUFFIX);
2021-02-01 15:06:51 +08:00
# Prototype declaration
sub this_file();
sub read_file($);
sub read_raw_file($);
sub write_file($$);
sub write_raw_file($$);
sub run_cmd($@);
sub where_is($);
sub file_type($);
sub list_files($);
sub preserve_source($);
sub clean_up($$$);
sub has_no_file();
sub has_no_gzip();
sub has_no_bzip2();
sub make_log_file($);
sub make_resolved_log_file($);
sub make_empty_log_file($);
sub random_word();
sub random_ip();
2021-02-01 15:06:51 +08:00
use ExtUtils::MakeMaker qw();
use Fcntl qw(:seek);
use File::Basename qw(basename);
use File::Copy qw(copy);
use File::Path qw(mkpath rmtree);
use File::Spec::Functions qw(splitdir catdir catfile path);
use File::Temp qw(tempfile);
use Socket;
our (%WHERE_IS, $HAS_NO_FILE, $HAS_NO_GZIP, $HAS_NO_BZIP2, $RANDOM_IP);
%WHERE_IS = qw();
undef $HAS_NO_FILE;
undef $HAS_NO_GZIP;
undef $HAS_NO_BZIP2;
undef $RANDOM_IP;
2021-02-01 15:06:51 +08:00
use constant TYPE_TEXT => "text/plain";
2021-02-01 15:06:51 +08:00
use constant TYPE_GZIP => "application/x-gzip";
use constant TYPE_BZIP2 => "application/x-bzip2";
our (@CONTENT_TYPES, @SOURCE_TYPES, @KEEP_MODES, @OVERRIDE_MODES, @SUFFICES,
@TRIM_SUFFIX);
# All the content type information
@CONTENT_TYPES = (
{ "title" => "normal log file",
"sub" => \&make_log_file, },
{ "title" => "resolved log file",
"sub" => \&make_resolved_log_file, },
{ "title" => "empty log file",
"sub" => \&make_empty_log_file, }, );
2021-02-01 15:06:51 +08:00
# All the source type information
@SOURCE_TYPES = (
{ "title" => "plain text source",
"type" => TYPE_TEXT,
"suf" => "",
"skip" => 0, },
{ "title" => "gzip source",
"type" => TYPE_GZIP,
"suf" => ".gz",
"skip" => has_no_gzip, },
{ "title" => "bzip2 source",
"type" => TYPE_BZIP2,
"suf" => ".bz2",
"skip" => has_no_bzip2, }, );
# All the keep mode information
@KEEP_MODES = (
{ "title" => "keep default",
"opts" => [],
"del" => 1,
"keep" => 0,
"cdel" => 0,
"ckeep" => 1, },
{ "title" => "keep all",
"opts" => [qw(-k a)],
"del" => 0,
"keep" => 1,
"cdel" => 0,
"ckeep" => 1, },
{ "title" => "keep delete",
"opts" => [qw(-k d)],
"del" => 1,
"keep" => 0,
"cdel" => 1,
"ckeep" => 0, },
{ "title" => "keep restart",
"opts" => [qw(-k r)],
"del" => 0,
"keep" => 0,
"cdel" => 0,
"ckeep" => 0, }, );
# All the override mode information
@OVERRIDE_MODES = (
{ "title" => "override no existing",
"opts" => [],
"exists" => 0,
"ok" => 1,
"ce" => sub { $_[1]; }, },
{ "title" => "override default",
"opts" => [],
"exists" => 1,
"ok" => 0,
"ce" => sub { $_[0]; }, },
{ "title" => "override overwrite",
"opts" => [qw(-o o)],
"exists" => 1,
"ok" => 1,
"ce" => sub { $_[1]; }, },
{ "title" => "override append",
"opts" => [qw(-o a)],
"exists" => 1,
"ok" => 1,
"ce" => sub { $_[0] . $_[1]; }, },
{ "title" => "override fail",
"opts" => [qw(-o f)],
"exists" => 1,
"ok" => 0,
"ce" => sub { $_[0]; }, }, );
2021-02-01 15:06:51 +08:00
# All the suffix information
@SUFFICES = (
{ "title" => "default suffix",
"suf" => ".resolved",
"opts" => sub { }, },
{ "title" => "custom suffix",
"suf" => undef,
"opts" => sub { ("-s", $_[0]); }, }, );
2021-02-01 15:06:51 +08:00
# All the trim-suffix information
@TRIM_SUFFIX = (
{ "title" => "default trim-suffix",
"suf" => "",
"opts" => sub { }, },
{ "title" => "custom trim-suffix",
"suf" => undef,
"opts" => sub { ("-t", $_[0]); }, }, );
# Return the name of this file
sub this_file() { basename($0); }
# A simple reader to read a log file in any supported format
sub read_file($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $content);
$file = $_[0];
2021-02-01 15:06:51 +08:00
# non-existing file
return undef if !-e $file;
2021-02-01 15:06:51 +08:00
# a gzip compressed file
if ($file =~ /\.gz$/) {
# Compress::Zlib
if (eval { require Compress::Zlib;
Compress::Zlib->import(qw(gzopen));
2021-02-01 15:06:51 +08:00
1; }) {
use Compress::Zlib qw(gzopen);
2021-02-01 15:06:51 +08:00
my ($FH, $gz);
$content = "";
open $FH, $file or die this_file . ": $file: $!";
$gz = gzopen($FH, "rb") or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
while (1) {
($gz->gzread($_, 10240) != -1)
or die this_file . ": $file: " . $gz->gzerror;
2021-02-01 15:06:51 +08:00
$content .= $_;
last if length $_ < 10240;
}
$gz->gzclose and die this_file . ": $file: " . $gz->gzerror;
2021-02-01 15:06:51 +08:00
return $content;
2021-02-01 15:06:51 +08:00
# gzip executable
} else {
my ($PH, $CMD);
$CMD = where_is "gzip";
2021-02-01 15:06:51 +08:00
$CMD = "\"$CMD\" -cd \"$file\"";
open $PH, "$CMD |" or die this_file . ": $CMD: $!";
2021-02-01 15:06:51 +08:00
$content = join "", <$PH>;
close $PH or die this_file . ": $CMD: $!";
2021-02-01 15:06:51 +08:00
return $content;
}
2021-02-01 15:06:51 +08:00
# a bzip compressed file
} elsif ($file =~ /\.bz2$/) {
# Compress::Bzip2
if (eval { require Compress::Bzip2;
Compress::Bzip2->import(2.00);
Compress::Bzip2->import(qw(bzopen));
2021-02-01 15:06:51 +08:00
1; }) {
my ($FH, $bz);
$content = "";
open $FH, $file or die this_file . ": $file: $!";
$bz = bzopen($FH, "rb") or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
while (1) {
($bz->bzread($_, 10240) != -1)
or die this_file . ": $file: " . $bz->bzerror;
2021-02-01 15:06:51 +08:00
$content .= $_;
last if length $_ < 10240;
}
$bz->bzclose and die this_file . ": $file: " . $bz->bzerror;
2021-02-01 15:06:51 +08:00
return $content;
2021-02-01 15:06:51 +08:00
# bzip2 executable
} else {
my ($PH, $CMD);
$CMD = where_is "bzip2";
2021-02-01 15:06:51 +08:00
$CMD = "bzip2 -cd \"$file\"";
open $PH, "$CMD |" or die this_file . ": $CMD: $!";
2021-02-01 15:06:51 +08:00
$content = join "", <$PH>;
close $PH or die this_file . ": $CMD: $!";
2021-02-01 15:06:51 +08:00
return $content;
}
2021-02-01 15:06:51 +08:00
# a plain text file
} else {
my $FH;
open $FH, $file or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
$content = join "", <$FH>;
close $FH or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
return $content;
}
}
# A raw file reader
sub read_raw_file($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $content, $FH, $size);
$file = $_[0];
2021-02-01 15:06:51 +08:00
# non-existing file
return undef if !-e $file;
2021-02-01 15:06:51 +08:00
$size = (stat $file)[7];
open $FH, $file or die this_file . ": $file: $!";
binmode $FH or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
(read($FH, $content, $size) == $size)
or die this_file . ": $file: $!";
close $FH or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
return $content;
}
# A simple writer to write a log file in any supported format
sub write_file($$) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $content);
($file, $content) = @_;
2021-02-01 15:06:51 +08:00
# a gzip compressed file
if ($file =~ /\.gz$/) {
# Compress::Zlib
if (eval { require Compress::Zlib;
Compress::Zlib->import(qw(gzopen));
2021-02-01 15:06:51 +08:00
1; }) {
my ($FH, $gz);
open $FH, ">$file" or die this_file . ": $file: $!";
$gz = gzopen($FH, "wb9") or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
($gz->gzwrite($content) == length $content)
or die this_file . ": $file: " . $gz->gzerror;
$gz->gzclose and die this_file . ": $file: " . $gz->gzerror;
2021-02-01 15:06:51 +08:00
return;
2021-02-01 15:06:51 +08:00
# gzip executable
} else {
my ($PH, $CMD);
$CMD = where_is "gzip";
2021-02-01 15:06:51 +08:00
$CMD = "\"$CMD\" -c9f > \"$file\"";
open $PH, "| $CMD" or die this_file . ": $CMD: $!";
print $PH $content or die this_file . ": $CMD: $!";
close $PH or die this_file . ": $CMD: $!";
2021-02-01 15:06:51 +08:00
return;
}
2021-02-01 15:06:51 +08:00
# a bzip compressed file
} elsif ($file =~ /\.bz2$/) {
# Compress::Bzip2
if (eval { require Compress::Bzip2;
Compress::Bzip2->import(2.00);
Compress::Bzip2->import(qw(bzopen));
2021-02-01 15:06:51 +08:00
1; }) {
my ($FH, $bz);
open $FH, ">$file" or die this_file . ": $file: $!";
$bz = bzopen($FH, "wb9") or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
if ($content ne "") {
($bz->bzwrite($content, length $content) == length $content)
or die this_file . ": $file: " . $bz->bzerror;
2021-02-01 15:06:51 +08:00
}
$bz->bzclose and die this_file . ": $file: " . $bz->bzerror;
2021-02-01 15:06:51 +08:00
return;
2021-02-01 15:06:51 +08:00
# bzip2 executable
} else {
my ($PH, $CMD);
$CMD = where_is "bzip2";
2021-02-01 15:06:51 +08:00
$CMD = "\"$CMD\" -9f > \"$file\"";
open $PH, "| $CMD" or die this_file . ": $CMD: $!";
print $PH $content or die this_file . ": $CMD: $!";
close $PH or die this_file . ": $CMD: $!";
2021-02-01 15:06:51 +08:00
return;
}
2021-02-01 15:06:51 +08:00
# a plain text file
} else {
my $FH;
open $FH, ">$file" or die this_file . ": $file: $!";
print $FH $content or die this_file . ": $file: $!";
close $FH or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
return;
}
}
# A raw file writer
sub write_raw_file($$) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $content, $FH);
($file, $content) = @_;
open $FH, ">$file" or die this_file . ": $file: $!";
binmode $FH or die this_file . ": $file: $!";
print $FH $content or die this_file . ": $file: $!";
close $FH or die this_file . ": $file: $!";
2021-02-01 15:06:51 +08:00
return;
}
# Run a command and return the result
sub run_cmd($@) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($ret_no, $out, $err, $in, @cmd, $cmd, $OUT, $ERR, $STDOUT, $STDERR, $PH);
2021-02-01 15:06:51 +08:00
($in, @cmd) = @_;
2021-02-01 15:06:51 +08:00
$err = "Running " . join(" ", map "\"$_\"", @cmd) . "\n";
$out = "";
open $STDOUT, ">&", \*STDOUT or die this_file . ": STDOUT: $!";
open $STDERR, ">&", \*STDERR or die this_file . ": STDERR: $!";
$OUT = tempfile or die this_file . ": tempfile: $!";
binmode $OUT or die this_file . ": tempfile: $!";
$ERR = tempfile or die this_file . ": tempfile: $!";
binmode $ERR or die this_file . ": tempfile: $!";
open STDOUT, ">&", $OUT or die this_file . ": tempfile: $!";
binmode STDOUT or die this_file . ": tempfile: $!";
open STDERR, ">&", $ERR or die this_file . ": tempfile: $!";
binmode STDERR or die this_file . ": tempfile: $!";
2021-02-01 15:06:51 +08:00
$cmd = join " ", map "\"$_\"", @cmd;
if ($^O eq "MSWin32") {
open $PH, "| $cmd" or die this_file . ": $cmd: $!";
2021-02-01 15:06:51 +08:00
} else {
open $PH, "|-", @cmd or die this_file . ": $cmd: $!";
2021-02-01 15:06:51 +08:00
}
binmode $PH or die this_file . ": $cmd: $!";
print $PH $in or die this_file . ": $cmd: $!";
2021-02-01 15:06:51 +08:00
close $PH;
$ret_no = $?;
open STDOUT, ">&", $STDOUT or die this_file . ": tempfile: $!";
open STDERR, ">&", $STDERR or die this_file . ": tempfile: $!";
seek $OUT, 0, SEEK_SET or die this_file . ": tempfile: $!";
2021-02-01 15:06:51 +08:00
$out = join "", <$OUT>;
close $OUT or die this_file . ": tempfile: $!";
seek $ERR, 0, SEEK_SET or die this_file . ": tempfile: $!";
2021-02-01 15:06:51 +08:00
$err = join "", <$ERR>;
close $ERR or die this_file . ": tempfile: $!";
return ($ret_no, $out, $err);
2021-02-01 15:06:51 +08:00
}
# Find an executable
2021-02-01 15:06:51 +08:00
# Code inspired from CPAN::FirstTime
sub where_is($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $path);
$file = $_[0];
return $WHERE_IS{$file} if exists $WHERE_IS{$file};
2021-02-01 15:06:51 +08:00
foreach my $dir (path) {
return ($WHERE_IS{$file} = $path)
2021-02-01 15:06:51 +08:00
if defined($path = MM->maybe_command(catfile($dir, $file)));
}
return ($WHERE_IS{$file} = undef);
2021-02-01 15:06:51 +08:00
}
# Find the file type
sub file_type($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my $file;
$file = $_[0];
return undef unless -e $file;
# Use File::MMagic
if (eval { require File::MMagic; 1; }) {
$_ = File::MMagic->new->checktype_filename($file);
2021-02-01 15:06:51 +08:00
return TYPE_GZIP if /gzip/;
return TYPE_BZIP2 if /bzip2/;
# All else are text/plain
return TYPE_TEXT;
2021-02-01 15:06:51 +08:00
}
# Use file executable
if (defined($_ = where_is "file")) {
2021-02-01 15:06:51 +08:00
$_ = join "", `"$_" "$file"`;
return TYPE_GZIP if /gzip/;
return TYPE_BZIP2 if /bzip2/;
# All else are text/plain
return TYPE_TEXT;
2021-02-01 15:06:51 +08:00
}
# No type checker available
return undef;
}
# Obtain the files list in a directory
sub list_files($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($dir, $DH);
$dir = $_[0];
@_ = qw();
opendir $DH, $dir or die this_file . ": $dir: $!";
2021-02-01 15:06:51 +08:00
while (defined($_ = readdir $DH)) {
next if $_ eq "." || $_ eq ".." || !-f "$dir/$_";
push @_, $_;
}
closedir $DH or die this_file . ": $dir: $!";
2021-02-01 15:06:51 +08:00
return join " ", sort @_;
}
# Preserve the source test files
sub preserve_source($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($dir, $DH);
$dir = $_[0];
@_ = qw();
opendir $DH, $dir or die this_file . ": $dir: $!";
2021-02-01 15:06:51 +08:00
while (defined($_ = readdir $DH)) {
next if $_ eq "." || $_ eq ".." || !-f "$dir/$_";
push @_, $_;
}
closedir $DH or die this_file . ": $dir: $!";
2021-02-01 15:06:51 +08:00
rmtree "$dir/source";
mkpath "$dir/source";
write_raw_file "$dir/source/$_", read_raw_file "$dir/$_"
2021-02-01 15:06:51 +08:00
foreach @_;
return;
}
# Clean up the test files
sub clean_up($$$) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($r, $dir, $test_no, $test_name, $c);
($r, $dir, $test_no) = @_;
2021-02-01 15:06:51 +08:00
# Nothing to clean up
return unless -e $dir;
# Success
if ($r) {
rmtree $dir;
return;
}
# Fail - keep the test files for debugging
$test_name = basename((caller)[1]);
$test_name =~ s/\.t$//;
2021-02-01 15:06:51 +08:00
$c = 1;
$c++ while -e ($_ = "$dir.$test_name.$test_no.$c");
rename $dir, $_ or die this_file . ": $dir, $_: $!";
2021-02-01 15:06:51 +08:00
return;
}
# If we have the file type checker somewhere
sub has_no_file() {
$HAS_NO_FILE = eval { require File::MMagic; 1; }
|| defined where_is "file"?
2021-02-01 15:06:51 +08:00
0: "File::MMagic or file executable not available"
if !defined $HAS_NO_FILE;
return $HAS_NO_FILE;
2021-02-01 15:06:51 +08:00
}
# If we have gzip support somewhere
sub has_no_gzip() {
$HAS_NO_GZIP = eval { require Compress::Zlib; 1; }
|| defined where_is "gzip"?
2021-02-01 15:06:51 +08:00
0: "Compress::Zlib or gzip executable not available"
if !defined $HAS_NO_GZIP;
return $HAS_NO_GZIP;
2021-02-01 15:06:51 +08:00
}
# If we have bzip2 support somewhere
sub has_no_bzip2() {
$HAS_NO_BZIP2 = eval { require Compress::Bzip2; Compress::Bzip2->import(2.00); 1; }
|| defined where_is "bzip2"?
2021-02-01 15:06:51 +08:00
0: "Compress::Bzip2 v2 or bzip2 executable not available"
if !defined $HAS_NO_BZIP2;
return $HAS_NO_BZIP2;
2021-02-01 15:06:51 +08:00
}
# Create a normal random log file
sub make_log_file($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $hosts, @host_is_ip, @logs, $t, $content, $malformed, $tz);
my (%resolved_logs, $resolved_content);
2021-02-01 15:06:51 +08:00
$file = $_[0];
2021-02-01 15:06:51 +08:00
@logs = qw();
%resolved_logs = qw();
2021-02-01 15:06:51 +08:00
# Start from sometime in the past year
$t = time - int rand(86400*365);
# Time zone
$tz = (-12 + (int rand 53) / 2) * 3600;
2021-02-01 15:06:51 +08:00
# 3-5 hosts
$hosts = 3 + int rand 3;
# Host type: 1: IP, 0: domain name
@host_is_ip = qw();
push @host_is_ip, 0 while @host_is_ip < $hosts;
# We need exactly 2 IP
$host_is_ip[int rand $hosts] = 1
while grep($_ == 1, @host_is_ip) < 2;
foreach my $is_ip (@host_is_ip) {
my ($host, $resolved_host, $user, $http_ver, @host_logs, $count);
2021-02-01 15:06:51 +08:00
if ($is_ip) {
# Generate a random IP
($host, $resolved_host) = random_ip;
2021-02-01 15:06:51 +08:00
} else {
# Generate a random domain name
# 3-5 levels, end with net or com
$_ = 2 + int rand 3;
@_ = qw();
push @_, random_word while @_ < $_;
2021-02-01 15:06:51 +08:00
push @_, (qw(net com))[int rand 2];
$host = join ".", @_;
$resolved_host = $host;
2021-02-01 15:06:51 +08:00
}
$user = (0, 0, 1)[int rand 3]? "-": random_word;
$http_ver = (qw(HTTP/1.1 HTTP/1.1 HTTP/1.1 HTTP/1.0))[int rand 4];
2021-02-01 15:06:51 +08:00
# 3-5 log entries foreach host
$count = 3 + int rand 3;
@host_logs = qw();
while (@host_logs < $count) {
my ($time, $method, $url, $dirs, @dirs, $type, $status, $size);
2021-02-01 15:06:51 +08:00
my $record;
# 0-2 seconds later
$t += int rand 3;
# Time text
@_ = gmtime($t + $tz);
$_[5] += 1900;
$_[4] = (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$_[4]];
$time = sprintf "%02d/%s/%04d:%02d:%02d:%02d %+05d",
2021-02-01 15:06:51 +08:00
@_[3,4,5,2,1,0],
int($tz / 3600) * 100 + ($tz - int($tz / 3600) * 3600) / 60;
2021-02-01 15:06:51 +08:00
$method = (qw(GET GET GET HEAD POST))[int rand 5];
2021-02-01 15:06:51 +08:00
# Generate a random URL
# 0-3 levels of directories
$dirs = int rand 4;
@dirs = qw();
push @dirs, "/" . random_word while @dirs < $dirs;
2021-02-01 15:06:51 +08:00
$type = ("", qw(html html txt css png jpg))[int rand 7];
if ($type eq "") {
$url = join("", @dirs) . "/";
} else {
$url = join("", @dirs) . "/" . random_word . ".$type";
2021-02-01 15:06:51 +08:00
}
2021-02-01 15:06:51 +08:00
$status = (200, 200, 200, 200, 304, 400, 403, 404)[int rand 8];
if ($status == 304) {
$size = 0;
} else {
$size = 200 + int rand 35000;
}
$record = sprintf "%s - %s [%s] \"%s %s %s\" %d %d\n",
$host, $user, $time, $method, $url, $http_ver, $status, $size;
$resolved_logs{$record} = sprintf "%s - %s [%s] \"%s %s %s\" %d %d\n",
$resolved_host, $user, $time, $method, $url, $http_ver, $status, $size;
push @host_logs, $record;
2021-02-01 15:06:51 +08:00
}
push @logs, @host_logs;
2021-02-01 15:06:51 +08:00
# 0-5 seconds later
$t += int rand 6;
}
2021-02-01 15:06:51 +08:00
# Insert 1-2 malformed lines
$malformed = 1 + int rand 2;
while ($malformed > 0) {
my $line;
2021-02-01 15:06:51 +08:00
# Generate the random malformed line
$_ = 3 + int rand 5;
@_ = qw();
push @_, random_word while @_ < $_;
2021-02-01 15:06:51 +08:00
$line = join(" ", @_) . ".\n";
$line =~ s/^(.)/uc $1/e;
# The position to insert the line
$_ = int rand @logs;
@logs = (@logs[0...$_], $line, @logs[$_+1...$#logs]);
$malformed--;
}
2021-02-01 15:06:51 +08:00
# Compose the content
$content = join "", @logs;
$resolved_content = join "", map exists $resolved_logs{$_}? $resolved_logs{$_}: $_, @logs;
2021-02-01 15:06:51 +08:00
# Output the file
write_file($file, $content);
2021-02-01 15:06:51 +08:00
# Return the content
return ($content, $resolved_content);
2021-02-01 15:06:51 +08:00
}
# Create a random log file that are fully resolved.
sub make_resolved_log_file($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
my ($file, $hosts, @logs, $t, $content, $malformed, $tz);
$file = $_[0];
2021-02-01 15:06:51 +08:00
@logs = qw();
2021-02-01 15:06:51 +08:00
# Start from sometime in the past year
$t = time - int rand(86400*365);
# Time zone
$tz = (-12 + (int rand 53) / 2) * 3600;
2021-02-01 15:06:51 +08:00
# 3-5 hosts
$hosts = 3 + int rand 3;
for (my $i = 0; $i < $hosts; $i++) {
my ($host, $user, $http_ver, @host_logs, $count);
2021-02-01 15:06:51 +08:00
# Generate a random domain name
# 3-5 levels, end with net or com
$_ = 2 + int rand 3;
@_ = qw();
push @_, random_word while @_ < $_;
2021-02-01 15:06:51 +08:00
push @_, (qw(net com))[int rand 2];
$host = join ".", @_;
$user = (0, 0, 1)[int rand 3]? "-": random_word;
$http_ver = (qw(HTTP/1.1 HTTP/1.1 HTTP/1.1 HTTP/1.0))[int rand 4];
2021-02-01 15:06:51 +08:00
# 3-5 log entries foreach host
$count = 3 + int rand 3;
@host_logs = qw();
while (@host_logs < $count) {
my ($time, $method, $url, $dirs, @dirs, $type, $status, $size);
2021-02-01 15:06:51 +08:00
# 0-2 seconds later
$t += int rand 3;
# Time text
@_ = gmtime($t + $tz);
$_[5] += 1900;
$_[4] = (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$_[4]];
$time = sprintf "%02d/%s/%04d:%02d:%02d:%02d %+05d",
2021-02-01 15:06:51 +08:00
@_[3,4,5,2,1,0],
int($tz / 3600) * 100 + ($tz - int($tz / 3600) * 3600) / 60;
2021-02-01 15:06:51 +08:00
$method = (qw(GET GET GET HEAD POST))[int rand 5];
2021-02-01 15:06:51 +08:00
# Generate a random URL
# 0-3 levels of directories
$dirs = int rand 4;
@dirs = qw();
push @dirs, "/" . random_word while @dirs < $dirs;
2021-02-01 15:06:51 +08:00
$type = ("", qw(html html txt css png jpg))[int rand 7];
if ($type eq "") {
$url = join("", @dirs) . "/";
} else {
$url = join("", @dirs) . "/" . random_word . ".$type";
2021-02-01 15:06:51 +08:00
}
2021-02-01 15:06:51 +08:00
$status = (200, 200, 200, 200, 304, 400, 403, 404)[int rand 8];
if ($status == 304) {
$size = 0;
} else {
$size = 200 + int rand 35000;
}
push @host_logs, sprintf "%s - %s [%s] \"%s %s %s\" %d %d\n",
$host, $user, $time, $method, $url, $http_ver, $status, $size;
2021-02-01 15:06:51 +08:00
}
push @logs, @host_logs;
2021-02-01 15:06:51 +08:00
# 0-5 seconds later
$t += int rand 6;
}
2021-02-01 15:06:51 +08:00
# Insert 1-2 malformed lines
$malformed = 1 + int rand 2;
while ($malformed > 0) {
my $line;
2021-02-01 15:06:51 +08:00
# Generate the random malformed line
$_ = 3 + int rand 5;
@_ = qw();
push @_, random_word while @_ < $_;
2021-02-01 15:06:51 +08:00
$line = join(" ", @_) . ".\n";
$line =~ s/^(.)/uc $1/e;
# The position to insert the line
$_ = int rand @logs;
@logs = (@logs[0...$_], $line, @logs[$_+1...$#logs]);
$malformed--;
}
2021-02-01 15:06:51 +08:00
# Compose the content
$content = join "", @logs;
# Output the file
write_file($file, $content);
2021-02-01 15:06:51 +08:00
# Return the content
return ($content, $content);
}
# Create an empty log file.
sub make_empty_log_file($) {
2021-02-01 15:06:51 +08:00
local ($_, %_);
$_ = $_[0];
write_file($_, "");
2021-02-01 15:06:51 +08:00
return ("", "");
}
# Supply a random English word
sub random_word() {
2021-02-01 15:06:51 +08:00
local ($_, %_);
@_ = qw(
hard-to-find striped poor scene miniature marble error shelter clear settle
march breath tested symptomatic delicate road punish grain fabulous camp
authority love system placid bake maddening sleep precious crabby lovely jolly
wrist park common volleyball tick judicious degree alluring hydrant oatmeal
aboard light spare delirious unwritten unnatural existence deadpan cagey
disastrous station fear dam adorable grape event silent extra-large shame meaty
husky drag religion extra-small pot valuable deceive obese seed history
wholesale tremble delightful leather cabbage death tub loss twig hate noxious
trashy sleet bleach quizzical familiar nappy teaching private yak turkey foolish
concentrate reject tacit goofy men ajar communicate);
2021-02-01 15:06:51 +08:00
return $_[int rand @_];
}
# Supply a random IP
2021-02-01 15:06:51 +08:00
# Big public web companies have more reliable reverse DNS
sub random_ip() {
2021-02-01 15:06:51 +08:00
local ($_, %_);
# Initialize our resolvable IP pool
if (!defined $RANDOM_IP) {
2021-02-01 15:06:51 +08:00
my (@ip, @hosts);
$RANDOM_IP = {};
2021-02-01 15:06:51 +08:00
@ip = qw();
# Famous websites - they are resolved to several IPs, and their
# reverse domain is guaranteed by the akadns.net service.
2021-02-01 15:06:51 +08:00
foreach my $host (qw(www.google.com
www.yahoo.com www.microsoft.com)) {
# Find the addresses
push @ip, map join(".", unpack "C4", $_), @_[4...$#_]
if (@_ = gethostbyname $host) > 0;
}
# 127.0.0.1 may be resolved to localhost
push @ip, "127.0.0.1";
foreach my $ip (@ip) {
my $host;
# Find its reverse lookup domain name
next if !defined($host = gethostbyaddr inet_aton($ip), AF_INET);
# Find the address again
next unless (@_ = gethostbyname $host) > 0;
next if (@_ = @_[4...$#_]) > 1;
$_ = join ".", unpack "C4", $_[0];
# Not match
next if $_ ne $ip;
# OK. Record it.
$$RANDOM_IP{$ip} = $host;
2021-02-01 15:06:51 +08:00
}
# Hosts reliably resolve to themselves
@hosts = qw();
# My own hosts
push @hosts, qw(rinse.wov.idv.tw cotton.wov.idv.tw);
# Yahoo! mail servers
for (my $i = 101; $i <= 109; $i++) {
push @hosts, "smtp$i.mail.mud.yahoo.com";
}
# HiNet mail servers
for (my $i = 1; $i <= 89; $i++) {
push @hosts, "ms$i.hinet.net"
if $i % 10 != 0;
}
foreach my $host (@hosts) {
my $ip;
# Find the address
next unless (@_ = gethostbyname $host) > 0;
next if (@_ = @_[4...$#_]) > 1;
$ip = join ".", unpack "C4", $_[0];
# Find its reverse lookup domain name again
next if !defined($_ = gethostbyaddr inet_aton($ip), AF_INET);
# Not match
next if $_ ne $host;
# OK. Record it.
$$RANDOM_IP{$ip} = $host;
2021-02-01 15:06:51 +08:00
}
}
# 1: Resolvable
if (keys %$RANDOM_IP > 0 && int rand 2) {
@_ = sort keys %$RANDOM_IP;
2021-02-01 15:06:51 +08:00
$_ = $_[int rand @_];
return ($_, $$RANDOM_IP{$_});
2021-02-01 15:06:51 +08:00
}
# 0: Unresolvable
2021-02-01 15:06:51 +08:00
# Use loopback (127.0.0.0/8) and link local (169.254.0.0/16)
do {
if (int rand 2) {
$_ = join ".", 127, int rand 255, int rand 255, 1 + int rand 254;
} else {
$_ = join ".", 169, 254, int rand 255, 1 + int rand 254;
}
} until !defined gethostbyaddr inet_aton($_), AF_INET;
return ($_, $_);
}
1;
__END__