package Apache::TrafficServer::BinLog;

use strict;
use warnings FATAL => 'all';

# inet_ntop()/AF_INET6 only ship with Socket on perl >= 5.12; fall back to
# the Socket6 CPAN module on older perls.
use Socket qw( :DEFAULT );
if ( ! defined( &Socket::inet_ntop ) ) {
    eval "use Socket6;"; # For ye olde perl.
    die $@ if $@;
} else {
    Socket->import( qw( inet_ntop AF_INET6 AF_INET ) );
}

=head1 NAME

Apache::TrafficServer::BinLog - Log parsing for traffic server binary logfiles with direct timestamp seeking and fields manipulation.

=head1 VERSION

Version 0.01

=cut

our $VERSION = '0.01';

# Sizes (in bytes) of the fixed-width integers used by the on-disk format.
our $SZ_UINT = 4;
our $SZ_UINT64 = 8;

# Only version 2 LogBuffer segments are understood (checked in _read_headers).
our $LOG_SEGMENT_VERSION = 2;

# On-disk lengths of the pieces of the client-ip ('chi') field.
our $IP_FAMILY_FIELD_LENGTH = 2;
our $IPV4_FIELD_LENGTH = 4;
our $IPV6_FIELD_LENGTH = 16;

# A LogEntry header: uint64 timestamp + uint32 usec + uint32 entry length (16 bytes).
our $LOG_ENTRY_HEADER_LENGTH = 1 * $SZ_UINT64 + 2 * $SZ_UINT;

# The static LogBuffer header: 14 uint32 fields plus 1 uint64 field (64 bytes).
our $LOG_BUFF_STATIC_HEADER_LENGTH = 14 * $SZ_UINT + 1 * $SZ_UINT64;

# Fields in a LogEntry are padded to a multiple of this boundary (see _ink_align).
our $INK_MIN_ALIGN = 8;

# Magic cookie marking the start of every LogBuffer.
our $LOG_SEGMENT_COOKIE = 0xaceface; # proxy/logging/LogBuffer.h

# unpack() templates for the on-disk structures above.
our $LOG_BUFF_STATIC_HEADER_FORMAT = 'L8 Q1 L6';
our $LOG_BUFF_DYNAMIC_HEADER_FORMAT = 'Z* Z* Z* Z* Z*';
our $LOG_BUFF_STATIC_HEADER_COOKIE_FORMAT = 'L1';
our $LOG_BUFF_ENTRY_HEADER_FORMAT = 'Q1 I1 I1';

# Static header field names, in on-disk order (matches $LOG_BUFF_STATIC_HEADER_FORMAT).
our @LOG_BUFF_STATIC_HEADER_FIELDS = qw/ cookie version format_type byte_count entry_count low_timestamp high_timestamp log_object_flags log_object_signature fmt_name_offset fmt_fieldlist_offset fmt_printf_offset src_hostname_offset log_filename_offset data_offset/;

# Dynamic header field names, in on-disk order (NUL-terminated strings).
our @LOG_BUFF_DYNAMIC_HEADER_FIELDS = qw/fmt_name_str fieldlist_str printf_str src_hostname_str log_filename_str/;

# proxy/logging/Log.cc
# unpack() template for each supported logging field.
our %FIELD_TEMPLATE = (
    cqtq => 'L1',
    ttms => 'L1',
    chi  => 'S1',
    crc  => 'L1',
    pssc => 'L1',
    psql => 'L1',
    cqhm => 'Z*',
    cquc => 'Z*',
    caun => 'Z*',
    phr  => 'L1',
    pqsn => 'Z*',
    psct => 'Z*',
    xid  => 'Z*',
    cquuc => 'Z*',
    '{User-Agent}cqh' => 'Z*',
);

# Cache result code ('crc' field): one-character on-disk value to symbolic name.
our %CRC_SYMBOL = (
    '0' => 'EMPTY',
    '1' => 'TCP_HIT',
    '2' => 'TCP_DISK_HIT',
    '.' => 'TCP_MEM_HIT',
    '3' => 'TCP_MISS',
    '4' => 'TCP_EXPIRED_MISS',
    '5' => 'TCP_REFRESH_HIT',
    '6' => 'TCP_REF_FAIL_HIT',
    '7' => 'TCP_REFRESH_MISS',
    '8' => 'TCP_CLIENT_REFRESH',
    '9' => 'TCP_IMS_HIT',
    'a' => 'TCP_IMS_MISS',
    'b' => 'TCP_SWAPFAIL',
    'c' => 'TCP_DENIED',
    'd' => 'TCP_WEBFETCH_MISS',
    'e' => 'TCP_SPIDER_BYPASS',
    'f' => 'TCP_FUTURE_2',
    '[' => 'TCP_HIT_REDIRECT',
    ']' => 'TCP_MISS_REDIRECT',
    '<' => 'TCP_HIT_X_REDIRECT',
    '>' => 'TCP_MISS_X_REDIRECT',
    'g' => 'UDP_HIT',
    'h' => 'UDP_WEAK_HIT',
    'i' => 'UDP_HIT_OBJ',
    'j' => 'UDP_MISS',
    'k' => 'UDP_DENIED',
    'l' => 'UDP_INVALID',
    'm' => 'UDP_RELOADING',
    'n' => 'UDP_FUTURE_1',
    'o' => 'UDP_FUTURE_2',
    'p' => 'ERR_READ_TIMEOUT',
    'q' => 'ERR_LIFETIME_EXP',
    'r' => 'ERR_NO_CLIENTS_BIG_OBJ',
    's' => 'ERR_READ_ERROR',
    't' => 'ERR_CLIENT_ABORT',
    'u' => 'ERR_CONNECT_FAIL',
    'v' => 'ERR_INVALID_REQ',
    'w' => 'ERR_UNSUP_REQ',
    'x' => 'ERR_INVALID_URL',
    'y' => 'ERR_NO_FDS',
    'z' => 'ERR_DNS_FAIL',
    'A' => 'ERR_NOT_IMPLEMENTED',
    'B' => 'ERR_CANNOT_FETCH',
    'C' => 'ERR_NO_RELAY',
    'D' => 'ERR_DISK_IO',
    'E' => 'ERR_ZERO_SIZE_OBJECT',
    'G' => 'ERR_PROXY_DENIED',
    'H' => 'ERR_WEBFETCH_DETECTED',
    'I' => 'ERR_FUTURE_1',
    'J' => 'ERR_SPIDER_MEMBER_ABORTED',
    'K' => 'ERR_SPIDER_PARENTAL_CONTROL_RESTRICTION',
    'L' => 'ERR_SPIDER_UNSUPPORTED_HTTP_VERSION',
    'M' => 'ERR_SPIDER_UIF',
    'N' => 'ERR_SPIDER_FUTURE_USE_1',
    'O' => 'ERR_SPIDER_TIMEOUT_WHILE_PASSING',
    'P' => 'ERR_SPIDER_TIMEOUT_WHILE_DRAINING',
    'Q' => 'ERR_SPIDER_GENERAL_TIMEOUT',
    'R' => 'ERR_SPIDER_CONNECT_FAILED',
    'S' => 'ERR_SPIDER_FUTURE_USE_2',
    'T' => 'ERR_SPIDER_NO_RESOURCES',
    'U' => 'ERR_SPIDER_INTERNAL_ERROR',
    'V' => 'ERR_SPIDER_INTERNAL_IO_ERROR',
    'W' => 'ERR_SPIDER_DNS_TEMP_ERROR',
    'X' => 'ERR_SPIDER_DNS_HOST_NOT_FOUND',
    'Y' => 'ERR_SPIDER_DNS_NO_ADDRESS',
    'Z' => 'ERR_UNKNOWN',
);

# proxy/hdrs/HTTP.h
# Proxy hierarchy code ('phr' field): one-character on-disk value to symbolic name.
our %HIER_SYMBOL = (
    0 => 'EMPTY',
    1 => 'NONE',
    2 => 'DIRECT',
    3 => 'SIBLING_HIT',
    4 => 'PARENT_HIT',
    5 => 'DEFAULT_PARENT',
    6 => 'SINGLE_PARENT',
    7 => 'FIRST_UP_PARENT',
    8 => 'NO_PARENT_DIRECT',
    9 => 'FIRST_PARENT_MISS',
    a => 'LOCAL_IP_DIRECT',
    b => 'FIREWALL_IP_DIRECT',
    c => 'NO_DIRECT_FAIL',
    d => 'SOURCE_FASTEST',
    e => 'SIBLING_UDP_HIT_OBJ',
    f => 'PARENT_UDP_HIT_OBJ',
    g => 'PASSTHROUGH_PARENT',
    h => 'SSL_PARENT_MISS',
    i => 'INVALID_CODE',
    j => 'TIMEOUT_DIRECT',
    k => 'TIMEOUT_SIBLING_HIT',
    l => 'TIMEOUT_PARENT_HIT',
    m => 'TIMEOUT_DEFAULT_PARENT',
    n => 'TIMEOUT_SINGLE_PARENT',
    o => 'TIMEOUT_FIRST_UP_PARENT',
    p => 'TIMEOUT_NO_PARENT_DIRECT',
    q => 'TIMEOUT_FIRST_PARENT_MISS',
    r => 'TIMEOUT_LOCAL_IP_DIRECT',
    s => 'TIMEOUT_FIREWALL_IP_DIRECT',
    t => 'TIMEOUT_NO_DIRECT_FAIL',
    u => 'TIMEOUT_SOURCE_FASTEST',
    v => 'TIMEOUT_SIBLING_UDP_HIT_OBJ',
    w => 'TIMEOUT_PARENT_UDP_HIT_OBJ',
    x => 'TIMEOUT_PASSTHROUGH_PARENT',
    y => 'TIMEOUT_TIMEOUT_SSL_PARENT_MISS',
    z => 'INVALID_ASSIGNED_CODE',
);

# This is technically no longer needed since profiling showed that dynamically
# returning the anonymous subroutine is significantly slower than simply
# hardcoding the padding values in the _make_unpack_line() sub.
# It is left here as a matter of clarity of how each field is supposed to be padded.
our $FIELD_LENGTH_MAGIC = {
    'cqtq' => sub { return ( 0 ); },
    'ttms' => sub { return ( $INK_MIN_ALIGN ); },
    'chi'  => sub { return ( $IP_FAMILY_FIELD_LENGTH + 2 ); },
    'ipv4' => sub { return ( $IPV4_FIELD_LENGTH ); },
    'ipv6' => sub { return ( $IPV6_FIELD_LENGTH + 4 ); },
    'crc'  => sub { return ( $INK_MIN_ALIGN ); },
    'pssc' => sub { return ( $INK_MIN_ALIGN ); },
    'psql' => sub { return ( $INK_MIN_ALIGN ); },
    'cqhm' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    'cquc' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    'caun' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    'phr'  => sub { return ( $INK_MIN_ALIGN ); },
    'pqsn' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    'psct' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    'xid'  => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    'cquuc' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
    '{User-Agent}cqh' => sub { return ( _ink_align( length( $_[ 0 ] ) + 1, $INK_MIN_ALIGN ) ); },
};

=head1 SYNOPSIS

 use Apache::TrafficServer::BinLog;

 my $binlog = Apache::TrafficServer::BinLog->new( { file => $path, tstamp => $tstamp } );

 my %line = $binlog->get_line();
 my @lines = $binlog->get_lines( 1000 );
 my $ascii_line = $binlog->get_line_ascii();
 my @ascii_lines = $binlog->get_lines_ascii( 1000 );

=head1 DESCRIPTION

This is an object-oriented library which will convert Apache Traffic Server binary logs from binary to ascii. It allows for direct seeking to a timestamp (or near a timestamp) that way one can only parse a "slice" of the log (say the last few seconds).

It can parse each log line into a hash table with each field accessible as a key. It can also simply return the ascii line. This module essentially does what logcat does but allows for horizontal and/or vertical slicing of the logs using timestamps and/or fields names.
=pod

Various methods can be called to extract 1 or more lines from the logfile. An internal pointer is always incremented each time one of the get_line[s] methods is called.

The binary log format was mostly reverse engineered from the apache traffic server source code and thus could potentially be buggy. Also not all fields conversion is supported but enough of the basic ones are.

=head1 SUBROUTINES/METHODS

=head2 new

 my $binlog = Apache::TrafficServer::BinLog->new( { file => $path, tstamp => $tstamp } );

Instantiates an object which holds a logfile and a pointer to the current location inside the logfile. The internal pointer is defined by the 'tstamp' parameter. Arguments may be given either as a hashref (as in the SYNOPSIS) or as a flat key/value list.

=cut

sub new {
    my ( $class, @rest ) = @_;
    # The SYNOPSIS documents a hashref argument but the old code only
    # accepted a flat list (a hashref triggered a fatal "odd number of
    # elements" warning under FATAL warnings); accept both.
    my %args = ( @rest == 1 && ref( $rest[ 0 ] ) eq 'HASH' ) ? %{ $rest[ 0 ] } : @rest;
    my $self = bless( {}, $class );

    # Parser state: headers of the LogBuffer currently under the file
    # pointer, the lines decoded from it, and the last error message.
    $self->{ 'cur_static_header' } = ();
    $self->{ 'cur_dynamic_header' } = ();
    $self->{ 'cur_lines' } = ();
    $self->{ 'error' } = "";

    if ( exists $args{ 'file' } ) {
        $self->{ 'file' } = $args{ 'file' };
        open ( $self->{ 'fh' }, "<", $self->{ 'file' } ) or die "Can't open file $self->{ 'file' }\n";
    } else {
        die "need a file path";
    }

    # The field list is read from the file's dynamic header.
    $self->{ 'line_format' } = $self->_get_bin_line_format();

    # Indirection left over from when several log formats were supported;
    # \&sub is the idiomatic equivalent of the old *sub{CODE} glob slot.
    $self->{ 'reader_sub' } = \&_get_next_log_buff;
    $self->{ 'cur_tstamp_pair_sub' } = \&_get_cur_bin_tstamp_pair;

    # The unpack subs are generated as Perl source tailored to this file's
    # line format and compiled once here.  The old code ignored eval
    # failures, leaving undef coderefs that blew up later; die early with
    # the compile error instead.
    $self->{ 'unpack_line_sub' } = eval $self->_make_unpack_line( 'PERL', @{ $self->{ 'line_format' } } )
        or die "Can't compile line unpacker: $@";
    $self->{ 'unpack_ascii_line_sub' } = eval $self->_make_unpack_line( 'ASCII', @{ $self->{ 'line_format' } } )
        or die "Can't compile ascii line unpacker: $@";

    $self->_read_headers( 1 ) or die "Can't init parser because $self->{ 'error' }.";

    # Default the slice start to the very first timestamp in the file.
    unless ( $self->{ 'tstamp' } ) {
        $self->{ 'tstamp' } = $self->{ 'cur_static_header' }->{ 'low_timestamp' };
    }
    if ( exists $args{ 'tstamp' } ) {
        $self->set_tstamp( $args{ 'tstamp' } );
    }
    if ( exists $args{ 'length' } ) {
        $self->set_length( $args{ 'length' } );
    }
    return $self;
}

=head2 set_tstamp

 $binlog->set_tstamp( $tstamp );

Seeks to the timestamp specified.
=pod

(or very close to it :) )

=cut

sub set_tstamp {
    my ( $self, $tstamp ) = @_;

    # Remember the requested slice start, then binary-search the file for
    # the LogBuffer whose timestamp range covers it.
    $self->{ 'tstamp' } = $tstamp;
    $self->_seek_to_tstamp( $self->{ 'tstamp' }, 0, -s $self->{ 'file' }, $self->{ 'fh' } )
        or die $self->{ 'error' };
}

=head2 set_length

 $binlog->set_length( $length );

Sets the length (in seconds) of the slice we want from the log file.

=cut

sub set_length {
    my ( $self, $seconds ) = @_;
    $self->{ 'length' } = $seconds;
}

=head2 get_last_tstamp

 my $tstamp = $binlog->get_last_tstamp();

Gets last timestamp in the file. (useful for tailing last few seconds of log)

=cut

sub get_last_tstamp {
    my ( $self ) = @_;
    return $self->_get_last_bin_tstamp();
}

=head2 get_line

 my $line_hash_ref = $binlog->get_line();

Gets one line of the logfile with each element of the line neatly parsed into a hashref.

=cut

sub get_line {
    my ( $self ) = @_;

    # Serve from the already-decoded buffer when possible; otherwise pull
    # in the next LogBuffer and retry.  Returns 0 at end of file.
    my $entry = shift( @{ $self->{ 'cur_lines' } } );
    if ( !$entry ) {
        return 0 if !$self->{ 'reader_sub' }->( $self, 'PERL' );
        $entry = shift( @{ $self->{ 'cur_lines' } } );
    }
    return $entry;
}

=head2 get_ascii_line

 my $line = $binlog->get_ascii_line();

Gets one line of the logfile in ascii.

=cut

sub get_ascii_line {
    my ( $self ) = @_;

    # Same as get_line() but the refill pass decodes to ascii strings.
    my $entry = shift( @{ $self->{ 'cur_lines' } } );
    if ( !$entry ) {
        return 0 if !$self->{ 'reader_sub' }->( $self, 'ASCII' );
        $entry = shift( @{ $self->{ 'cur_lines' } } );
    }
    return $entry;
}

=head2 get_lines

 my $lines_array_ref = $binlog->get_lines( $num );

Get $num lines from the logfiles and returns them into an arrayref of hashref.
=cut

sub get_lines {
    my ( $self, $num ) = @_;
    my @lines;

    # Drain buffered lines first and refill from the next LogBuffer when
    # the buffer runs dry.  The splice takes only as many lines as are
    # still needed, so a request for $num lines returns at most $num and
    # leaves the surplus buffered for the next call (the old code pushed
    # whole buffers and could return far more than requested).
    while ( @lines < $num ) {
        if ( $self->{ 'cur_lines' } && @{ $self->{ 'cur_lines' } } ) {
            push( @lines, splice( @{ $self->{ 'cur_lines' } }, 0, $num - @lines ) );
        } else {
            delete( $self->{ 'cur_lines' } );
            unless ( $self->{ 'reader_sub' }->( $self, 'PERL' ) ) {
                # EOF (or read error): hand back what we got, 0 if nothing.
                return @lines ? \@lines : 0;
            }
        }
    }
    return \@lines;
}

=head2 get_ascii_lines

 my $lines_array_ref = $binlog->get_ascii_lines( $num );

Get $num lines from the logfiles and returns them in ascii in an arrayref.

=cut

sub get_ascii_lines {
    my ( $self, $num ) = @_;
    my @lines;

    # Identical to get_lines() except the refill pass decodes to ascii.
    while ( @lines < $num ) {
        if ( $self->{ 'cur_lines' } && @{ $self->{ 'cur_lines' } } ) {
            push( @lines, splice( @{ $self->{ 'cur_lines' } }, 0, $num - @lines ) );
        } else {
            delete( $self->{ 'cur_lines' } );
            unless ( $self->{ 'reader_sub' }->( $self, 'ASCII' ) ) {
                return @lines ? \@lines : 0;
            }
        }
    }
    return \@lines;
}

=head2 get_line_fields

 my $format_array_ref = $binlog->get_line_fields();

Returns the line log format of the file as an array ref.

=cut

sub get_line_fields {
    my ( $self ) = @_;
    # This looks silly now, but this sub used to support UFF and [AY]TS
    # format and thus was an encapsulation of several private "get_format" methods.
    return $self->_get_bin_line_format();
}

### "Private" methods ###

# Extract the log line format stored in the LogBuffer dynamic header.
# Logging fields are defined in the ATS docs here:
# http://trafficserver.apache.org/docs/trunk/admin/event-logging-formats/index.en.html
sub _get_bin_line_format {
    my ( $self ) = @_;
    # Parse the headers on demand if they have not been read yet; the
    # comma-separated field list lives in the dynamic header.
    unless ( $self->{ 'cur_dynamic_header' } ) {
        $self->_read_headers( 1 );
    }
    my @line_format = split( /,/, $self->{ 'cur_dynamic_header' }->{ 'fieldlist_str' } );
    return \@line_format;
}

# Attempts to retrieve the last low_timestamp and high_timestamp pair from the last LogBuffer in the file.
# Walks the file backwards 64 bytes at a time from the end of the file.
sub _get_last_bin_tstamp {
    my ( $self ) = @_;

    # Start one static-header length (64 bytes) before EOF and keep backing
    # up in 64-byte steps until _align_to_cookie() finds a LogBuffer start.
    my $seek_pos = $LOG_BUFF_STATIC_HEADER_LENGTH;
    # NOTE: seek() returns a success flag, not the new position.
    my $cur_pos = seek( $self->{ 'fh' }, -$seek_pos, 2 ) or die "can't seek";
    while( !$self->_align_to_cookie() ) {
        $seek_pos += $LOG_BUFF_STATIC_HEADER_LENGTH;
        # Walked past the beginning of the file without finding a cookie.
        if ( $seek_pos >= -s $self->{ 'file' } ) {
            return 0;
        }
        $cur_pos = seek( $self->{ 'fh' }, -$seek_pos, 2 ) or die "can't seek";
    }
    # Aligned on the last LogBuffer; its high_timestamp is the last
    # timestamp in the file.
    my %tstamps = $self->{ 'cur_tstamp_pair_sub' }->( $self );
    return $tstamps{ 'high' };
}

# Gets the next LogBuffer, which will contain several lines of the log.
# Decodes every entry in it into $self->{'cur_lines'} ($output_type selects
# ascii strings or per-field hashrefs).  Returns 1 on success, 0 on EOF,
# header failure, or when the buffer starts past the requested slice end.
sub _get_next_log_buff {
    my ( $self, $output_type ) = @_;
    my @lines;
    my $log_buff_data;
    unless ( $self->_read_headers( 0 ) ) {
        $self->{ 'error' } .= "Can't read headers";
        return 0;
    }
    # Stop producing lines once the next buffer lies entirely beyond the
    # requested slice ( tstamp + length ).
    if ( $self->{ 'length' } ) {
        if ( $self->{ 'cur_static_header' }->{ 'low_timestamp' } >= $self->{ 'tstamp' } + $self->{ 'length' } ) {
            return 0;
        }
    }
    # the log buffer size minus the size of the static header since we already read it.
    my $log_buff_data_length = $self->{ 'cur_static_header' }->{ 'byte_count' } - $self->{ 'cur_static_header' }->{ 'data_offset' };
    my $cur_log_buff_entry = 0;
    # reading one log buffer which will contain the dynamic header and several log entries which contain several log fields.
    unless ( seek( $self->{ 'fh' }, $self->{ 'cur_static_header' }->{ 'data_offset' }, 1 ) ) {
        $self->{ 'error' } .= "Can't seek to $self->{ 'cur_static_header' }->{ 'data_offset' }.";
        return 0;
    }
    unless ( read( $self->{ 'fh' }, $log_buff_data, $log_buff_data_length ) ) {
        $self->{ 'error' } .= "Can't read file at ". seek( $self->{'fh'}, 0, 1) . ".";
        return 0;
    }
    # $cur_log_buff_entry walks the buffer entry by entry using each
    # entry's own entry_len.
    LOGENTRY: while ( $cur_log_buff_entry != $log_buff_data_length ) {
        my %line = ();
        my $line = "";
        #### PARSE OF ENTRY HEADER ####
        my %log_buff_entry_header;
        @log_buff_entry_header{ qw/timestamp timestamp_usec entry_len/ } = unpack $LOG_BUFF_ENTRY_HEADER_FORMAT, substr( $log_buff_data, $cur_log_buff_entry, $LOG_ENTRY_HEADER_LENGTH );
        # Microseconds are reduced to milliseconds, zero-padded to 3 digits
        # so they can be appended after the decimal point.
        $log_buff_entry_header{ 'timestamp_usec' } = sprintf( "%03s", int( $log_buff_entry_header{ 'timestamp_usec' } / 1000 ) );
        #### PARSE OF ENTRY DATA ####
        my $log_buff_entry_data = substr( $log_buff_data, $cur_log_buff_entry + $LOG_ENTRY_HEADER_LENGTH, $log_buff_entry_header{ 'entry_len' } );
        # The first $INK_MIN_ALIGN bytes of the entry data hold the already
        # parsed timestamp field, so field decoding starts past them.
        my $cur_log_buff_entry_field = $INK_MIN_ALIGN;
        if ( $output_type eq 'ASCII' ) {
            $line .= $log_buff_entry_header{ 'timestamp' } . "." . $log_buff_entry_header{ 'timestamp_usec' };
            if ( $self->{ 'unpack_ascii_line_sub' }->( \$log_buff_entry_data, \$cur_log_buff_entry_field, \$line ) ) {
                push( @lines, $line );
            } #something bad happened if we don't get here.
        } else {
            $line{ 'cqtq' } = sprintf( "%.3f", $log_buff_entry_header{ 'timestamp' } . "." . $log_buff_entry_header{ 'timestamp_usec' } );
            if ( $self->{ 'unpack_line_sub' }->( \$log_buff_entry_data, \$cur_log_buff_entry_field, \%line ) ) {
                if ( scalar ( @{ $self->{ 'line_format' } } ) == scalar ( keys ( %line ) ) ) {
                    push( @lines, \%line ); #if we don't get here, line split wrong. ignore. likely data corruption in file.
                }
            } #something bad happened if we don't get here.
        }
        $cur_log_buff_entry += $log_buff_entry_header{ 'entry_len' };
    }
    $self->{ 'cur_lines' } = \@lines;
    return 1;
}

# This sub only reads the static and optionally the dynamic header without touching the field data.
# Seeks back to beginning of log buffer at the end.
sub _read_headers {
    my ( $self, $dynamic ) = @_;
    my ( $buff, $offset );
    # Make sure the file pointer sits on a LogBuffer cookie first.
    unless ( $self->_align_to_cookie() ) {
        $self->{ 'error' } .= "Can't align to cookie or EOF.\n";
        return 0;
    }
    # Static header: fixed 64 bytes, unpacked straight into a hash slice
    # keyed by the field names (order matches the unpack template).
    read( $self->{ 'fh' }, $buff, $LOG_BUFF_STATIC_HEADER_LENGTH );
    @{ $self->{ 'cur_static_header' } }{ @LOG_BUFF_STATIC_HEADER_FIELDS } = unpack $LOG_BUFF_STATIC_HEADER_FORMAT, $buff;
    if ( $self->{ 'cur_static_header' }->{ 'version' } != $LOG_SEGMENT_VERSION ) {
        $self->{ 'error' } .= "Can't parse version $self->{ 'cur_static_header' }->{ 'version' }.\n";
        return 0;
    }
    # Track how far past the buffer start we have read so we can rewind.
    $offset = $LOG_BUFF_STATIC_HEADER_LENGTH;
    if ( $dynamic ) {
        # Dynamic header: the NUL-terminated strings between the end of the
        # static header (fmt_name_offset) and the start of the entry data.
        read( $self->{ 'fh' }, $buff, $self->{ 'cur_static_header' }->{ 'data_offset' } - $self->{ 'cur_static_header' }->{ 'fmt_name_offset' } );
        @{ $self->{ 'cur_dynamic_header' } }{ @LOG_BUFF_DYNAMIC_HEADER_FIELDS } = unpack $LOG_BUFF_DYNAMIC_HEADER_FORMAT, $buff;
        $offset = $self->{ 'cur_static_header' }->{ 'data_offset' };
    }
    # Rewind to the beginning of the LogBuffer so callers can re-read it.
    seek( $self->{ 'fh' }, -$offset, 1 );
    return 1;
}

# The following 2 subs used to borrow code from "AnyEvents::Sockets" to provide ipv6 parsing for perl < 5.12.
# Due to possible license concerns it now uses the low level core Socket module.
# Render a 4-byte packed IPv4 address as dotted-quad text.
sub _format_ipv4 {
    my ( $ipv4 ) = @_;
    return inet_ntop( AF_INET, $ipv4 );
}

# Render a 16-byte packed IPv6 address as its text form.
sub _format_ipv6 {
    my ( $ipv6 ) = @_;
    return inet_ntop( AF_INET6, $ipv6 );
}

# _ink_align() is only to be used to align on a power of 2 boundary.
# Rounds $size up to the next multiple of $boundary.
sub _ink_align {
    my ( $size, $boundary ) = @_;
    return ( ( ( $size ) + ( ( $boundary ) - 1 ) ) & ~( ( $boundary ) - 1 ) );
}

# Align the filehandle pointer to the beginning of a LogBuffer ( starts with 0xACEFACE )
# Scans forward one byte at a time until the cookie appears at the start of
# the lookahead window, then rewinds so the pointer sits on the cookie.
# Returns 1 on success, 0 on EOF or read error.
sub _align_to_cookie {
    my ( $self ) = @_;
    my $rv = read( $self->{ 'fh' }, my $buff, $LOG_BUFF_STATIC_HEADER_LENGTH );
    # read() returns undef on error; the old "$rv < $LENGTH" test then
    # raised an uninitialized-value warning, which is fatal under this
    # file's "use warnings FATAL => 'all'".  Treat error like EOF instead.
    if ( !defined( $rv ) || $rv < $LOG_BUFF_STATIC_HEADER_LENGTH ) {
        return 0; # EOF
    }
    my $cookie = unpack $LOG_BUFF_STATIC_HEADER_COOKIE_FORMAT, substr( $buff, 0, $SZ_UINT );
    while ( $cookie != $LOG_SEGMENT_COOKIE ) {
        # Slide the window forward one byte and re-test the first 4 bytes.
        $buff = substr( $buff, 1 );
        if ( read( $self->{ 'fh' }, my $byte, 1 ) ) {
            $buff .= $byte;
            $cookie = unpack $LOG_BUFF_STATIC_HEADER_COOKIE_FORMAT, substr( $buff, 0, $SZ_UINT );
        } else {
            return 0;
        }
    }
    # Rewind over the lookahead so the pointer lands on the cookie itself.
    seek( $self->{ 'fh' }, -$LOG_BUFF_STATIC_HEADER_LENGTH, 1 );
    return 1;
}

# We only align with the beginning of a LogBuffer which contains several Log entries.
# Therefore we may not align exactly to the time stamp provided.
# We will always align like so: low_timestamp < $tstamp < high_timestamp
# This sub is just a recursive binary search over LogBuffer boundaries. :)
sub _seek_to_tstamp {
    my ( $self, $tstamp, $logfile_min, $logfile_max ) = @_;
    unless ( defined( $logfile_min ) && defined( $logfile_max ) ) {
        $logfile_min = 0;
        $logfile_max = -s $self->{ 'file' };
    }
    if ( $logfile_max < $logfile_min ) {
        # Search window collapsed: timestamp not present in the file.
        $self->{ 'error' } .= "Can't seek to time stamp $tstamp.\n";
        return 0;
    } else {
        my $logfile_mid = int( $logfile_min + ( ( $logfile_max - $logfile_min ) / 2 ) );
        unless ( $self->_seek_to_byte( $logfile_mid ) ) {
            $self->{ 'error' } .= "Can't seek to byte $logfile_mid.\n";
            return 0;
        }
        my %cur_tstamp = $self->{ 'cur_tstamp_pair_sub' }->( $self );
        if ( %cur_tstamp ) {
            if ( $cur_tstamp{ 'low' } > $tstamp ) {
                return $self->_seek_to_tstamp( $tstamp, $logfile_min, ( $logfile_mid - 1 ) );
            } elsif ( $cur_tstamp{ 'high' } < $tstamp ) {
                # BUG FIX: this used to read "$logfile_mid + ( 1, $logfile_max )",
                # where the comma operator collapsed the parens to $logfile_max,
                # passing a bogus min and NO max — which reset the search window
                # to the whole file and could recurse forever.
                return $self->_seek_to_tstamp( $tstamp, $logfile_mid + 1, $logfile_max );
            } else {
                # Found the covering buffer.  Back up one header length;
                # NOTE(review): _read_headers() appears to have already
                # rewound to the buffer start, so this extra rewind looks
                # redundant — _align_to_cookie() re-syncs on the next read
                # either way.  Left as-is; confirm before removing.
                seek( $self->{ 'fh' }, -$LOG_BUFF_STATIC_HEADER_LENGTH, 1 );
                # seek() here returns a success flag (truthy), not a position.
                return seek( $self->{ 'fh' }, 0, 1 );
            }
        } else {
            $self->{ 'error' } .= "Can't seek to time stamp $tstamp.\n";
            return 0;
        }
    }
}

# Seeks to $byte in the file. :)
sub _seek_to_byte {
    my ( $self, $byte ) = @_;
    if ( seek( $self->{ 'fh' }, $byte, 0 ) ) {
        return 1;
    } else {
        return 0;
    }
}

# Extract low_timestamp and high_timestamp from the static LogBuffer header.
# Returns the pair as a hash, or the empty list on failure.
sub _get_cur_bin_tstamp_pair {
    my ( $self ) = @_;
    my %tstamps = ();
    unless ( $self->_read_headers( 0 ) ) {
        $self->{ 'error' } .= "Can't read headers.\n";
        # Callers assign the result to a hash; a bare return yields the
        # empty list there.  The old "return 0" produced a one-element list
        # and a fatal "odd number of elements" warning under FATAL warnings.
        return;
    }
    $tstamps{ 'low' } = $self->{ 'cur_static_header' }->{ 'low_timestamp' };
    $tstamps{ 'high' } = $self->{ 'cur_static_header' }->{ 'high_timestamp' };
    return %tstamps;
}

# Dynamically generate the code that will unpack a given LogEntry for any given log line format.
# Returns the source text of an anonymous sub taking
# ( \$entry_data, \$field_offset, \$line-or-\%line ) and returning 1 on
# success, 0 when the entry looks corrupt.  $output_type selects whether the
# generated code appends to an ascii string ('ASCII') or fills a hashref.
# The hardcoded padding increments mirror $FIELD_LENGTH_MAGIC above.
sub _make_unpack_line {
    my ( $self, $output_type, @line_format ) = @_;
    my $dyna_code = 'sub {' . "\n"
        .' my ( $log_buff_entry_data, $cur_log_buff_entry_field, $line ) = @_;' . "\n"
        .' my $field;' . "\n";
    foreach my $field_name ( @line_format ) {
        # The timestamp fields are decoded from the entry header, not here.
        next if ( $field_name eq 'cqtq' || $field_name eq 'cqts' );
        if ( $field_name eq 'chi' ) {
            # Client IP: a 2-byte address family selects a 4-byte IPv4 or
            # 16-byte IPv6 payload (with their respective paddings).
            $dyna_code .= ' $field = unpack( "' . $FIELD_TEMPLATE{ 'chi' } . '", substr( $$log_buff_entry_data, $$cur_log_buff_entry_field ) );' . "\n"
                .' $$cur_log_buff_entry_field += ' . ( $IP_FAMILY_FIELD_LENGTH + 2 ) . ';' . "\n"
                .' if ( $field == AF_INET ) {' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . _format_ipv4( substr( $$log_buff_entry_data, $$cur_log_buff_entry_field, ' . $IPV4_FIELD_LENGTH . ' ) );' . "\n";
            } else {
                $dyna_code .= ' $line->{ "chi" } = _format_ipv4( substr( $$log_buff_entry_data, $$cur_log_buff_entry_field, ' . $IPV4_FIELD_LENGTH . ' ) );' . "\n";
            }
            $dyna_code .= ' $$cur_log_buff_entry_field += ' . $IPV4_FIELD_LENGTH . ';' . "\n"
                .' } elsif ( $field == AF_INET6 ) {' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . _format_ipv6( substr( $$log_buff_entry_data, $$cur_log_buff_entry_field, ' . $IPV6_FIELD_LENGTH . ' ) );' . "\n";
            } else {
                $dyna_code .= ' $line->{ "chi" } = _format_ipv6( substr( $$log_buff_entry_data, $$cur_log_buff_entry_field, ' . $IPV6_FIELD_LENGTH . ' ) );' . "\n";
            }
            $dyna_code .= ' $$cur_log_buff_entry_field += ' . ( $IPV6_FIELD_LENGTH + 4 ) . ';' . "\n"
                .' } else {' . "\n"
                .' return 0;' . "\n"
                .' }' . "\n";
        } elsif ( $field_name eq 'crc' ) {
            # Cache result code: numeric char looked up in %CRC_SYMBOL; an
            # unknown code means the entry is corrupt.
            $dyna_code .= ' $field = unpack( "' . $FIELD_TEMPLATE{ 'crc' } . '", substr( $$log_buff_entry_data, $$cur_log_buff_entry_field ) );' . "\n"
                .' if ( defined $CRC_SYMBOL{ chr( $field ) } ) {' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . $CRC_SYMBOL{ chr( $field ) };' . "\n";
            } else {
                $dyna_code .= ' $line->{ "crc" } = $CRC_SYMBOL{ chr( $field ) };' . "\n";
            }
            $dyna_code .= ' $$cur_log_buff_entry_field += ' . $INK_MIN_ALIGN . ';' . "\n"
                .' } else {' . "\n"
                .' return 0;' . "\n"
                .' }' . "\n";
        } elsif ( $field_name eq 'phr' ) {
            # Hierarchy code: unknown values degrade to "INVALID" instead of
            # rejecting the entry.
            $dyna_code .= ' $field = unpack( "' . $FIELD_TEMPLATE{ 'phr' } . '", substr( $$log_buff_entry_data, $$cur_log_buff_entry_field ) );' . "\n"
                .' if ( defined $HIER_SYMBOL{ chr( $field ) } ) {' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . $HIER_SYMBOL{ chr( $field ) };' . "\n";
            } else {
                $dyna_code .= ' $line->{ "phr" } = $HIER_SYMBOL{ chr( $field ) };' . "\n";
            }
            $dyna_code .= ' } else {' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . "INVALID";' . "\n";
            } else {
                $dyna_code .= ' $line->{ "phr" } = "INVALID";' . "\n";
            }
            $dyna_code .= ' }' . "\n"
                .' $$cur_log_buff_entry_field += ' . $INK_MIN_ALIGN . ';' . "\n";
        } elsif ( $field_name eq '{User-Agent}cqh' ) {
            # User-Agent header: NUL-terminated string, double-quoted in
            # ascii output, padded to the alignment boundary.
            $dyna_code .= ' $field = unpack( "' . $FIELD_TEMPLATE{ '{User-Agent}cqh' } . '", substr( $$log_buff_entry_data, $$cur_log_buff_entry_field ) );' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " \"" . $field . "\"";' . "\n";
            } else {
                $dyna_code .= ' $line->{ "{User-Agent}cqh" } = $field;' . "\n";
            }
            $dyna_code .= ' $$cur_log_buff_entry_field += _ink_align( length( $field ) + 1, $INK_MIN_ALIGN );' . "\n";
        } elsif ( $field_name eq 'ttms' || $field_name eq 'pssc' || $field_name eq 'psql' ) {
            # Fixed-width numeric fields: always padded to one alignment unit.
            $dyna_code .= ' $field = unpack( "' . $FIELD_TEMPLATE{ $field_name } . '", substr( $$log_buff_entry_data, $$cur_log_buff_entry_field ) );' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . $field;' . "\n";
            } else {
                $dyna_code .= ' $line->{ "' . $field_name . '" } = $field;' . "\n";
            }
            $dyna_code .= ' $$cur_log_buff_entry_field += ' . $INK_MIN_ALIGN . ';' . "\n";
        } else {
            # Everything else is a NUL-terminated string field, padded to
            # the alignment boundary.
            $dyna_code .= ' $field = unpack( "' . $FIELD_TEMPLATE{ $field_name } . '", substr( $$log_buff_entry_data, $$cur_log_buff_entry_field ) );' . "\n";
            if ( $output_type eq 'ASCII' ) {
                $dyna_code .= ' $$line .= " " . $field;' . "\n";
            } else {
                $dyna_code .= ' $line->{ "' . $field_name . '" } = $field;' . "\n";
            }
            $dyna_code .= ' $$cur_log_buff_entry_field += _ink_align( length( $field ) + 1, $INK_MIN_ALIGN );' . "\n";
        }
    }
    $dyna_code .= 'return 1;' . "\n"
        .'};' . "\n";
    return $dyna_code;
}

=head1 AUTHOR

Stephane Bagneris, C<< >>

=head1 BUGS

Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

 perldoc Apache::TrafficServer::BinLog

You can also look for information at:

=over 4

=item * RT: CPAN's request tracker (report bugs here)

L

=item * AnnoCPAN: Annotated CPAN documentation

L

=item * CPAN Ratings

L

=item * Search CPAN

L

=back

=head1 ACKNOWLEDGEMENTS

=head1 LICENSE AND COPYRIGHT

Copyright 2013 Yahoo! inc..

Licensed under the Perl License.

=cut

1; # End of Apache::TrafficServer::BinLog