#!/usr/bin/perl # # File: CallerID.pl # Author: Angus McIntyre # Date: 25.05.1995 # Updated: 21.03.2001 # # A script to find out as much information as possible about a # remote user. This displays information to the user and, # optionally, mails the results to a specified mail address # or writes results to a logfile. The script works by inspecting # environment variables set by the server. Optionally, it may also # try to generate additional info by calling 'gethostbyaddr(3)' or # executing a 'finger(1)' command. # # Care has been taken to try to make this script secure. However, # use only at your own risk, and do not remove the code for # handling potentially tainted user input. # # The script can be customized by providing a configuration file # or, less securely, by passing arguments on the command line. The # script gets its settings in three ways: from default values, from # the URL that invokes it, and from a configuration file. Settings # passed as part of the URL will override defaults; settings in the # configuration file will override both the defaults and any # settings passed in the URL. (This is a security feature: it # ensures that if the owner of the script wants it to work in a # certain way, all they need to do is specify values in the # configuration file). # # The latest version of this script is available from: # # http://www.raingod.com/ # # ----------------------------------------------------------------- # REVISION HISTORY # # 21.03.2001 SLAM Removed tests to check for 'supercookies', # on the grounds that they practically don't # exist. # 20.03.2001 SLAM Added code to report on cookies. # 09.03.2001 SLAM Added OSMapping for Windows NT 5.0, which is # another name for Windows 2000 (suggested by # Karl Babcock). # 21.12.2000 SLAM Added 'REQUEST_URI' to the data that can be # recorded by the script (suggested by Mark # Baxter). # 05.07.1999 SLAM Complete rewrite, adding more sophisticated # information analysis and logging features. # 20.07.1996 SLAM Finally eliminated the unneeded 'require' # for 'ctime.pl'. # 02.06.1996 SLAM Fixed bug in file-logging which caused # rubbish to get written instead of the # correct access time on the log file. # 31.12.1995 SLAM Extensively overhauled to allow use of a # configuration file and logging options. # 28.12.1995 SLAM Added code to perform a 'gethostbyaddr' call # in the hope of finding out more about the # remote host. Added 'referer' to the list of # information returned. # 25.05.1995 SLAM Initial release. # # ----------------------------------------------------------------- # LEGAL NOTICE # # This script may be freely copied, distributed and modified. Use # of the script is at the risk of the user. The script is presented # "as-is" without any warranty, and the author is not liable for # any loss or damages arising out of the use of or failure to use # this script. This notice must appear in any modified copy of the # script in which the name of the original author also appears. # ----------------------------------------------------------------- # ----------------------------------------------------------------- # USER-EDITABLE GLOBAL CONSTANTS # ----------------------------------------------------------------- # These global constants provide default values. They can be # edited if necessary, but in general you would do better to use # a configuration file to customize the behaviour of the script. # $DEFAULTDEBUGMODE # # Set this constant to 1 if you want to see error messages # generated by the script. $DEFAULTDEBUGMODE = 0; # $DEFAULTREPORTADDRESS # # This is the address to which output from the script should # be mailed. You can set this to your mail address (but again, # you'd do better to use a configuration file). $DEFAULTREPORTADDRESS = ""; # $DEFAULTLOGFILE # # The default name of the log file to be written to. This # currently defaults to empty, forcing you to use a configuration # file to specify the name of the log file. If you can't use a # configuration file, you can specify the full path to the log # file here. $DEFAULTLOGFILE = ""; # $DEFAULTLOGFORMAT # # The format in which the log file should be written. The format # is described by a string containing upper and lower-case letters. # Each letter stands for one piece of information that can be written # to the log. The mapping between letters (keys) and information is # fixed by the FormatKey table which you can find documented lower # down in this file. The default pattern below writes: # # t - Time in ISO 8601 format. # h - Host (domain name or IP address) # a - Address (IP address) # b - Browser (name and version of browser) # o - OS (name and version of operating system) # u - User (remote identity or authenticated username) # p - Proxy (name and version of proxy software used if any) # g - Gateway (address of proxy server used if any) # f - Fingerable (could the user be fingered?) # # See below for more options. $DEFAULTLOGFORMAT = "thaboupgf"; # $DEFAULTLOGSIZE # # The default size of the log file. When the log file exceeds # this size, it will be rolled over. $DEFAULTLOGSIZE = 16384; # $DEFAULTCONFIGFILE # # Name of the default configuration file. This will always be # a file located in the same directory as the script, so it # should consist only of a filename. $DEFAULTCONFIGFILE = "CallerID.cfg"; # $DEFAULTMAILER # # The default mail program to be used to send mail reports. # This is usually 'sendmail'. $DEFAULTMAILER = "/usr/lib/sendmail"; # ----------------------------------------------------------------- # NON-EDITABLE GLOBAL CONSTANTS # ----------------------------------------------------------------- # These constants contain no user-serviceable parts. You're very # much advised not to mess with them unless you're sure that you # know what you're doing. Incautious tinkering can stop the script # working, or expose your server to intrusions. # $LOGFILEPATTERN # # This pattern is used as a check on the pathname given for the # log file. Any attempt to write a log file that doesn't match # this pattern will fail. This is a security measure required # to prevent attackers passing in the names of essential system # files in an attempt to get them overwritten. # # The default setting requires that the log file be written to # a file whose name ends with 'Log.txt', and which is stored in # a directory called 'logs'. $LOGFILEPATTERN = qq|logs/[A-Za-z]+\.Log.txt|; # @ALLOWABLEARGUMENTS # # A list of the names of those settings which may be passed to the # script as arguments. Settings not in this list may not be set # as part of the URL that invokes the script, for fear of security # risks. # # 'logfile' is allowed because it provides a way to write data to # different log files for different invocations of the script. # This should be secure, because the log file is checked before use. # # 'format' is allowed, because it provides a way to customize the # format written to the log. Someone malicious could do a small # amount of harm by telling the script to write data in the wrong # format (which might choke programs subsequently sent to analyze # the log), but you can protect against that by specifying the # format you want in the configuration file (which overrides any # values passed as part of the URL). # # 'config' is allowed, because it provides a way to specify # different possible configuration files. This is secure, because # the configuration file is checked to ensure that it's in the # same directory as the script. If you want to ensure that the # user can't change the configuration file used, you can remove # this item from the ALLOWABLEARGUMENTS list. # # 'gethost' is allowed, because it's basically harmless. It # doesn't use lots of resources and can't be used to do any # damage. # # 'redirect' is allowed because it provides a way to redirect # the user to another page. This has no security implications. # # 'debug' is allowed because it's needed when testing the # script. # # 'mode' is *not* allowed, because it provides a way to cause # the script to send mail. # # 'address' is *not* allowed, because it provides a way to cause # the script to send mail to someone who may not want it. # # 'finger' and 'fingerall' are *not* allowed, because they cause # the script to use external system resources. # # 'logsize' is *not* allowed, because it would provide external # users with a way of clearing the logs (set 'logsize' to 0). # # 'rolloversize' is not allowed because there doesn't seem to be # any need for it. # # 'mailer' is *not* allowed, because setting this via a URL would # allow remote users to run arbitrary programs. @ALLOWABLEARGUMENTS = ( 'logfile', 'logformat', 'config', 'gethost', 'redirect', 'debug' ); # %OSMappings # # These mappings are used by the routines that identify user # agents in order to determine which OS is in use. Because # user agents (that's 'browsers' to you) are hopelessly # inconsistent when it comes to describing which OS they use, # we need some extra information in order to puzzle it out. # # This list is probably adequate for the present. The format # is: # # $OSMappings{} = "[,]"; # # The '' part is often optional, and is primarily used # to distinguish between different flavours of the same OS (i.e. # MacOS PPC vs. 68K). I choose to treat the different flavours # of Windows as distinct OS's with no minor versions. If this # bothers you, you can edit the table. %OSMappings = (); $OSMappings{"Win16"} = "Windows 3.x"; $OSMappings{"Win32"} = "Windows 3.x"; $OSMappings{"Win95"} = "Windows 95"; $OSMappings{"Win98"} = "Windows 98"; $OSMappings{"Win2000"} = "Windows 2000"; $OSMappings{"WinNT"} = "Windows NT"; $OSMappings{"Mac_PowerPC"} = "Macintosh,PPC"; $OSMappings{"Mac_PPC"} = "Macintosh,PPC"; $OSMappings{"Mac_68K"} = "Macintosh,68K"; $OSMappings{"AmigaOS"} = "AmigaOS"; $OSMappings{"BeOS"} = "BeOS"; $OSMappings{"Windows 3"} = "Windows 3.x"; $OSMappings{"Windows NT"} = "Windows NT"; $OSMappings{"Windows 95"} = "Windows 95"; $OSMappings{"Windows 98"} = "Windows 98"; $OSMappings{"Windows 2000"} = "Windows 2000"; $OSMappings{"Windows NT 5.0"} = "Windows 2000"; $OSMappings{"WebTV"} = "WebTV"; # $OSPatterns # # A pattern, compiled from the data in the table above and used # for matching entries in the browser's identifier string. $OSPatterns = join("|",keys %OSMappings); # $GETHOSTADDRESSTYPE # # The address type for doing 'gethostbyaddr' lookups is currently # always AF_INET, which is to say 2. $GETHOSTADDRESSTYPE = 2; # $FormatKey # # The format key table translates from the single character identifiers # used in the 'logformat' and 'mailformat' settings to a longer key which # is used to look up information in the 'info' array. # # Keys with lowercase letters are recommended for general use; keys with # uppercase letters indicate more specialised information, or information # that may not go well in the log files (i.e. the finger text). In many # cases, lowercase key information is produced by combining two items # represented by uppercase keys, thus 'b' ('browser') is equivalent to # 'B' ('browsername') and 'V' (browserversion). %FormatKey = (); $FormatKey{'u'} = 'user'; # User (e.g. 'user' || 'identity') $FormatKey{'a'} = 'address'; # IP address (e.g. '101.102.103.104') $FormatKey{'h'} = 'host'; # Host (e.g. 'bar.foo.com' || IP address) $FormatKey{'b'} = 'browser'; # Browser (e.g. "MSIE 3.01") $FormatKey{'o'} = 'os'; # Operating System (e.g. "Windows 95") $FormatKey{'r'} = 'referer'; # Referer (e.g. "http://www.foo.com/") $FormatKey{'p'} = 'proxy'; # Proxy software (e.g. "Squid 2.0.1") $FormatKey{'g'} = 'gateway'; # Proxy gateway (e.g. "proxy.bar.com") $FormatKey{'t'} = 'time'; # Time in ISO 8601 (e.g. '1999-02-18T020:57:57+00:00') $FormatKey{'f'} = 'finger'; # Can be fingered? (e.g. 0, 1 or 2) $FormatKey{'l'} = 'languages'; # Languages (e.g. 'en', 'fr'); $FormatKey{'c'} = 'cookies'; # Cookies (e.g. 'counter=17'); $FormatKey{'U'} = 'username'; # Authenticated user (e.g. 'admin') $FormatKey{'I'} = 'identity'; # Remote identity (e.g. 'joe') $FormatKey{'H'} = 'hostname'; # Hostname (e.g. 'bar.foo.com' || "") $FormatKey{'B'} = 'browsername'; # Browser name (e.g. "Mozilla") $FormatKey{'V'} = 'browserversion'; # Browser version (e.g. "3.01Gold") $FormatKey{'O'} = 'osname'; # OS name (e.g. "Linux") $FormatKey{'E'} = 'osversion'; # OS version (e.g. "2.5.1") $FormatKey{'G'} = 'gatewayaddress'; # IP of proxy gateway (e.g. '123.123.213.213') $FormatKey{'P'} = 'gatewayport'; # Port number of proxy gateway (e.g. 8080) $FormatKey{'M'} = 'maybehost'; # Possible hostname of proxy'd host. $FormatKey{'T'} = 'universaltime'; # universal time in seconds (e.g. 21213142) $FormatKey{'F'} = 'fingertext'; # Text returned by 'finger'; $FormatKey{'R'} = 'requesturi'; # Actual resource requested by user # ----------------------------------------------------------------- # PARAMETERS # ----------------------------------------------------------------- # Enable unbuffered output $|=1; # ----------------------------------------------------------------- # GLOBALS # ----------------------------------------------------------------- # %setting - array used to store configuration settings %setting = (); # ----------------------------------------------------------------- # TOP-LEVEL # ----------------------------------------------------------------- # Try to run the script and report an error if it fails for any # reason. eval("&main"); if ($@) { print <<"EndOfText"; Content-type: text/plain The script 'CallerID.pl' could not execute because the error: $@ occurred. EndOfText } # ----------------------------------------------------------------- # MAIN ROUTINE # ----------------------------------------------------------------- # main # # Main routine for the program. sub main { # Initialize settings to defaults; pick up arguments from the # command line, and then read the configuration file. initialize_settings(); read_arguments(); read_configuration_file(); # If the script is set to redirect to another Web page, call # the redirection routine. Otherwise, output the head of the # page (to give the remote browser something to chew on while # we're calculating the user information). if ($setting{"redirect"}) { issue_redirection($setting{"redirect"}); } else { output_page_head(); } # Work out everything we can about the user and - if we're # not running in redirect mode - print it out. get_caller_info(); output_page_body() if (!$setting{"redirect"}); # Do any necessary logging. write_log() if ($setting{'mode'} =~ /log/); send_mail() if ($setting{'mode'} =~ /mail/); } # initialize_settings # # Set up the settings array with some default values. sub initialize_settings { $setting{'mode'} = "log"; $setting{'address'} = $DEFAULTREPORTADDRESS; $setting{'logfile'} = $DEFAULTLOGFILE; $setting{'logformat'} = $DEFAULTLOGFORMAT; $setting{'mailformat'} = $DEFAULTLOGFORMAT; $setting{'gethost'} = 1; $setting{'finger'} = 1; $setting{'fingerall'} = 1; $setting{'logsize'} = $DEFAULTLOGSIZE; $setting{'rolloversize'} = int($DEFAULTLOGSIZE / 2); $setting{'config'} = $DEFAULTCONFIGFILE; $setting{'redirect'} = ""; $setting{'mailer'} = $DEFAULTMAILER; $setting{'debug'} = $DEFAULTDEBUGMODE; } # read_arguments # # Some of the settings used to control the script may be # passed to it as part of a URL. Any settings read from the # URL in this way will override the basic default settings. # The global '@ALLOWABLEARGUMENTS' indicates which settings # can be given as arguments. For reasons of either security # or simplicity, not all settings can be specified via the # URL. However, you can always specify a configuration file, # and the configuration file may contain values for any # setting. sub read_arguments { local(%input); read_cgi_input(*input); foreach (@ALLOWABLEARGUMENTS) { $setting{$_} = $input{$_} if $input{$_}; } } # read_configuration_file # # Read settings from a configuration file. This file # will only be read if it actually exists. If the named # file can't be found or can't be read, it will be ignored. # # The configuration file should consist of a set of entries # of the form 'set '. Any line that doesn't # match this form will be ignored, so you can put in blank # lines and comments. # # Note that for security reasons, configuration files are # required to be in the same directory as the script and # to have the form '.cfg', e.g. 'CallerID.cfg'. # Only alphanumeric characters are allowed in the name of # the configuration file. This is to prevent people trying # to make the script read files that it shouldn't read. An # unacceptable filename or a configuration file that couldn't # be read will be reported via the debugging system if debugging # has been switched on. sub read_configuration_file { my($name,$value); if ($setting{'config'} =~ /[A-Za-z0-9]+\.cfg/) { if (open(CONFIG,$setting{'config'})) { while() { chomp; ($name,$value) = ($_ =~ /^\s*set\s+([^\s]+)\s+(.*)$/); $setting{$name} = $value if ($name); } close(CONFIG); } else { debug("can't read from '" . $setting{'config'} . "': $!"); } } else { debug("bad configuration filename: '" . $setting{'config'} . "'") if $setting{'config'}; } } # get_caller_info # # Get information about the user that called this script. For # the most part, this simply involves copying data from the # environment variables to our own array. sub get_caller_info { $info{'username'} = $ENV{"REMOTE_USER"}; $info{'identity'} = $ENV{"REMOTE_IDENT"}; $info{'hostname'} = $ENV{"REMOTE_HOST"}; $info{'address'} = $ENV{"REMOTE_ADDR"}; $info{'browser'} = $ENV{"HTTP_USER_AGENT"}; $info{'languages'} = $ENV{"HTTP_ACCEPT_LANGUAGE"}; $info{'requesturi'} = $ENV{"REQUEST_URI"}; $info{'cookies'} = $ENV{"HTTP_COOKIE"}; # If we don't have a valid hostname, and the configuration # allows us to use 'get_hostbyaddr', then do a lookup to # determine the real hostname. if (($info{'hostname'} eq '') && $setting{'gethost'}) { $info{'hostname'} = get_hostname($info{'address'}); } # Compute the 'user' and 'host' info as secondary values. # The 'user' is either the 'username' (the name by which # they logged onto this site) or their 'identity' (as # revealed by any 'identd' daemons running on their host). # Similarly, the 'host' is either a fully-qualified domain # name or, if one couldn't be found, an IP address. $info{'user'} = $info{'username'} || $info{'identity'}; $info{'host'} = $info{'hostname'} || $info{'address'}; # Analyze the user's browser ($info{'browsername'}, $info{'browserversion'}, $info{'osname'}, $info{'osversion'}, $info{'proxy'}) = user_agent_details($ENV{'HTTP_USER_AGENT'}); $info{'browser'} = $info{'browsername'} . " " . $info{'browserversion'}; $info{'os'} = $info{'osname'} . " " . $info{'osversion'}; # If the user's coming in via a proxy gateway, we need to shuffle # some of the information around. if ($info{'proxy'} || $ENV{'HTTP_VIA'} || $ENV{'HTTP_X_FORWARDED_FOR'}) { # If we have 'HTTP_VIA', then we inspect 'HTTP_VIA' to # find out all about the proxy we're dealing with. if ($ENV{'HTTP_VIA'}) { ($info{'gateway'},$info{'gatewayport'},$info{'proxy'}) = $ENV{'HTTP_VIA'} =~ /^[^\s]+\s([\w\.\-]+):?([0-9]*)\s\(?([^\)]*)\)?$/; } # If we don't have the name of the gateway server, then we can # get it from the host. The address passed is actually going to # be that of the gateway anyway. $info{'gateway'} = $info{'host'} unless ($info{'gateway'}); $info{'gatewayaddress'} = $info{'address'}; # If we're dealing with a proxy, then we almost certainly won't have # the real name of the host, but we might be able to get something # from the 'HTTP_FORWARDED_FOR' environment variable. $info{'hostname'} = $info{'host'} = $info{'address'} = ""; if ($ENV{'HTTP_X_FORWARDED_FOR'}) { ($info{'address'}) = ($ENV{'HTTP_X_FORWARDED_FOR'} =~ m|^(\d+\.\d+\.\d+\.\d+)|); $info{'hostname'} = $ENV{'HTTP_X_FORWARDED_FOR'} unless $info{'address'}; } $info{'host'} = $info{'hostname'} || $info{'address'}; # If we have a valid IP address, we can try to work out the host that # goes with it, but this must be considered extremely tentative. $info{'maybehost'} = get_hostname($info{'address'}) if ($info{'address'} && $setting{'gethost'}); } # Analyze the user's cookies analyze_cookies(); # Compute some additional information. $info{'referer'} = $ENV{'HTTP_REFERER'}; $info{'universaltime'} = time; $info{'time'} = format_timestamp_iso8601(time); # If we are allowed to use 'finger', go ahead and try to get # some more information about the user that way. if ($setting{'finger'}) { ($info{'finger'},$info{'fingertext'}) = finger_remote_user($info{'identity'}, $info{'host'}, $info{'proxy'}); } } # analyze_cookies # # Inspect the HTTP_COOKIE variable to see what cookies the user has # picked up. sub analyze_cookies { my($key,$value); foreach (split(/; /, $ENV{'HTTP_COOKIE'})) { s/\+/ /g; ($key,$value) = split(/=/,$_,2); $key =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $value =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge; $cookies{$key} .= $value . "\t"; } } # output_page_head # # Output the first part of the page. We want to send this as soon # as possible, so that the user knows that something's happening. sub output_page_head { print "Content-type: text/html\n\n"; print < CallerID Results

CallerID Results

Hello. This is what CallerID has been able to figure out about you.

EndOfHTML } # output_page_body # # Send the user back some information to let them know what we # know about them. sub output_page_body { print <You're using $info{'browser'}, and your operating system is $info{'os'}.

EndOfHTML # Give some information about the languages recognised by the # user. It would be classier if we wrote the proper names of # the languages, rather than just the codes, but this would # probably require a separate file with a lookup table. if ($info{'languages'}) { my($languagecount) = scalar(split(/,/,$info{'languages'})); print <You say that you can understand $languagecount language(s), identified by the following code(s): $info{'languages'}.

EndOfHTML } # Let's get in a little dig at Adobe for abusing the standard. # What happens is that when you install Acrobat, it adds 'pdf' # to the list of languages that your browser accepts - which is # lunacy, because 'pdf' is not a recognised language code, for # the simple reason that PDF is not and never has been a human # language. Presumably they do this because they don't have a # way to get the browser to include PDF in its HTTP_ACCEPT # line (which would be the proper place for it). And it's # clearly *so* important that they should be able to identify # people who can read PDFs, that they decided to screw with the # standard by adding their code to the HTTP_ACCEPT_LANGUAGE # line instead. This kind of standards abuse is worthy of # Microsoft: someone needs to be very severely punished for # this, just to set an example. if ($info{'languages'} =~ /pdf/) { print <To judge from the fact that you say you speak 'PDF', you've installed Acrobat Reader from Adobe, and their idiot installer has messed with your 'accept-languages' setting.

EndOfHTML } # Give their cookies a going-over. if ($ENV{'HTTP_COOKIE'}) { print <Hmm. You seem to have picked up some cookies. Let's have a look at those.

EndOfHTML my($values); foreach (sort keys %cookies) { $values = $cookies{$_}; print "" . ""; } print <

Of course, I can only see cookies that are sent to this server (which ought to mean that they were issued by this server). You may have other cookies, of course, but I don't get to see those.

EndOfHTML } # Describe the situation with respect to host and IP address. # There are a wide variety of possible report forms here, # dependent on how much information we have. if ($info{'hostname'} && $info{'address'}) { print <Your host is called '$info{'hostname'}' and its IP address is $info{'address'}.

EndOfHTML } else { if ($info{'hostname'}) { print <Your host is called '$info{'hostname'}'.

EndOfHTML } else { if ($info{'address'}) { print <Your host's name couldn't be worked out, but its IP address is $info{'address'}.

EndOfHTML } else { print <Nothing could be determined about the identity of your host.

EndOfHTML } } } # Tell them what we know about their proxy. if ($info{'gateway'}) { print <You're using a proxy server which is located at $info{'gateway'} ($info{'gatewayaddress'}). EndOfHTML # What software is the proxy running? if ($info{'proxy'}) { print <might be called '$info{'maybehost'}', but we can't be sure about that. EndOfHTML } print "

"; } # And where did they come from? if ($info{'referer'}) { print <The Web page that called this script is located at $info{'referer'}.

EndOfHTML } # And finally, if we have any finger information to # report, this is where we show it. if ($setting{'finger'}) { SWITCH: { # If 'finger' didn't work, say why. if ($info{'finger'} == 0) { print <CallerID couldn't get any information via 'finger' because $info{'fingertext'}.

EndOfHTML last SWITCH; } # If we were able to get general information # via 'finger', tell them about that. if ($info{'finger'} == 1) { print <CallerID used 'finger' to determine the following information about your host.

$info{'fingertext'}
EndOfHTML last SWITCH; } # If we were able to finger a specific user, show # the results here. if ($info{'finger'} == 2) { print <CallerID used 'finger' to determine the following information about you.

$info{'fingertext'}
EndOfHTML last SWITCH; } } } # Debugging information on request. if ($setting{'debug'}) { print "

Information

CookieValue 
$_$cookies{$_}
\n"; foreach (sort keys %info) { print "\n"; } print "
$_ $info{$_}
\n"; print "

Settings

\n"; foreach (sort keys %setting) { print "\n"; } print "
$_ $setting{$_}
\n"; print "

Environment

\n"; foreach (sort keys %ENV) { print "\n"; } print "
$_ $ENV{$_}
\n"; } # And close the page. print <
EndOfHTML } # get_hostname # # Try to determine the name of the host from the IP address. # This function actually returns the host name, its aliases, # its type, length, and a list of addresses, but for most # purposes all but the first result can be ignored. sub get_hostname { my($address) = shift; my($packedaddress) = pack("C4",split(/\./,$address)); gethostbyaddr($packedaddress,$GETHOSTADDRESSTYPE); } # finger_remote_user # # Try to find out something about the remote user via 'finger'. We need # to be very careful here, as there's a system call involved, and hackers # could try to use this script to execute commands on our server. The # function will carefully check the user input to make sure that it # contains nothing that could hurt us before calling 'finger'. sub finger_remote_user { my($user,$host,$proxy) = @_; my($code,$text,$linecount); SWITCH: { # Check for the obvious error cases first. $code = 0, $text = "'$host' is behind a proxy server", last SWITCH if $proxy; $code = 0, $text = "the remote user is unknown", last SWITCH if (!$user && !$setting{'fingerall'}); $code = 0, $text = "the remote host is unknown", last SWITCH if (!$host); # Compose the finger string and untaint it. my($tainted) = $user . "\@" . $host; $tainted =~ /^([@\-\w.]*)$/; my($untainted) = $1; # Reject anything that poses a security risk (as shown by the # fact that the string couldn't be untainted). $code = 0, $text = "it would be dangerous to finger '$tainted'", last SWITCH if !$untainted; # Go ahead and try to finger. Open a pipe to the 'finger' # application and gather up the results. if (open(FINGER,"finger $untainted|")) { $text = ""; $linecount = 0; while() { $text .= $_; $linecount++; } close(FINGER); # If we got some text from 'finger', set up a code # based on whether we did 'finger' on an individual user # or on the remote host as a whole. if ($linecount > 2) { $code = ($user ? 2 : 1); } # If we've got nothing, set up to report appropriately. else { $code = 0; $text = "'$host' did not respond"; } } # If we couldn't open 'finger', report that. else { $code = 0; $text = "'finger' could not be opened."; } } # And return the results return ($code,$text); } # write_log # # Write our report to the log file. Note that if the logfile can't be # written to, the script will fail silently (unless 'debug' is on, in # which case it will fail noisily, as intended). sub write_log { if (($setting{'logfile'} =~ m|^$ENV{'DOCUMENT_ROOT'}|) && ($setting{'logfile'} =~ m|${LOGFILEPATTERN}|)) { if (open(LOGFILE,">>" . $setting{'logfile'})) { my(@keys) = split(//,$setting{'logformat'}); my(@data) = map($info{$FormatKey{$_}},@keys); print LOGFILE join("\t",@data), "\n"; close(LOGFILE); roll_log_if_needed(); } else { debug("can't write to '" . $setting{'logfile'} . "': $!"); } } else { debug("illegal logfile name '" . $setting{'logfile'} . "'"); } } # roll_log_if_needed # # If the log has grown beyond a certain size, roll it over by copying # just the tail end of the log. # # Note an issue here: if this script is getting hammered hard, a second # request may come in while the log file is being rolled over. If this # happens, information could be lost (because one process is trying to # write to the log file at the same moment that another is trying to # roll it over). We could probably work around this with file locking, # but it doesn't really seem worth it. If you're using CallerID in a # heavy use scenario, you're probably using it the wrong way somehow. sub roll_log_if_needed { my($logsize) = -s $setting{'logfile'}; if ($logsize > $setting{'logsize'}) { my($tmpname) = $setting{'logfile'} . ".tmp"; my($offset) = $logsize - $setting{'rolloversize'}; my($count) = 0; my($bytes,$buffer); if (open(LOGFILE,$setting{'logfile'})) { while((tell(LOGFILE) < $offset) && ) {} $bytes = read(LOGFILE,$buffer,$setting{'rolloversize'}); close(LOGFILE); unless ($bytes) { debug("nothing read from log file '" . $setting{'logfile'} . "': $!"); } if ($bytes && open(LOGFILE,">" . $setting{'logfile'})) { print LOGFILE $buffer; close(LOGFILE); } else { debug("can't write to log file '" . $setting{'logfile'} . "': $!") }; } else { debug("can't read from log file '" . $setting{'logfile'} . "': $!"); } } } # send_mail # # Send a report to the user by mail. sub send_mail { if (open(MAILPIPE, "|" . $setting{'mailer'} . " " . $setting{'address'})) { # Start by writing the message headers, followed by a set of # comments explaining where the message came from. print MAILPIPE < Redirection

Redirection

It appears that your browser cannot handle redirections automatically. You can proceed to the randomly-selected page by clicking here.

EndOfHTML } # read_cgi_input # # Get the data provided by either a GET or POST and write the data # to the associative array indicated. This is provided for the # sake of completeness. The correct thing to do is, of course, to # use the standard 'CGI-lib.pl' library or a variant, and 'require' # it at the beginning of the script, rather than laboriously # 'rolling your own' each time. sub read_cgi_input { local(*data) = @_; local($buffer); # Get the form variable information as required according to the # method in use. This allows the function to handle both GET # and POST data. if ($ENV{"REQUEST_METHOD"} eq "POST") { read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); } elsif ($ENV{"REQUEST_METHOD"} eq "GET") { $buffer = $ENV{'QUERY_STRING'}; } # Split the name-value pairs, decode the Web-encoded values and # assign them to entries in the input associative array. @pairs = split(/&/, $buffer); foreach $pair (@pairs) { ($name, $value) = split(/=/, $pair); $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $data{$name} = $value; } } # user_agent_details # # User agents are extraordinarily inconsistent when it comes to # identifying themselves. This function does its best to figure # out what it's dealing with. It works well for most of the # common cases, but may fail on the more esoteric varieties. # # The function is called with an identifier string taken from # HTTP_USER_AGENT. It returns the browser's codename, version, # operating system, operating system version, and proxy software # (blank if none). Some values returned may be blank or "unknown". # # The main division is between Mozilla (aka Netscape Navigator) # and all the browsers that declare themselves to be compatible # with it (most often Microsoft Internet Explorer). Mozilla's # format for HTTP_USER_AGENT and the MSIE 'compatible' format # are relatively consistent, but other browsers may be less easy # to deal with. sub user_agent_details { my($identifier) = shift; my($agent,$proxy) = ($identifier =~ /^(.*) (via .*)$/); $agent = $identifier if (!$agent); my($navigator,$version,$os,$osversion); # Start by handling WebTV, which (of course) has its own bizarre # way of formatting the HTTP_USER_AGENT string. if ($agent =~ /WebTV/) { ($navigator,$version) = ($agent =~ m|(WebTV)[ /]([\w\.]+)|); $os = "WebTV"; } else { # Handle 'compatibles' (i.e. typically MSIE) first. Anything # that has 'compatible' in its name is probably returning a fake # browser as the first item in the string, but contains the real # information in parentheses thereafter. MSIE typically gives a # single token to identify the user interface and OS, which we # may then need to split into OS and version. if ($agent =~ /compatible/) { ($navigator,$version) = ($agent =~ m|compatible;\s+([\w]+)[ /]([\w\.]+);|); my($ui) = ($agent =~ m|($OSPatterns)|); if (!$ui) { ($ui) = ($agent =~ m|compatible; [^;]+; ([\w\s]+)|); } ($os,$osversion) = retrieve_os_details($ui); } # Process stuff that looks conformant. If it doesn't declare itself # as compatible, we assume that the first item in the string is the # identifier for the real navigator. We may or may not be able to # get straight to the OS version. else { ($navigator,$version) = ($agent =~ m|([\w\s]+)[ /]([\w\.]+)|); my($ui,$subtype) = ($agent =~ m|\(([^;]+);*[^;]*;?([^\)]*)\)|); SWITCH: { # If the UI is identified as X11, the operating system and # its version can be extracted simply enough from the subtype. ($os,$osversion) = ($subtype =~ m|^\s*([^\s]+)\s+([^\s]+)|), last SWITCH if ($ui =~ /X11/); # If the UI is Macintosh, we report 'Macintosh' as the OS, and # the processor type as the OS version. Note that the data for # processor type has to be massaged a little, as Netscape's # browsers occasionally write odd stuff in that field. $os = $ui, ($osversion) = ($subtype =~ m|^\s*(\w+)|), last SWITCH if ($ui eq "Macintosh"); # Otherwise, we hunt through a list of known OS's. ($os,$osversion) = retrieve_os_details($ui); } } } # Clean up information and then give it back; $proxy =~ s/(via proxy gateway|via)\s+//; $navigator = "unknown" unless $navigator; $os = "unknown" unless $os; return ($navigator,$version,$os,$osversion,$proxy); } # retrieve_os_details # # Get the details of the operating system identified, or return # unknown if we weren't able to infer anything that looked like # it could be an operating system. This function is passed a # token extracted from HTTP_USER_AGENT, and performs a lookup # in a table defined earlier. sub retrieve_os_details { my($token) = shift; my($match) = ($token =~ m|($OSPatterns)|); return split(",",$OSMappings{$match} || "unknown"); } # format_timestamp_iso8601 # # Write a time in a relatively readable format based on the # ISO8601 standard. This has the advantage of being easy for # both humans and machines to read. sub format_timestamp_iso8601 { my($time) = shift; my($sec,$min,$hour,$mday,$mon,$year) = gmtime($time); $year += 1900; $mon++; sprintf("%04d-%02d-%02dT%02d:%02d:%02d+00:00", $year,$mon,$mday,$hour,$min,$sec); } # debug # # A debugging utility. If debug flag is on, calling this function # causes the script to die and report whatever caused it to die. # If the flag is not set, the function does nothing. sub debug { my($message) = shift; if ($setting{'debug'}) { die $message; } }