#!/usr/bin/perl # # File: Judas.pl # Author: Angus McIntyre # Date: 26.08.2000 # Updated: 26.08.2000 # # A script to display an image while secretly recording information # about the remote user. Information gathered may be mailed to a # specified mail address or written to a logfile. The script works # by inspecting environment variables set by the server. # Care has been taken to try to make this script secure. However, # use only at your own risk, and do not remove the code for # handling potentially tainted user input. # # The script can be customized by providing a configuration file # or, less securely, by passing arguments on the command line. The # script gets its settings in three ways: from default values, from # the URL that invokes it, and from a configuration file. Settings # passed as part of the URL will override defaults; settings in the # configuration file will override both the defaults and any # settings passed in the URL. (This is a security feature: it # ensures that if the owner of the script wants it to work in a # certain way, all they need to do is specify values in the # configuration file). # # The latest version of this script is available from: # # http://www.raingod.com/ # # ----------------------------------------------------------------- # REVISION HISTORY # # 20.03.2001 SLAM Added facility to log user cookies. # 09.03.2001 SLAM Delta'd in user-suggested changes from # CallerID (new OSMapping and format info). # 26.08.2000 SLAM New version implemented, using code drawn # from the old 'Judas.pl' and from the # new version of 'CallerID.pl'. # ----------------------------------------------------------------- # LEGAL NOTICE # # This script may be freely copied, distributed and modified. Use # of the script is at the risk of the user. The script is presented # "as-is" without any warranty, and the author is not liable for # any loss or damages arising out of the use of or failure to use # this script. This notice must appear in any modified copy of the # script in which the name of the original author also appears. # ----------------------------------------------------------------- # ----------------------------------------------------------------- # USER-EDITABLE GLOBAL CONSTANTS # ----------------------------------------------------------------- # These global constants provide default values. They can be # edited if necessary, but in general you would do better to use # a configuration file to customize the behaviour of the script. # $DEFAULTDEBUGMODE # # Set this constant to 1 if you want to see error messages # generated by the script. $DEFAULTDEBUGMODE = 0; # $DEFAULTREPORTADDRESS # # This is the address to which output from the script should # be mailed. You can set this to your mail address (but again, # you'd do better to use a configuration file). $DEFAULTREPORTADDRESS = ""; # $DEFAULTLOGFILE # # The default name of the log file to be written to. This # currently defaults to empty, forcing you to use a configuration # file to specify the name of the log file. If you can't use a # configuration file, you can specify the full path to the log # file here. $DEFAULTLOGFILE = ""; # $DEFAULTLOGFORMAT # # The format in which the log file should be written. The format # is described by a string containing upper and lower-case letters. # Each letter stands for one piece of information that can be written # to the log. The mapping between letters (keys) and information is # fixed by the FormatKey table which you can find documented lower # down in this file. The default pattern below writes: # # t - Time in ISO 8601 format. # h - Host (domain name or IP address) # a - Address (IP address) # b - Browser (name and version of browser) # o - OS (name and version of operating system) # u - User (remote identity or authenticated username) # p - Proxy (name and version of proxy software used if any) # g - Gateway (address of proxy server used if any) # # See below for more options. $DEFAULTLOGFORMAT = "thaboupg"; # $DEFAULTLOGSIZE # # The default size of the log file. When the log file exceeds # this size, it will be rolled over. $DEFAULTLOGSIZE = 16384; # $DEFAULTCONFIGFILE # # Name of the default configuration file. This will always be # a file located in the same directory as the script, so it # should consist only of a filename. $DEFAULTCONFIGFILE = "Judas.cfg"; # $DEFAULTMAILER # # The default mail program to be used to send mail reports. # This is usually 'sendmail'. $DEFAULTMAILER = "/usr/lib/sendmail"; # $PATHNAMESEPARATOR # # The character which is used to separate elements in system # pathnames. For UNIX boxes, this is always '/'. For Macintosh, # it's ':'. For Windows, it's probably '\\'. $PATHNAMESEPARATOR = "/"; # ----------------------------------------------------------------- # NON-EDITABLE GLOBAL CONSTANTS # ----------------------------------------------------------------- # These constants contain no user-serviceable parts. You're very # much advised not to mess with them unless you're sure that you # know what you're doing. Incautious tinkering can stop the script # working, or expose your server to intrusions. # $LOGFILEPATTERN # # This pattern is used as a check on the pathname given for the # log file. Any attempt to write a log file that doesn't match # this pattern will fail. This is a security measure required # to prevent attackers passing in the names of essential system # files in an attempt to get them overwritten. # # The default setting requires that the log file be written to # a file whose name ends with 'Log.txt', and which is stored in # a directory called 'logs'. $LOGFILEPATTERN = qq|logs/[A-Za-z]+Log\.txt|; # @ALLOWABLEARGUMENTS # # A list of the names of those settings which may be passed to the # script as arguments. Settings not in this list may not be set # as part of the URL that invokes the script, for fear of security # risks. # # 'logfile' is allowed because it provides a way to write data to # different log files for different invocations of the script. # This is secure, because the log file is checked carefully # before it is used. # # 'format' is allowed, because it provides a way to customize the # format written to the log. Someone malicious could do a small # amount of harm by telling the script to write data in the wrong # format (which might choke programs subsequently sent to analyze # the log), but you can protect against that by specifying the # format you want in the configuration file (which overrides any # values passed as part of the URL). # # 'config' is allowed, because it provides a way to specify # different possible configuration files. This is secure, because # the configuration file is checked to ensure that it's in the # same directory as the script. # # 'gethost' is allowed, because it's basically harmless. It # doesn't use lots of resources and can't be used to do any # damage. # # 'debug' is allowed because it's needed when testing the # script. However, the entry in the configuration file will # override it. # # 'mode' is *not* allowed, because it provides a way to cause # the script to send mail. # # 'address' is *not* allowed, because it provides a way to cause # the script to send mail to someone who may not want it. # # 'logsize' is *not* allowed, because it would provide external # users with a way of clearing the logs (set 'logsize' to 0). # # 'rolloversize' is not allowed because there doesn't seem to be # any need for it. # # 'mailer' is *not* allowed, because setting this via a URL would # allow remote users to run arbitrary programs. # # 'documentroot' is *not* allowed, because the point of this # setting is to try to restrict the files served to a particular # area of the server. @ALLOWABLEARGUMENTS = ( 'logfile', 'format', 'config', 'gethost', 'debug', 'image' ); # %OSMappings # # These mappings are used by the routines that identify user # agents in order to determine which OS is in use. Because # user agents (that's 'browsers' to you) are hopelessly # inconsistent when it comes to describing which OS they use, # we need some extra information in order to puzzle it out. # # This list is probably adequate for the present. The format # is: # # $OSMappings{} = "[,]"; # # The '' part is often optional, and is primarily used # to distinguish between different flavours of the same OS (i.e. # MacOS PPC vs. 68K). I choose to treat the different flavours # of Windows as distinct OS's with no minor versions. If this # bothers you, you can edit the table. %OSMappings = (); $OSMappings{"Win16"} = "Windows 3.x"; $OSMappings{"Win32"} = "Windows 3.x"; $OSMappings{"Win95"} = "Windows 95"; $OSMappings{"Win98"} = "Windows 98"; $OSMappings{"Win2000"} = "Windows 2000"; $OSMappings{"WinNT"} = "Windows NT"; $OSMappings{"Mac_PowerPC"} = "Macintosh,PPC"; $OSMappings{"Mac_PPC"} = "Macintosh,PPC"; $OSMappings{"Mac_68K"} = "Macintosh,68K"; $OSMappings{"AmigaOS"} = "AmigaOS"; $OSMappings{"BeOS"} = "BeOS"; $OSMappings{"Windows 3"} = "Windows 3.x"; $OSMappings{"Windows NT"} = "Windows NT"; $OSMappings{"Windows 95"} = "Windows 95"; $OSMappings{"Windows 98"} = "Windows 98"; $OSMappings{"Windows 2000"} = "Windows 2000"; $OSMappings{"Windows NT 5.0"} = "Windows 2000"; $OSMappings{"WebTV"} = "WebTV"; # $OSPatterns # # A pattern, compiled from the data in the table above and used # for matching entries in the browser's identifier string. $OSPatterns = join("|",keys %OSMappings); # $GETHOSTADDRESSTYPE # # The address type for doing 'gethostbyaddr' lookups is currently # always AF_INET, which is to say 2. $GETHOSTADDRESSTYPE = 2; # $FormatKey # # The format key table translates from the single character identifiers # used in the 'logformat' and 'mailformat' settings to a longer key which # is used to look up information in the 'info' array. # # Keys with lowercase letters are recommended for general use; keys with # uppercase letters indicate more specialised information, or information # that may not go well in the log files (i.e. the finger text). In many # cases, lowercase key information is produced by combining two items # represented by uppercase keys, thus 'b' ('browser') is equivalent to # 'B' ('browsername') and 'V' (browserversion). %FormatKey = (); $FormatKey{'u'} = 'user'; # User (e.g. 'user' || 'identity') $FormatKey{'a'} = 'address'; # IP address (e.g. '101.102.103.104') $FormatKey{'h'} = 'host'; # Host (e.g. 'bar.foo.com' || IP address) $FormatKey{'b'} = 'browser'; # Browser (e.g. "MSIE 3.01") $FormatKey{'o'} = 'os'; # Operating System (e.g. "Windows 95") $FormatKey{'r'} = 'referer'; # Referer (e.g. "http://www.foo.com/") $FormatKey{'p'} = 'proxy'; # Proxy software (e.g. "Squid 2.0.1") $FormatKey{'g'} = 'gateway'; # Proxy gateway (e.g. "proxy.bar.com") $FormatKey{'t'} = 'time'; # Time in ISO 8601 (e.g. '1999-02-18T020:57:57+00:00') $FormatKey{'l'} = 'languages'; # Languages (e.g. 'en', 'fr'); $FormatKey{'c'} = 'cookies'; # Cookies (e.g. 'counter=17'); $FormatKey{'U'} = 'username'; # Authenticated user (e.g. 'admin') $FormatKey{'I'} = 'identity'; # Remote identity (e.g. 'joe') $FormatKey{'H'} = 'hostname'; # Hostname (e.g. 'bar.foo.com' || "") $FormatKey{'B'} = 'browsername'; # Browser name (e.g. "Mozilla") $FormatKey{'V'} = 'browserversion'; # Browser version (e.g. "3.01Gold") $FormatKey{'O'} = 'osname'; # OS name (e.g. "Linux") $FormatKey{'E'} = 'osversion'; # OS version (e.g. "2.5.1") $FormatKey{'G'} = 'gatewayaddress'; # IP of proxy gateway (e.g. '123.123.213.213') $FormatKey{'P'} = 'gatewayport'; # Port number of proxy gateway (e.g. 8080) $FormatKey{'M'} = 'maybehost'; # Possible hostname of proxy'd host. $FormatKey{'T'} = 'universaltime'; # universal time in seconds (e.g. 21213142) $FormatKey{'R'} = 'requesturi'; # Actual resource requested by user # $ImageMimeTypes # # Array used to store the MIME types for known images. This array # is used as a security check to ensure that the remote user does # not try to help themselves to something that they shouldn't. If # you alter the default settings, make sure that you don't add any # extensions that might be found on sensitive system files. $ImageMimeTypes{"gif"} = "image/gif"; $ImageMimeTypes{"jpg"} = "image/jpeg"; $ImageMimeTypes{"jpeg"} = "image/jpeg"; $ImageMimeTypes{"tiff"} = "image/tiff"; $ImageMimeTypes{"tif"} = "image/tiff"; # ----------------------------------------------------------------- # PARAMETERS # ----------------------------------------------------------------- # Enable unbuffered output $|=1; # ----------------------------------------------------------------- # GLOBALS # ----------------------------------------------------------------- # %setting - array used to store configuration settings %setting = (); # ----------------------------------------------------------------- # TOP-LEVEL # ----------------------------------------------------------------- # Try to run the script and report an error if it fails for any # reason. eval("&main"); if ($@) { print <<"EndOfText"; Content-type: text/plain The script 'Judas.pl' could not execute because the error: $@ occurred. EndOfText } # ----------------------------------------------------------------- # MAIN ROUTINE # ----------------------------------------------------------------- # main # # Main routine for the program. Read settings from a configuration file, # get the relevant environment variables, give the user something to be # getting on with and then, if permitted, use 'gethostbyaddr' and 'finger' # to try to get a little more information. Finally, write mail reports and # log files if required. sub main { initialize_settings(); read_arguments(); read_configuration_file(); get_caller_info(); return_image(); write_log() if ($setting{'mode'} =~ /log/); send_mail() if ($setting{'mode'} =~ /mail/); } # initialize_settings # # Set up the settings array with some default values. sub initialize_settings { $setting{'mode'} = "log"; $setting{'address'} = $DEFAULTREPORTADDRESS; $setting{'logfile'} = $DEFAULTLOGFILE; $setting{'logformat'} = $DEFAULTLOGFORMAT; $setting{'mailformat'} = $DEFAULTLOGFORMAT; $setting{'gethost'} = 1; $setting{'logsize'} = $DEFAULTLOGSIZE; $setting{'rolloversize'} = int($DEFAULTLOGSIZE / 2); $setting{'config'} = $DEFAULTCONFIGFILE; $setting{'mailer'} = $DEFAULTMAILER; $setting{'debug'} = $DEFAULTDEBUGMODE; $setting{'documentroot'} = $ENV{'DOCUMENT_ROOT'} . $PATHNAMESEPARATOR; } # read_arguments # # Some of the settings used to control the script may be # passed to it as part of a URL. Any settings read from the # URL in this way will override the basic default settings. # The global '@ALLOWABLEARGUMENTS' indicates which settings # can be given as arguments. For reasons of either security # or simplicity, not all settings can be specified via the # URL. However, you can always specify a configuration file, # and the configuration file may contain values for any # setting. sub read_arguments { local(%input); read_cgi_input(*input); foreach (@ALLOWABLEARGUMENTS) { $setting{$_} = $input{$_} if $input{$_}; } } # read_configuration_file # # Read settings from a configuration file. This file # will only be read if it actually exists. If the named # file can't be found or can't be read, it will be ignored. # # The configuration file should consist of a set of entries # of the form 'set '. Any line that doesn't # match this form will be ignored, so you can put in blank # lines and comments. # # Note that for security reasons, configuration files are # required to be in the same directory as the script and # to have the form '.cfg', e.g. 'Judas.cfg'. # Only alphanumeric characters are allowed in the name of # the configuration file. This is to prevent people trying # to make the script read files that it shouldn't read. An # unacceptable filename or a configuration file that couldn't # be read will be reported via the debugging system if debugging # has been switched on. sub read_configuration_file { my($name,$value); if ($setting{'config'} =~ /[A-Za-z0-9]+\.cfg/) { if (open(CONFIG,$setting{'config'})) { while() { chomp; ($name,$value) = ($_ =~ /^\s*set\s+([^\s]+)\s+(.*)$/); $setting{$name} = $value if ($name); } close(CONFIG); } else { debug("can't read from '" . $setting{'config'} . "': $!"); } } else { debug("bad configuration filename: '" . $setting{'config'} . "'") if $setting{'config'}; } } # get_caller_info # # Get information about the user that called this script. For # the most part, this simply involves copying data from the # environment variables to our own array. sub get_caller_info { $info{'username'} = $ENV{"REMOTE_USER"}; $info{'identity'} = $ENV{"REMOTE_IDENT"}; $info{'hostname'} = $ENV{"REMOTE_HOST"}; $info{'address'} = $ENV{"REMOTE_ADDR"}; $info{'browser'} = $ENV{"HTTP_USER_AGENT"}; $info{'languages'} = $ENV{"HTTP_ACCEPT_LANGUAGE"}; $info{'requesturi'} = $ENV{"REQUEST_URI"}; $info{'cookies'} = $ENV{"HTTP_COOKIE"}; # If we don't have a valid hostname, and the configuration # allows us to use 'get_hostbyaddr', then do a lookup to # determine the real hostname. if (($info{'hostname'} eq '') && $setting{'gethost'}) { $info{'hostname'} = get_hostname($info{'address'}); } # Compute the 'user' and 'host' info as secondary values. # The 'user' is either the 'username' (the name by which # they logged onto this site) or their 'identity' (as # revealed by any 'identd' daemons running on their host). # Similarly, the 'host' is either a fully-qualified domain # name or, if one couldn't be found, an IP address. $info{'user'} = $info{'username'} || $info{'identity'}; $info{'host'} = $info{'hostname'} || $info{'address'}; # Analyze the user's browser ($info{'browsername'}, $info{'browserversion'}, $info{'osname'}, $info{'osversion'}, $info{'proxy'}) = user_agent_details($ENV{'HTTP_USER_AGENT'}); $info{'browser'} = $info{'browsername'} . " " . $info{'browserversion'}; $info{'os'} = $info{'osname'} . " " . $info{'osversion'}; # If the user's coming in via a proxy gateway, we need to shuffle # some of the information around. if ($info{'proxy'} || $ENV{'HTTP_VIA'} || $ENV{'HTTP_X_FORWARDED_FOR'}) { # If we have 'HTTP_VIA', then we inspect 'HTTP_VIA' to # find out all about the proxy we're dealing with. if ($ENV{'HTTP_VIA'}) { ($info{'gateway'},$info{'gatewayport'},$info{'proxy'}) = $ENV{'HTTP_VIA'} =~ /^[^\s]+\s([\w\.\-]+):?([0-9]*)\s\(?([^\)]*)\)?$/; } # If we don't have the name of the gateway server, then we can # get it from the host. The address passed is actually going to # be that of the gateway anyway. $info{'gateway'} = $info{'host'} unless ($info{'gateway'}); $info{'gatewayaddress'} = $info{'address'}; # If we're dealing with a proxy, then we almost certainly won't have # the real name of the host, but we might be able to get something # from the 'HTTP_FORWARDED_FOR' environment variable. $info{'hostname'} = $info{'host'} = $info{'address'} = ""; if ($ENV{'HTTP_X_FORWARDED_FOR'}) { ($info{'address'}) = ($ENV{'HTTP_X_FORWARDED_FOR'} =~ m|^(\d+\.\d+\.\d+\.\d+)|); $info{'hostname'} = $ENV{'HTTP_X_FORWARDED_FOR'} unless $info{'address'}; } $info{'host'} = $info{'hostname'} || $info{'address'}; # If we have a valid IP address, we can try to work out the host that # goes with it, but this must be considered extremely tentative. $info{'maybehost'} = get_hostname($info{'address'}) if ($info{'address'} && $setting{'gethost'}); } # Compute some additional information. $info{'referer'} = $ENV{'HTTP_REFERER'}; $info{'universaltime'} = time; $info{'time'} = format_timestamp_iso8601(time); } # return_image # # Grab an image file and pipe it back at the user. The filename is # checked to make sure that it actually confirms to a recognised # image type (to prevent the remote user sneakily trying to help # themselves to something that they shouldn't. sub return_image { my($file) = $setting{'image'}; my($path) = $setting{'documentroot'} . $file; my($extension) = ($file =~ /\.(.{1,5})$/); die "Unrecognised image type '$extension'" unless $ImageMimeTypes{$extension}; if (open(IMAGEFILE,$path)) { print "Content-type: $ImageMimeTypes{$extension}\n\n"; while() { print; } close(IMAGEFILE); } else { die "couldn't open path '$path' for input: $!"; } } # get_hostname # # Try to determine the name of the host from the IP address. # This function actually returns the host name, its aliases, # its type, length, and a list of addresses, but for most # purposes all but the first result can be ignored. sub get_hostname { my($address) = shift; my($packedaddress) = pack("C4",split(/\./,$address)); gethostbyaddr($packedaddress,$GETHOSTADDRESSTYPE); } # write_log # # Write our report to the log file. sub write_log { my($nologhosts) = $setting{'nologhosts'}; unless ($nologhosts && (($info{'host'} =~ /$nologhosts/o) || ($info{'proxy'} =~ /$nologhosts/o))) { if (($setting{'logfile'} =~ m|^$ENV{'DOCUMENT_ROOT'}|) && ($setting{'logfile'} =~ m|${LOGFILEPATTERN}|)) { if (open(LOGFILE,">>" . $setting{'logfile'})) { my(@keys) = split(//,$setting{'logformat'}); my(@data) = map($info{$FormatKey{$_}},@keys); print LOGFILE join("\t",@data), "\n"; close(LOGFILE); roll_log_if_needed(); } else { debug("can't write to '" . $setting{'logfile'} . "': $!"); } } else { debug("illegal logfile name '" . $setting{'logfile'} . "'"); } } } # roll_log_if_needed # # If the log has grown beyond a certain size, roll it over by copying # just the tail end of the log. sub roll_log_if_needed { my($logsize) = -s $setting{'logfile'}; if ($logsize > $setting{'logsize'}) { my($tmpname) = $setting{'logfile'} . ".tmp"; my($offset) = $logsize - $setting{'rolloversize'}; my($count) = 0; my($bytes,$buffer); if (open(LOGFILE,$setting{'logfile'})) { while((tell(LOGFILE) < $offset) && ) {} $bytes = read(LOGFILE,$buffer,$setting{'rolloversize'}); close(LOGFILE); unless ($bytes) { debug("nothing read from log file '" . $setting{'logfile'} . "': $!"); } if ($bytes && open(LOGFILE,">" . $setting{'logfile'})) { print LOGFILE $buffer; close(LOGFILE); } else { debug("can't write to log file '" . $setting{'logfile'} . "': $!") }; } else { debug("can't read from log file '" . $setting{'logfile'} . "': $!"); } } } # send_mail # # Send a report to the user by mail. sub send_mail { if (open(MAILPIPE, "|" . $setting{'mailer'} . " " . $setting{'address'})) { print MAILPIPE <