#!/usr/bin/env python3
#
# File:         RandomPageFromList.py
# Author:       Angus McIntyre
# Date:         07.05.2001
# Updated:      07.05.2001
#
# Script to send the user off to a random URL, chosen from a list of URLs
# in a file. The file to use can be specified as an argument to the script.
#
# ---------------------------------------------------------------------------
# REVISION HISTORY
#
# 07.05.2001    SLAM    First implementation.
#
# ---------------------------------------------------------------------------
# LEGAL NOTICE
#
# This script may be freely copied, distributed and modified. Use of the
# script is at the risk of the user. The script is presented "as-is" without
# any warranty, and the author is not liable for any loss or damages arising
# out of the use of or failure to use this script. This notice must appear
# in any modified copy of the script in which the name of the original
# author also appears.
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# IMPORTS
# ---------------------------------------------------------------------------

# NOTE: the original 'import cgi' is gone. The cgi module was removed from
# the standard library in Python 3.13 (PEP 594), so importing it would stop
# the script from starting; form parsing now uses urllib.parse instead.
import html
import os
import random
import re
import string
import sys
import traceback
from urllib.parse import parse_qs

# ---------------------------------------------------------------------------
# CONSTANTS
# ---------------------------------------------------------------------------

# default_url_file: path to default file containing a list of URLs
default_url_file = "RandomURLs.txt"

# path_separator: path separator for your operating system, e.g. '/'
# for UNIX, '\' for DOS/Windows, ':' for MacOS etc.
path_separator = "/"

# full_path_pattern: a pattern that recognises whether a pathname passed
# represents a complete pathname. For UNIX (as shown below) this is fairly
# simple. For MacOS and DOS/Windows you'll need to be more ingenious.
full_path_pattern = re.compile(r"^/")

# allowed_url_pattern: a pattern designed to check that the line returned
# as a URL *is* in fact a plausible URL. Lines that do not match are
# skipped. This is partly a security feature, and partly a convenience
# feature. It's a security feature because it should help to stop
# malicious people from getting anything of interest from system files
# such as the password file; it's a convenience feature because we don't
# want to serve the user anything that isn't a valid URL anyway.
allowed_url_pattern = re.compile(r"^(?:https?|ftp|gopher)://")

# allowed_path_pattern: a pattern designed to check that the final
# pathname to the file isn't something it shouldn't be. I set the pattern
# to match 'URLs.txt' and then make all my URL lists end with that string.
# An alternative approach would be to specify the path to your HTML
# directory so that, whatever happens, the script won't be allowed to get
# any file that isn't in your area.
# (The dot is escaped so that it only matches a literal '.'.)
allowed_path_pattern = re.compile(r"^.*URLs\.txt$")

# listfile_base: if you want to specify the directory containing the
# list files, you can use this variable. Otherwise the files will be
# assumed to be relative to the directory containing the script to be
# executed. The string should end with a path separator.
listfile_base = ""

# max_form_bytes: upper bound on how much of a POSTed form body is read
# when looking for the 'file' field.
max_form_bytes = 4096

# ---------------------------------------------------------------------------
# GLOBALS
# ---------------------------------------------------------------------------

# urls: list of URLs recovered from a URL file
urls = []

# ---------------------------------------------------------------------------
# EXCEPTIONS
# ---------------------------------------------------------------------------


class RandomPageError(Exception):
    """Base class for the errors raised by this script itself."""


class IllegalURLFileError(RandomPageError):
    """The resolved list-file path does not match allowed_path_pattern."""


class NoValidURLsError(RandomPageError):
    """The list file contained no line matching allowed_url_pattern."""


# ---------------------------------------------------------------------------
# FUNCTIONS
# ---------------------------------------------------------------------------


def report_error(message):
    """Print a slightly friendly error message as an HTML page.

    message may be a string or an iterable of strings (such as the list
    returned by traceback.format_exception_only). It is HTML-escaped
    before being written, because it can contain text taken from the
    request (for example the requested file name).
    """
    if not isinstance(message, str):
        message = "".join(message)
    sys.stdout.write(
        "Content-type: text/html\n\n"
        "<html><head><title>Error</title></head><body>\n"
        "<p>No random URL could be returned because the error:</p>\n"
        f"<pre>{html.escape(message.strip())}</pre>\n"
        "<p>occurred. Sorry.</p>\n"
        "</body></html>\n"
    )


def read_url_file(filename):
    """Work out which file 'filename' refers to and load its URLs.

    A filename matching full_path_pattern is used as is; otherwise it is
    taken relative to listfile_base when that is set, and relative to the
    directory of the running script when it is not. Every line of the
    file that matches allowed_url_pattern is stripped of surrounding
    whitespace and appended to the global list 'urls'.

    Raises IllegalURLFileError if the resolved path does not match
    allowed_path_pattern; errors from opening the file propagate.

    NOTE(review): 'filename' comes from the request, and absolute paths
    and '..' segments are accepted as long as the result matches
    allowed_path_pattern. Tighten that pattern (or set it to your list
    directory) if this is too permissive for your installation.
    """
    if full_path_pattern.match(filename):
        filepath = filename
    elif listfile_base:
        filepath = listfile_base + filename
    else:
        # Outside a CGI environment SCRIPT_FILENAME is not set, so fall
        # back on the location of this file.
        script_pathname = (os.environ.get("SCRIPT_FILENAME")
                           or os.path.abspath(__file__))
        script_dir = script_pathname.rpartition(path_separator)[0]
        filepath = script_dir + path_separator + filename

    if not allowed_path_pattern.match(filepath):
        raise IllegalURLFileError(f"Illegal URL file: {filepath}")

    # The 'with' block guarantees the file is closed, even on error.
    with open(filepath, "r", encoding="utf-8", errors="replace") as handle:
        urls.extend(
            line.strip() for line in handle if allowed_url_pattern.match(line)
        )


def perform_redirect(url):
    """Send a redirect instruction for 'url' to the browser.

    The redirect itself is carried by the 'Location:' header; a Status
    header is sent with it because the response also has a body. For the
    benefit of browsers that do not follow the redirect, the body gives
    the user a link to the selected URL.

    Raises ValueError if 'url' contains a carriage return or line feed,
    since that would let it inject additional response headers.
    """
    if "\r" in url or "\n" in url:
        raise ValueError("URL must not contain line breaks")
    link = html.escape(url, quote=True)
    sys.stdout.write(
        "Status: 302 Found\n"
        "Content-type: text/html\n"
        f"Location: {url}\n\n"
        "<html><head><title>Redirect</title></head><body>\n"
        "<p>It appears that your browser cannot handle redirection "
        "automatically. Please proceed to the randomly-selected page "
        f'by clicking <a href="{link}">here</a>.</p>\n'
        "</body></html>\n"
    )


def requested_url_file(environ=None):
    """Return the list file named by the request's 'file' form field.

    The field is looked up in the query string and, for a URL-encoded
    POST, in the request body read from standard input. If the field is
    absent or empty, default_url_file is returned. 'environ' defaults to
    os.environ.
    """
    if environ is None:
        environ = os.environ
    fields = parse_qs(environ.get("QUERY_STRING", ""))

    content_type = environ.get("CONTENT_TYPE", "").lower()
    if (environ.get("REQUEST_METHOD", "").upper() == "POST"
            and content_type.startswith("application/x-www-form-urlencoded")):
        try:
            length = int(environ.get("CONTENT_LENGTH") or 0)
        except ValueError:
            length = 0
        length = max(0, min(length, max_form_bytes))
        for key, values in parse_qs(sys.stdin.read(length)).items():
            fields.setdefault(key, []).extend(values)

    values = fields.get("file")
    return values[0] if values else default_url_file


# ---------------------------------------------------------------------------
# MAIN ROUTINE
# ---------------------------------------------------------------------------


def main():
    """Pick a random URL from the requested list and redirect to it.

    Anything that goes wrong is reported to the user as an HTML error
    page rather than being allowed to escape as a traceback.
    """
    try:
        list_file = requested_url_file()

        # Read the file that contains the URLs. Raise an exception if no
        # valid URLs could be found in the file. Otherwise, pick one at
        # random and send the user to it.
        read_url_file(list_file)
        if not urls:
            raise NoValidURLsError(
                f"No valid URLs found in file: {list_file}"
            )
        perform_redirect(random.choice(urls))
    except Exception as exc:
        # Top-level boundary of the script: report, don't crash.
        report_error(traceback.format_exception_only(type(exc), exc))


if __name__ == "__main__":
    main()