#!/usr/local/bin/perl # # An attempt to rewrite the Hypertext Archie Gateway (CGI) so its not as # hard on the system (leaving temp files, 4 processes per request, etc.) # # Version 1.1 03-19-96 # # Public Domain # AA.pl by Brandon Long (blong@uiuc.edu) based on # AA 1.2 by Guy Brooker (guy@jw.estec.esa.nl), # CGI routines from Carlos Pero (cpero@ncsa.uiuc.edu) # # # This script relies on the C-archie client from Brendan Kehoe # Once installed, please set the correct path below. # You can obtain the latest version from a comp.sources archive, or at # file://ftp.uu.net/networking/info-service/archie/clients # # QUERIES # If no query is specified, returns a default form. # # Originally written for Perl 4 on AIX 3.2.5, but should work with minor # modification on Perl 5. # # # Version 1.0 # 12-10-95 Brandon Long (blong@uiuc.edu) # # Version 1.1 # 03-19-96 Brandon Long (blong@uiuc.edu) # Cleaned up, and now returns a default form if no query is # specified. # ****************** Configurable Variables ******************* $ARCHIE = "/local/bin/archie"; $DEFAULT_SERVER = "archie.rutgers.edu"; $DEFAULT_NICE = 500; $DEFAULT_HITS = 20; $DEFAULT_ARCHIE_OPTS = "-s"; # Case Insensitive Substring Match $ARCHIE_URL = "/cgi-bin/AA.pl"; $counter_file = "/u/Web/archie/archie.count"; $query_log_file = "/u/Web/archie/archie.log"; $query_log_url = "/archie/archie.log"; # Set this to 1 if perl flock() works on your system (doesn't # on Solaris) or to 0 if it doesn't. $LOCK_ON = 1; #************************************************************* # flock() values $LOCK_SH = 1; $LOCK_EX = 2; $LOCK_NB = 4; $LOCK_UN = 8; &cgi_receive; &cgi_decode; if (!defined($FORM{query})) { &display_form(); exit(0); } if (!(-x $ARCHIE)) { &return_error("\$Archie Program: $ARCHIE Not Found"); } if ($FORM{type} eq "Case Insensitive Substring Match") { $OPT_search = "-s"; } elsif ($FORM{type} eq "Exact Match") { $OPT_search = "-e"; } elsif ($FORM{type} eq "Case Sensitive Substring Match") { $OPT_search = "-c"; } elsif ($FORM{type} eq "Regular Expression Match") { $OPT_search = "-r"; } else { $OPT_search = $DEFAULT_ARCHIE_OPTS; } if ($FORM{server} eq "United Kingdom") { $ArchieServer="archie.doc.ic.ac.uk"; } elsif ($FORM{server} eq "ANS archie server") { $ArchieServer="archie.ans.net"; } elsif ($FORM{server} eq "Australia") { $ArchieServer="archie.au"; } elsif ($FORM{server} eq "Austria") { $ArchieServer="archie.univie.ac.at"; } elsif ($FORM{server} eq "Canada") { $ArchieServer="archie.mcgill.ca"; } elsif ($FORM{server} eq "Finland") { $ArchieServer="archie.funet.fi"; } elsif ($FORM{server} eq "Germany") { $ArchieServer="archie.th-darmstadt.de"; } elsif ($FORM{server} eq "Internic") { $ArchieServer="archie.internic.net"; } elsif ($FORM{server} eq "Israel") { $ArchieServer="archie.ac.il"; } elsif ($FORM{server} eq "Italy") { $ArchieServer="archie.unipi.it"; } elsif ($FORM{server} eq "Japan") { $ArchieServer="archie.wide.ad.jp"; } elsif ($FORM{server} eq "Korea") { $ArchieServer="archie.kr"; } elsif ($FORM{server} eq "New Zealand") { $ArchieServer="archie.nz"; } elsif ($FORM{server} eq "Rutgers University") { $ArchieServer="archie.rutgers.edu"; } elsif ($FORM{server} eq "Spain") { $ArchieServer="archie.rediris.es"; } elsif ($FORM{server} eq "SURAnet") { $ArchieServer="archie.sura.net"; } elsif ($FORM{server} eq "Sweden") { $ArchieServer="archie.luth.se"; } elsif ($FORM{server} eq "Taiwan") { $ArchieServer="archie.ncu.edu.tw"; } elsif ($FORM{server} eq "University of Nebraska") { $ArchieServer="archie.unl.edu"; } else { $ArchieServer="$DEFAULT_SERVER"; } if ($FORM{order} eq "date") { $OPT_order = "-t"; } elsif ($FORM{order} eq "host") { $OPT_order = ""; } else { $OPT_order = ""; } if ($FORM{nice} eq "Not Nice At All") { $OPT_nice = "-N0"; } elsif ($FORM{nice} eq "Nice") { $OPT_nice = "-N500"; } elsif ($FORM{nice} eq "Nicer") { $OPT_nicer = "-N1000"; } elsif ($FORM{nice} eq "Very Nice") { $OPT_nicer = "-N5000"; } elsif ($FORM{nice} eq "Extremely Nice") { $OPT_nicer = "-N10000"; } elsif ($FORM{nice} eq "Nicest") { $OPT_nicer = "-N32765"; } else { $OPT_nicer = "-N$DEFAULT_NICE"; } if ($FORM{hits} =~ /1[0-9][0-9]/) { $OPT_hits = "-m$FORM{hits}"; } elsif ($FORM{hits} =~ /[0-9][0-9]/) { $OPT_hits = "-m$FORM{hits}"; } else { $OPT_hits = "-m$DEFAULT_HITS"; } open(INPUT,"$ARCHIE $OPT_hits $OPT_nice $OPT_search $OPT_order -h $ArchieServer $FORM{query} |") || &return_error("$|"); &return_header; print "\n"; print "

Archie Gateway: Results for Query: $FORM{query}

\n"; $first = 1; $num_hosts = 0; $num_dirs = 0; $num_files = 0; while() { # print $_; chop; if (/Host/) { $host = $_; $host =~ s/(Host\s+)(\S*)\s*/$2/; if (!$first) { print ""; print "

\n"; } else { $first = 0; } # print " "; print "Host "; print "$host
\n"; print "

"; $num_hosts++; } elsif (/Location:/) { $loc = $_; $loc =~ s/(\s*Location:\s+)(\S*)\s*/$2/; print "
"; # print ""; print " "; print ""; print "$loc
\n"; $num_dirs++; } elsif (/FILE/) { $file = $_; $other = $_; $file =~ s/(\s*FILE [-drwx]{10}\s+[0-9]*\s+[A-Za-z]+[0-9]*[0-9]*\s+[0-9]*\s+[0-9]*\s+)(\S*)\s*/$2/; $other =~ s/(\s*FILE [-drwx]{10}\s+[0-9]*\s+[A-Za-z]+[0-9]*[0-9]*\s+[0-9]*\s+[0-9]*\s+)(\S*)\s*/$1/; # print "
"; # print ""; # print ""; print " "; print "$other
"; print "
"; print "$file
\n"; $num_files++; } } if ($first) { print "No Results!\n"; } ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $date = $year; $mon++; if ($mon < 10) { $date .= "0".$mon; } else { $date .= $mon; } if ($mday < 10) { $date .= "0".$mday; } else { $date .= $mday; } print "

\n"; print "


Summary for Query: $FORM{query}

\n"; print "Archie at $ArchieServer found "; print "$num_files files in $num_dirs directories on $num_hosts hosts.\n"; print "


\n"; print "\"[Back]\"\n"; print "Back to Archie Query Form"; print "
\n"; print "Brandon C. Long /\n"; print "blong@uiuc.edu / \n"; print "PGP / $date / \n"; &count_hit; &log_query; print "\n"; #&return_error("Nothing happened $ArchieServer"); #******************** Functions ***************************** sub return_header { &cgi_header(200,OK); print "\n"; print "Archie Gateway: Results for Query: $FORM{query}\n"; print "\n"; } sub return_error { &cgi_header(200,OK); print "\n"; print "Archie Gateway: Error\n"; print "\n"; print "\n"; print "

Archie Gateway

\n"; print "An error occurred while processing your request.
\n"; print "Error: @_\n"; print "\n"; exit(0); } #******************** CGI routines from cgi-dump *********************** sub cgi_header { local($code, $text) = @_; print "HTTP/1.0 $code $text\n"; print "Content-type: text/html\n"; print "\n"; } sub cgi_receive { #### Assign unique ID to user based on IP and browser $USER_ID = crypt($ENV{'REMOTE_ADDR'}, $ENV{'HTTP_USER_AGENT'}); if ($ENV{'REQUEST_METHOD'} eq "POST") { read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); } else { $buffer = $ENV{'QUERY_STRING'}; } } sub cgi_decode { @pairs = split(/&/, $buffer); foreach $pair (@pairs) { ($name, $value) = split(/=/, $pair); $name =~ tr/+/ /; $value =~ tr/+/ /; $name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; # Strip out any bad, bad characters $value =~ s/;/ /g; $value =~ s/^!/ /g; #### Allow in sentences $value =~ s/[\cM\n]/ /g; #### Mainly for textareas ## Skip blank text entry fields next unless ($value); ## Check for "dynamic" field names ## Mainly for on-the-fly input names, especially checkboxes if ($name =~ /^dynamic/) { $name = $value; $value = "on"; } $FORM{$name} = $value; } } sub count_hit { open(COUNTER,"+<$counter_file"); ($LOCK_ON) && flock(COUNTER,$LOCK_EX); @count = ; print "$count[0]"; $count[0]++; seek(COUNTER,0,0); print COUNTER "$count[0]"; ($LOCK_ON) && flock(COUNTER,$LOCK_UN); close(COUNTER); } sub log_query { open(LOG,">>$query_log_file"); ($LOCK_ON) && flock(LOG,$LOCK_EX); print LOG "$date - $FORM{query}\n"; ($LOCK_ON) && flock(LOG,$LOCK_UN); close(LOG); } sub display_form { print <<"EOFORM"; Archie Request Form

Archie Request Form

This is a form based Archie gateway for the WWW.
Please remember that Archie searches can take a long time...

You might just want to check out the Monster FTP Sites List instead.


What would you like to search for?
See past search keywords

There are several types of search:

The results can be sorted By Host or By Date

The impact on other users can be:

Several Archie Servers can be used:

You can restrict the number of results returned (default 10):

Press this button to submit the query: .

To reset the form, press this button: .


What is archie ?

"Archie" is a database of anonymous ftp sites and their contents. The software for it was written by the "Archie Group" (Peter Deutsch, Alan Emtage, Bill Heelan, and Mike Parker) at McGill University in Montreal, Canada, and they maintain the database as well.

"Archie" keeps track of the entire contents of a very large number of anonymous ftp sites, and allows you to search for files on those sites using various different kinds of filename searches.


Brandon C. Long / blong@uiuc.edu / PGP EOFORM }