#!/usr/local/bin/perl
#
# usage:
#   get_links.pl [-D] url
#     -D  Download all http links to current directory
#
# Fetches `url` with the external webgrab program, prints every link found
# in an href attribute of a tag whose name starts with "a" (one absolute URL
# per line on stdout) and, with -D, saves each http link into the current
# directory under the last path component of its URL.
#
# Exit status: 0 on success, 1 when no URL was given; dies if webgrab cannot
# be started for the main URL.

use strict;
use warnings;

my $WEBGRAB = "/home/blong/bin/webgrab";

# Matches one link-bearing tag and captures the href value in $1 (double
# quoted), $2 (single quoted) or $3 (unquoted). Detection and extraction are
# a single pattern so that a tag is never reported unless its href could be
# extracted, and attributes following the href are never glued onto it.
my $HREF_RE = qr{
    <\s*a
    [^<>]*?
    \bhref\s*=\s*
    (?: "([^"<>]*)"?
      | '([^'<>]*)'?
      | ([^\s"'<>]*)
    )
}xi;

# Collapse "." and ".." segments of an absolute path (one starting with "/").
# ".." never climbs above the root, and a trailing "." or ".." leaves a
# trailing slash, following the algorithm described in RFC 3986 section 5.2.4.
sub remove_dot_segments {
    my ($path) = @_;

    my @segments = split m{/}, $path, -1;
    shift @segments;                    # empty piece before the leading "/"

    my @kept;
    while (@segments) {
        my $seg = shift @segments;
        if ($seg eq '.' || $seg eq '..') {
            pop @kept if $seg eq '..';
            push @kept, '' unless @segments;    # keep the trailing slash
        }
        else {
            push @kept, $seg;
        }
    }
    return '/' . join('/', @kept);
}

# Turn the href value $href found in the document at $base into an absolute
# URL and return it.
#   - anything with a scheme (http:, mailto:, ...) is returned untouched
#   - "#frag" and "?query" are attached to the base document
#   - "//host/path" inherits the base scheme (http: if the base has none)
#   - "/path" is resolved against the base host
#   - everything else is resolved against the base directory
# An empty href resolves to the base directory, as the original script did.
sub resolve_link {
    my ($base, $href) = @_;

    # The scheme test must come first: absolute URLs may contain "#" or "..".
    return $href if $href =~ m{^[A-Za-z][A-Za-z0-9+.\-]*:};

    my $doc = $base;
    $doc =~ s/#.*//s;                   # base document without fragment
    return $doc . $href if $href =~ /^#/;

    my $plain = $doc;
    $plain =~ s/\?.*//s;                # ... and without query string
    return $plain . $href if $href =~ /^\?/;

    # Split the base into "scheme://host" (scheme optional) and its path.
    my ($root, $base_path) =
        $plain =~ m{^((?:[A-Za-z][A-Za-z0-9+.\-]*://)?[^/]*)(/.*)?$}s;
    $root      = ''  unless defined $root;
    $base_path = '/' unless defined $base_path;

    if ($href =~ m{^//}) {
        my ($scheme) = $root =~ m{^([A-Za-z][A-Za-z0-9+.\-]*:)//};
        $scheme = 'http:' unless defined $scheme;
        return $scheme . $href;
    }

    # Only the path part takes part in "." / ".." processing.
    my ($href_path, $suffix) = $href =~ m{^([^?#]*)(.*)$}s;

    my $merged;
    if ($href_path =~ m{^/}) {
        $merged = $href_path;
    }
    else {
        (my $dir = $base_path) =~ s{[^/]*$}{};  # drop the file name
        $merged = $dir . $href_path;
    }
    return $root . remove_dot_segments($merged) . $suffix;
}

# Fetch $link with webgrab and store it in the current directory. The file
# name is whatever follows the last "/" of the URL (fragment removed), or
# "index.html" when that would be empty. No shell is involved, so characters
# in a link taken from the remote page cannot be interpreted as shell syntax.
# Returns 1 on success, 0 (after a warning) on failure.
sub download_link {
    my ($link) = @_;

    my $target = $link;
    $target =~ s/#.*//s;                # fragments are never sent to a server

    my $name = $target;
    $name =~ s{.*/}{}s;
    $name = 'index.html' if $name eq '' || $name eq '.' || $name eq '..';

    my $in;
    unless (open($in, '-|', $WEBGRAB, '-s', $target)) {
        warn "Cannot run $WEBGRAB for $target: $!\n";
        return 0;
    }
    my $out;
    unless (open($out, '>', $name)) {
        warn "Cannot write $name: $!\n";
        close($in);
        return 0;
    }
    binmode($in);
    binmode($out);

    my $ok = 1;
    my $buf;
    while (read($in, $buf, 65536)) {
        print {$out} $buf or $ok = 0;
    }
    close($out) or $ok = 0;
    close($in)  or $ok = 0;
    warn "Download of $target to $name may be incomplete\n" unless $ok;
    return $ok;
}

# ---- command line ----------------------------------------------------------

my $download = 0;
my $url;
foreach my $arg (@ARGV) {
    if ($arg eq "-D") {
        $download = 1;
    }
    else {
        $url = $arg;                    # the last non-option argument wins
    }
}
if (!defined($url)) {
    print "URL required\n";
    exit 1;
}

# ---- fetch the page --------------------------------------------------------

# List-form pipe open: the URL is handed to webgrab as a single argument and
# is never parsed by a shell.
open(my $remote, '-|', $WEBGRAB, '-s', $url)
    or die "Problem reaching remote site: $!";

# Read the whole document at once so that tags split across lines are found.
my $html = do { local $/; <$remote> };
$html = '' unless defined $html;

unless (close($remote)) {
    # $! is set for I/O errors, otherwise $? holds webgrab's exit status.
    warn $! ? "Error reading from $WEBGRAB: $!\n"
            : "$WEBGRAB exited with status " . ($? >> 8) . "\n";
}

# ---- report (and optionally download) every link ---------------------------

while ($html =~ /$HREF_RE/g) {
    my $href = defined $1 ? $1
             : defined $2 ? $2
             :              $3;
    $href =~ s/^\s+//;
    $href =~ s/\s+$//;

    my $link = resolve_link($url, $href);
    print "$link\n";

    if ($download && $link =~ /^http:/i) {
        download_link($link);
    }
}

exit 0;