Anyway, for what it is worth, here's a little Perl script to grab
HTML pages given a list of URLs. It may be useful as a starting point for
building more elaborate scripts (like robots).
Oscar
---
#!/local/bin/perl -s
#
# hget --- get an html page from an http server
#
# Oscar Nierstrasz 26/8/93 os...@cui.unige.ch
#
# following the example of Gene Spafford's ftpget
require "chat2.pl";
# Main driver: fetch every URL named on the command line, in order.
#
# Two URL forms are accepted:
#   http://host[:port][/path]   absolute; selects a new host and port
#   http:path                   relative; reuses the host and port of the
#                               most recent absolute URL
# Anything else is reported on STDERR and skipped.
#
# $host, $port and $request are deliberately left as globals so that a
# relative URL can see the values set by an earlier iteration.
die "Usage: hget <http-url> ...\n" unless $#ARGV >= 0;
$timeout = 60;    # read by http_get and handed to chat'expect
foreach $url (@ARGV) {
    if ($url =~ m|^http://(.*)|) {
        $host = $1;
        $port = 80;        # default
        $request = "/";    # default
        # Split the path off the host.  The leading "/" is captured as
        # part of the path so the request is always an absolute path
        # ("/index.html", not "index.html"; "/" rather than "").
        ($host =~ s|^([^/]+)(/.*)$|$1|) && ($request = $2);
        ($host =~ s/:(\d+)$//) && ($port = $1);
        # "http://" or "http:///path" leaves no usable host name.
        # Forget it so later relative URLs are rejected as well.
        if ($host eq '' || $host =~ m|^/|) {
            warn "hget: no host for $url\n";
            $host = '';
            next;
        }
    }
    # relative URL, so assume previous host & port:
    elsif ($url =~ /^http:(.*)/) {
        $relative = $1;
        unless ($host) {
            warn "hget: no host for $url\n";
            next;
        }
        if ($relative =~ m|^/|) {
            # Already an absolute path on the previous host.
            $request = $relative;
        }
        elsif ($relative ne '') {
            # Resolve against the directory of the previous request:
            # drop any query/fragment, then the last path segment.
            # "." and ".." segments are not collapsed here.
            ($base = $request) =~ s|[?#].*$||;
            $base =~ s|[^/]*$||;
            $request = $base . $relative;
        }
        # An empty relative part leaves $request alone, i.e. the
        # previous document is fetched again.
    }
    else {
        warn "hget: $url is not an http URL\n";
        next;
    }
    &http_get($host,$port,$request);
}
# http_get($host, $port, $request)
#
# Open a connection to $host:$port through chat2.pl, send the one-line
# request "GET $request" and copy everything the server returns to
# STDOUT until end of file or a timeout.
#
#   $host     host name handed to chat'open_port
#   $port     port number handed to chat'open_port
#   $request  text sent after "GET " (normally an absolute path)
#
# Reads the global $timeout, which is passed to chat'expect on every
# call.  Dies if the connection cannot be opened, the request cannot be
# sent, or chat'expect returns a false value.  A timeout is reported on
# STDERR and ends the transfer without dying.
sub http_get {
    local($host,$port,$request) = @_;
    # Keep the handle and the loop flag out of the caller's globals.
    # $done has to stay a dynamically scoped package variable: the
    # action strings below are evaluated by chat2.pl and set it by name.
    local($handle, $done);
    ($handle = &chat'open_port($host, $port))
        || die "chat'open($host,$port): $!\n";
    # The request line is terminated by CR LF, as the HTTP
    # specification requires for a simple request.
    &chat'print($handle,"GET $request\r\n")
        || die "chat'print(GET $request): $!\n";
    $done = 0;
    do {
        # The first pattern matches all pending input, newlines
        # included, and the action prints exactly the matched text.
        # A bare '.' would never match a chunk made only of newlines.
        # NOTE(review): this relies on chat2.pl evaluating the action
        # immediately after the pattern match, so that $& is still
        # valid (the same way $1 is used in ftpget-style scripts) --
        # confirm against the installed chat2.pl.
        &chat'expect($handle, $timeout,
            '(.|\n)+', 'print $&',
            'EOF', '$done = 1',
            'TIMEOUT', 'print STDERR "TIMEOUT\n"; $done = 1'
        )
            || die "chat'expect: $!\n";
    } until $done;
    &chat'close($handle);
}
__END__
----------------------------------------------------------------------------
Dr. O.M. Nierstrasz, Centre Universitaire d'Informatique
University of Geneva, 24, rue General-Dufour, CH-1211 Geneva 4, Switzerland
Tel: +41 22 705.7664 Secr: 705.7770 Fax: 320.2927
E-mail: os...@cui.unige.ch Home: 733.9568
----------------------------------------------------------------------------