#!/usr/bin/perl -w
#
# Fetch the Google Groups search results for posts authored by
# "Terry Pratchett" and print them to STDOUT as an RSS 1.0 feed.
#
# Dies with the HTTP status line if the search page cannot be fetched.
#
# NOTE(review): the copy of this script that was reviewed had every
# HTML-tag-shaped substring stripped out of its regexes and string
# literals.  The surviving fragments are kept as they were; the markup
# that was filled back in (flagged with NOTE(review) below) is a
# best-effort reconstruction and must be checked against the real page
# source before the output is relied upon.

use strict;
use LWP::UserAgent;
use XML::RSS;
use HTML::Entities;

# User-Agent string sent with the request (identifies as MSIE 6).
my $ie  = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
my $url = "http://groups.google.com/groups?safe=images&as_uauthors=Terry%20Pratchett&lr=lang_en&hl=en";

# Prefix for the site-relative links captured from the result page.
my $site = "http://groups.google.com";

my $ua = LWP::UserAgent->new;
$ua->agent($ie);
my $response = $ua->get($url);

# Fail before building anything if the fetch did not succeed.
die $response->status_line unless $response->is_success;

my $rss = XML::RSS->new(version => '1.0');
$rss->channel(
    title       => 'Terry Pratchett on Usenet',
    link        => $url,
    description => 'Terry Pratchett\'s Usenet posts, courtesy of Google Groups',
);

# One search hit.  Captures, in order: link, title, quote, group link,
# group name, date & author, thread link, thread title.
# NOTE(review): every "<a\shref=" opener, the one-attribute tag after the
# title's </a>, the <b> before the ellipsis and the parts of the character
# classes up to and including ">" were restored by hand -- confirm.
my $item_re = qr{
    <a\shref="?([^">]*)                     # Link
    "?>
    ([^<]*)                                 # Title
    </a><\w+\s\w+="?[^>"]*"?>
    (.*)                                    # Quote
    <b>...</b>\s*
    <a\shref="?([^">\s]*)                   # Group link
    "?\sclass="?a"?>
    ([^<]*)                                 # Group name
    </a>
    ([^<]*)                                 # Date & author
    <a\shref="?([^">\s]*)                   # Thread link
    "?\sclass="?a"?>
    (View\sThread\s\([^\s]*\sarticles?\))   # Thread title
}six;

# The hits are taken from the second piece of the page when it is cut at
# <div>/</div> boundaries.
# NOTE(review): only "?div>" survived of this delimiter -- confirm.
my @chunks = split m{</?div>}, $response->content;

# An unexpected page layout yields no second piece; emit an empty feed
# rather than splitting undef.
my $results = defined $chunks[1] ? $chunks[1] : '';

# NOTE(review): the per-hit delimiter was stripped entirely; "<p>" is
# assumed -- confirm.
foreach my $item (split m{<p>}, $results) {
    next unless $item =~ $item_re;

    # Copy the captures right away so later regex use cannot disturb them.
    my ($link, $title, $quote, $group_link, $group_name,
        $date_author, $thread_link, $thread_title)
        = ($1, $2, $3, $4, $5, $6, $7, $8);

    # HTML body of the item: the quote, then a line linking the group and
    # the thread.
    # NOTE(review): the tags in these strings were stripped; the anchors
    # and the line break are reconstructed -- confirm.
    my $content = $quote;
    $content .= "<br>\n";
    $content .= "<a href=\"$site$group_link\">";
    $content .= "$group_name</a>$date_author";
    $content .= "<a href=\"$site$thread_link\">";
    $content .= "$thread_title</a>";

    $rss->add_item(
        title       => $title,
        link        => "$site$link",
        description => encode_entities($content),
    );
}

print $rss->as_string;