#!/usr/bin/perl -w
#
# Original by Mihai Parparita
# Adapted by Jacques Distler, 4/2004.
#
# usage:
#
#   http://you.com/cgi-bin/nntp2rss.pl?group=sci.physics&server=newshost.utexas.edu&limit=10&google=threaded
#
# where
#
#   group  = the USENET newsgroup
#   limit  = the number of posts to fetch
#   server = the NewsServer to contact
#   google = false            link opens in your newsreader
#          = single | true    link opens in Google Groups in single-post mode
#          = threaded         link opens in Google Groups in threaded mode
#
# All arguments are optional, and have default values below.
#
# TODO: Implement caching.

use Net::NNTP;
use HTML::Entities;
use Text::Header;
use DateTime::Format::Mail;
use strict;

################################################################
# Default values for the parameters. Modify these to your taste,
# or override them by supplying arguments with your query.
#
my $kNNTPServer = "newshost.utexas.edu";
my $google      = "false";
my $limit       = 10;
my $group       = "sci.physics.research";
################################################################

my $linktype      = "news:";
my $grouplinktype = "news:";

# Parse the CGI query string into %args.  QUERY_STRING is absent when the
# script is run outside a web server, so fall back to an empty string
# instead of splitting undef.
my %args  = ();
my $query = defined $ENV{QUERY_STRING} ? $ENV{QUERY_STRING} : '';
for my $pair (split /&/, $query) {
    my ($key, $value) = split /=/, $pair, 2;
    next unless defined $key && length $key;
    $value = '' unless defined $value;

    # Undo percent-escapes only.  '+' is deliberately left alone because it
    # is a legal character in newsgroup names (e.g. comp.lang.c++).
    for my $part ($key, $value) {
        $part =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
    }
    $args{$key} = $value;
}

# Set parameters, based on query string.  The patterns are anchored so the
# WHOLE value must be well-formed: these values end up in the XML output
# and in the NNTP connection, so nothing else may slip through.
$limit = $args{limit}
    if defined $args{limit}
    && $args{limit} =~ /\A\d+\z/
    && $args{limit} > 0;
$group = $args{group}
    if defined $args{group} && $args{group} =~ /\A[\w.+-]+\z/;
$kNNTPServer = $args{server}
    if defined $args{server} && $args{server} =~ /\A[\w.-]+(?::\d+)?\z/;
$google = lc($args{google}) if $args{google};

if ($google eq "true" || $google eq "single") {
    $linktype      = "http://groups.google.com/groups?selm=";
    $grouplinktype = "http://groups.google.com/groups?group=";
}
elsif ($google eq "threaded") {
    $linktype      = "http://groups.google.com/groups?threadm=";
    $grouplinktype = "http://groups.google.com/groups?group=";
}

# Connect and select the group.  Both calls signal failure with an
# undefined/empty result; die with a readable message (nothing has been
# printed yet, so the web server reports an error) rather than crashing
# later on an undefined value.
my $news = Net::NNTP->new($kNNTPServer)
    or die "nntp2rss: cannot connect to news server '$kNNTPServer'\n";

my (undef, $firstID, $lastID) = $news->group($group);
unless (defined $firstID && defined $lastID) {
    $news->quit;
    die "nntp2rss: cannot select newsgroup '$group' on '$kNNTPServer'\n";
}

my @articles = ();

# One lenient date parser is enough for every article.
my $dateParser = DateTime::Format::Mail->new(loose => 1);

# Load articles from server, newest first.  The window covers the $limit
# highest article numbers; numbers the server no longer holds are skipped,
# so fewer than $limit items may be produced.
for (my $id = $lastID; $id >= $firstID && $lastID - $id < $limit; $id--) {

    # head() yields nothing for an expired or cancelled article.
    my $headersRef = $news->head($id) or next;

    # Process the headers of the article
    my %headers = unheader(@{$headersRef});
    if ($headers{'message_id'}) {
        $headers{'message_id'} =~ s/^<(.*)>/$1/;
    }

    my $articleRef = {};
    for my $field ('from', 'subject', 'message_id') {
        my $raw = defined $headers{$field} ? $headers{$field} : '';
        $articleRef->{$field} = encode_entities($raw);
    }

    # Fix the date, for servers which can't seem to do RFC 822 correctly.
    # The parser is run on the raw header inside eval: if the header is
    # missing or unparsable, fall back to the raw text instead of losing
    # the whole feed.
    my $rawDate  = defined $headers{'date'} ? $headers{'date'} : '';
    my $datetime = eval { $dateParser->parse_datetime($rawDate) };
    $articleRef->{'date'} = encode_entities(
        $datetime
        ? DateTime::Format::Mail->format_datetime($datetime)
        : $rawDate
    );

    # Process the body of the article
    my $bodyRef     = $news->body($id);
    my $articlebody = $bodyRef ? join('', @{$bodyRef}) : '';
    $articleRef->{body} = Transform($articlebody);

    push @articles, $articleRef;
}

$news->quit;

# Spit them out in RSS.
# NOTE(review): the markup in this copy of the script had been stripped;
# the Content-type header and the element names below are reconstructed
# from the surviving text -- confirm against a pristine copy.
print <<HEADER;
Content-type: text/xml

<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>$group</title>
<link>$grouplinktype$group</link>
<description>Latest $limit posts from the USENET newsgroup, $group.</description>
<ttl>15</ttl>
<generator>nntp2rss.pl</generator>
<docs>http://backend.userland.com/rss</docs>
HEADER

for my $article (@articles) {
    print "<item>\n",
          "  <title>$article->{subject}</title>\n",
          "  <pubDate>$article->{date}</pubDate>\n",
          "  <author>$article->{from}</author>\n",
          "  <link>$linktype$article->{message_id}</link>\n",
          "  <description>$article->{body}</description>\n",
          "</item>\n";
}

print <<FOOTER;
</channel>
</rss>
FOOTER

# Transform($text)
#
# Turn the plain text of an article body into escaped (X)HTML suitable for
# an RSS <description> element.
#
#   $text   - the article body as one string
#   returns - the paragraph markup, entity-encoded as a whole
sub Transform {
    my ($in) = @_;

    # Here we process using blogger's conventions: single newline gives
    # a <br />, double newline starts a new paragraph.
    my @paras = split /\r?\n\r?\n/, encode_entities(decode_entities($in));
    for my $p (@paras) {
        # The text was entity-encoded above, so a paragraph can no longer
        # begin with a literal '<'; the test is kept to leave the produced
        # markup unchanged.
        if ($p !~ m/^<(?:table|ol|ul|pre|select|form|blockquote|div)/) {
            $p =~ s!\r?\n!<br />\n!g;
            $p = "<p>$p</p>";
        }
    }

    # Finally, encode the result, suitable for inclusion as escaped (X)HTML.
    return encode_entities( join("\n\n", @paras) );
}