#!/usr/bin/perl
#
# Scrape an HTML diary page and republish its entries as an RSS feed
# rendered as JavaScript (via XML::RSS::JavaScript).
#
# Each entry is located by a named anchor (<a name="...">). The text
# following the next <strong> tag becomes the item title, and everything
# between the next <dd> and its closing </dd> becomes the item description.
# The result is printed to STDOUT.

use strict;
use warnings;

use XML::RSS::JavaScript;
use LWP::Simple;
use HTML::Entities;
use HTML::TokeParser::Simple;

my $rss = XML::RSS::JavaScript->new;
my $url = "http://www.linux.org.uk/~telsa/Diary/diary.html";

# LWP::Simple::get returns undef on any failure; stop with a clear message
# instead of feeding undef to the parser.
my $page = get($url);
die "Could not fetch $url\n" unless defined $page;

my $stream = HTML::TokeParser::Simple->new(\$page)
    or die "Could not create an HTML parser for $url\n";

$rss->channel(
    title       => "The more accurate diary. Really.",
    link        => $url,
    description => "Telsa's diary of life with a hacker:"
                 . " the current ramblings",
);

ENTRY:
while (my $anchor = $stream->get_tag('a')) {

    # Only named anchors mark diary entries; ordinary links are skipped.
    # The defined() check avoids an "uninitialized value" warning for
    # anchors that carry no name attribute at all.
    my $link = $anchor->return_attr("name");
    next ENTRY unless defined $link && $link ne "";

    # The token immediately after <strong> is taken as the entry title.
    # If the document ends before we find it, there is nothing more to read.
    $stream->get_tag('strong') or last ENTRY;
    my $title_token = $stream->get_token or last ENTRY;
    my $title = $title_token->as_is;

    # The entry body is everything from <dd> up to the matching </dd>.
    $stream->get_tag('dd') or last ENTRY;

    my $content = "";
    # get_token returns undef at end of input. A missing </dd> must not
    # crash the script; in that case keep whatever was collected.
    while (defined(my $token = $stream->get_token)) {
        last if $token->is_end_tag('/dd');
        $content .= $token->as_is;
    }

    # NOTE(review): some XML::RSS versions entity-encode output themselves;
    # confirm this explicit encode_entities does not double-encode there.
    $rss->add_item(
        title       => $title,
        link        => "$url#$link",
        description => encode_entities($content),
    );
}

print $rss->as_javascript;

# We can also use $rss->save('file.xml')
# as well as $rss->save_javascript('file.js')
# to have this script write files.