cvs commit: www manage_news.pl

lizardo at linuxfromscratch.org lizardo at linuxfromscratch.org
Thu Oct 16 14:01:53 PDT 2003


lizardo     03/10/16 15:01:53

  Modified:    .        manage_news.pl
  Log:
  manage_news.pl: major code improvements. "Read previous news" implementation.
  
  Revision  Changes    Path
  1.13      +125 -79   www/manage_news.pl
  
  Index: manage_news.pl
  ===================================================================
  RCS file: /home/cvsroot/www/manage_news.pl,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- manage_news.pl	19 Sep 2003 20:53:07 -0000	1.12
  +++ manage_news.pl	16 Oct 2003 21:01:53 -0000	1.13
  @@ -17,13 +17,14 @@
   # along with this program; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   
  -# $Id: manage_news.pl,v 1.12 2003/09/19 20:53:07 lizardo Exp $
  +# $Id: manage_news.pl,v 1.13 2003/10/16 21:01:53 lizardo Exp $
   
   use warnings;
   use strict;
   
   use File::Path qw(mkpath);
   use File::Spec qw(splitpath);
  +use POSIX qw(strftime);
   use Getopt::Long;
   use MIME::Parser;
   use HTML::Parser;
  @@ -58,104 +59,133 @@
   # Text buffer, used to store parsed HTML data
   my $buffer = "";
   
  -# parsed MIME::Entity object
  -my $entity;
  -# Filename of the input file, or <STDIN> in case of standard input
  -my $source;
  +my $mime_entity;
  +my $input_name;
   
   # Read the input file if one is given; otherwise, read from STDIN
   if ($infile) {
  -    $source = $infile;
  -    eval { $entity = $parser->parse_open($infile) } or pod2usage("$0: $@");
  +    $input_name = $infile;
  +    eval { $mime_entity = $parser->parse_open($infile) } or pod2usage("$0\: $@");
   }
   else {
  -    $source = "<STDIN>";
  -    $entity = $parser->parse(\*STDIN);
  +    $input_name = "<STDIN>";
  +    $mime_entity = $parser->parse(\*STDIN);
   }
   
  -my $base_url = "";
  -my $section = lc(Get_header($entity->head, "section"));
  -die "$0\: $source\: Invalid section name: " . $section . "\n" unless $section =~ /^[\w-]+$/;
  +my $lfs_url = "http://linuxfromscratch.org/";
  +my $section = lc(Get_header($mime_entity->head, "section"));
  +die "$0\: $input_name\: Invalid section name: " . $section . "\n" unless $section =~ /^[\w-]+$/;
  +if ($section =~ /^general$/) {
  +    $section = "";
  +}
  +else {
  +    # Create the path to $section for URLs
  +    $section .= "/";
  +}
  +
  +my $change_urls = sub { return $_[0] };
   
   if ($archive_under) {
  -    my %archive_files = ();
  -    foreach my $part ($entity->parts) {
  -        # Date field must be in YYYY/MM/DD format
  -        # It also avoids possible cross-site scripting
  -        if (Get_header($part->head, "date") !~ /^\d{4}\/\d{2}\/\d{2}$/) {
  -            die "$0\: $source\: Invalid date: " . Get_header($part->head, "date") . "\n" .
  -            "News header:\n---\n" . $part->head->as_string . "\n---\n";
  -        }
  -        # Split year and month, and store the destination archive location
  -        # ($archive_under/YYYY/MM.html)
  -        my ($year, $month, undef) = split /\//, Get_header($part->head, "date");
  -        $archive_files{$archive_under . "/" . $year . "/" . $month . ".html"} = 1;
  -    }
  -    foreach my $archive_file (keys %archive_files) {
  -        # Create the destination archive location
  -        my (undef, $dir, undef) = File::Spec->splitpath($archive_file);
  +    # News items' dates (%dates->year->month)
  +    my $dates = {};
  +    $dates->{isodate2any($_->head, '%Y')}->{isodate2any($_->head, '%m')} = 1 foreach ($mime_entity->parts);
  +
  +    my $html_p = HTML::Parser->new(api_version => 3,
  +        start_h => [\&handle_StartTag, "tagname, attr"],
  +        end_h => [\&handle_EndTag, "tagname"],
  +        text_h => [\&handle_Text, "dtext" ],
  +        declaration_h => [\&handle_Dec, "text"],
  +    );
  +    $html_p->attr_encoded(1);
  +    $html_p->xml_mode(1);
  +
  +    # Create index.html files
  +    foreach my $year (keys %{$dates}) {
  +        my $dir = $archive_under . "/" . $section . $year;
           eval { mkpath($dir) unless -d $dir } or die "$0\: Could not create " . $dir . ": $@\n";
  -        # If a top template is given, prepend it to the output
  +        open (INDEX, ">$dir/index.html") or die "$0\: Could not open $dir/index.html: $!\n";
           if ($top) {
  +            $change_urls = sub { return $_[0] };
  +            $html_p->parse_file($top) or die "$0\: Could not parse " . $top . ": $!\n";
  +            $html_p->eof;
  +            print INDEX $buffer;
  +            $buffer = "";
  +        }
  +        print INDEX "<h3>$year</h3>\n<ul>\n";
  +        foreach my $month (keys %{$dates->{$year}}) {
  +            print INDEX "\t<li><a href=\"$month.html\">" . strftime('%B',0,0,0,1,$month - 1,$year - 1900) .
  +            "</a></li>\n";
  +        }
  +        print INDEX "</ul>\n";
  +        if ($bottom) {
  +            $change_urls = sub { return $_[0] };
  +            $html_p->parse_file($bottom) or die "$0\: Could not parse " . $bottom . ": $!\n";
  +            $html_p->eof;
  +            print INDEX $buffer;
  +            $buffer = "";
  +        }
  +        close INDEX;
  +    }
  +    # Create news archives
  +    foreach my $year (keys %{$dates}) {
  +        foreach my $month (keys %{$dates->{$year}}) {
  +            my $archive_file = $archive_under . "/" . $section . $year . "/" . $month . ".html";
               open(NEWS, ">$archive_file") or die "$0\: Could not open $archive_file" . ": $!\n";
  -            open(TOP, $top) or die "$0\: Could not open " . $top . ": $!\n";
  -            print NEWS <TOP>;
  -            close TOP;
  +            if ($top) {
  +                $change_urls = sub { return $_[0] };
  +                $html_p->parse_file($top) or die "$0\: Could not parse " . $top . ": $!\n";
  +                $html_p->eof;
  +                print NEWS $buffer;
  +                $buffer = "";
  +            }
               close NEWS;
           }
       }
  -    foreach my $part ($entity->parts) {
  -        if ($section =~ /^general$/) {
  -            $base_url = "http://linuxfromscratch.org/news/";
  -        }
  -        else {
  -            $base_url = "http://linuxfromscratch.org/".$section."/";
  -        }
  -        # Use the HTML::Parser and URI modules to resolve relative links
  -        my $html_p = HTML::Parser->new(api_version => 3,
  -            start_h => [\&handle_StartTag, "tagname, attr"],
  -            end_h => [\&handle_EndTag, "tagname"],
  -            text_h => [\&handle_Text, "dtext" ],
  -        );
  -        $html_p->attr_encoded(1);
  -        my ($year, $month, undef) = split /\//, Get_header($part->head, "date");
  -        my $archive_file = $archive_under."/".$year."/".$month.".html";
  +    foreach my $part ($mime_entity->parts) {
  +        my $archive_file = $archive_under . "/" . $section . isodate2any($part->head, '%Y/%m') . ".html";
           open(NEWS, ">>$archive_file") or die "$0\: Could not open $archive_file" . ": $!\n";
  -        $html_p->parse(mime2html($section, $part));
  +        $change_urls = sub { local $URI::ABS_REMOTE_LEADING_DOTS = 1; return URI->new($_[0])->abs($lfs_url . $section) };
  +        $html_p->parse(mime2html($part));
           $html_p->eof;
           print NEWS $buffer . "\n\n";
           $buffer = "";
           close NEWS;
       }
  -    foreach my $archive_file (keys %archive_files) {
  -        # If a bottom template is given, append it to the output
  -        if ($bottom) {
  +    foreach my $year (keys %{$dates}) {
  +        foreach my $month (keys %{$dates->{$year}}) {
  +            my $archive_file = $archive_under . "/" . $section . $year . "/" . $month . ".html";
               open(NEWS, ">>$archive_file") or die "$0\: Could not open $archive_file" . ": $!\n";
  -            open(BOTTOM, $bottom) or die "$0\: Could not open " . $bottom . ": $!\n";
  -            print NEWS <BOTTOM>;
  -            close BOTTOM;
  +            if ($bottom) {
  +                $change_urls = sub { return $_[0] };
  +                $html_p->parse_file($bottom) or die "$0\: Could not parse " . $bottom . ": $!\n";
  +                $html_p->eof;
  +                print NEWS $buffer;
  +                $buffer = "";
  +            }
               close NEWS;
           }
       }
   }
   else {
       print '<p><a href="#header">Back to the top.</a></p>' . "\n" .
  -    '<h2 id="generalnews">General news</h2>' . "\n" if $section eq "general";
  +    '<h2 id="generalnews">General news</h2>' . "\n" unless $section;
       my $count = 0;
  -    foreach my $part ($entity->parts) {
  +    foreach my $part ($mime_entity->parts) {
           last if $count++ == 2;
  -        print mime2html($section, $part);
  +        print mime2html($part);
       }
  +    my $year = isodate2any(${[$mime_entity->parts]}[0]->head, '%Y');
  +    print "<p><a href=\"${lfs_url}news/${section}${year}/\">Read previous news</a></p>\n";
       print '<p><a href="#header">Back to the top.</a></p>' . "\n" .
  -    '<h2 id="changelog">Latest CVS changes:</h2>' . "\n" if $section eq "general";
  +    '<h2 id="changelog">Latest CVS changes:</h2>' . "\n" unless $section;
   }
   
   if ($parser->results->errors) {
  -    print STDERR "$0\: $source\: " . $_ foreach ($parser->results->errors);
  +    print STDERR "$0\: $input_name\: " . $_ foreach ($parser->results->errors);
   }
   
   if ($parser->results->warnings) {
  -    print STDERR "$0\: $source\: " . $_ foreach ($parser->results->warnings);
  +    print STDERR "$0\: $input_name\: " . $_ foreach ($parser->results->warnings);
   }
   
   ##########################
  @@ -164,11 +194,11 @@
   sub handle_StartTag {
       my ($tag, $attrs) = @_;
   
  -    if ($tag eq "a" and $$attrs{"href"}) {
  -        $$attrs{"href"} = URI->new($$attrs{"href"})->abs($base_url)->as_string;
  +    if ($$attrs{"href"}) {
  +        $$attrs{"href"} = &$change_urls($$attrs{"href"}) if $$attrs{"href"} !~ /^#/;
       }
       elsif ($tag eq "img" and $$attrs{"src"}) {
  -        $$attrs{"src"} = URI->new($$attrs{"src"})->abs($base_url)->as_string;
  +        $$attrs{"src"} = &$change_urls($$attrs{"src"});
       }
       $buffer .= "<$tag";
       $buffer .= " $_=\"$$attrs{$_}\"" foreach (keys %$attrs);
  @@ -189,9 +219,17 @@
   sub handle_Text {
       my ($text) = @_;
   
  -    $text =~ s/\&/\&amp;/g;
  -    $text =~ s/</\&lt;/g;
  -    $text =~ s/>/\&gt;/g;
  +    # Only substitute ampersand if it is not part of an entity
  +    # Regexp extracted from lfs2rss.pl, written by Rob Park
  +    $text =~ s/&(?!\w+;)/&amp;/g;
  +    $text =~ s/</&lt;/g;
  +    $text =~ s/>/&gt;/g;
  +    $buffer .= $text;
  +}
  +
  +sub handle_Dec {
  +    my ($text) = @_;
  +
       $buffer .= $text;
   }
   
  @@ -204,7 +242,7 @@
       my $hdr_content = $header->get($name);
       # Die if the field is not found
       if (!defined($hdr_content)) {
  -        die "$0\: $source\: Could not find header field " . $name . "\n" .
  +        die "$0\: $input_name\: Could not find header field " . $name . "\n" .
           "Header contents:\n" . $header->as_string . "\n";
       }
       $hdr_content =~ s/^\s*//;
  @@ -213,18 +251,26 @@
       return $hdr_content;
   }
   
  -# Translate the MIME data to HTML
  -sub mime2html {
  -    my ($sect, $mime_part) = @_;
  -
  -    my ($year, $month, undef) = split /\//, Get_header($mime_part->head, "date");
  -    my $archive_url;
  -    if ($sect =~ /^general$/) {
  -        $archive_url = "http://linuxfromscratch.org/news/".$year."/".$month.".html";
  +# Convert ISO 8601 date (yyyy/mm/dd) to the specified format
  +sub isodate2any {
  +    my ($header, $format) = @_;
  +    # Date field must be in YYYY/MM/DD format
  +    # This check avoids possible cross-site scripting
  +    if (Get_header($header, "date") =~ /^(\d{4})\/(\d{2})\/(\d{2})$/) {
  +        return strftime($format, 0, 0, 0, $3, $2 - 1, $1 - 1900);
       }
       else {
  -        $archive_url = "http://linuxfromscratch.org/news/".$sect."/".$year."/".$month.".html";
  +        die "$0\: $input_name\: Invalid date: " . Get_header($header, "date") . "\n" .
  +        "News header:\n---\n" . $header->as_string . "\n---\n";
       }
  +}
  +
  +# Translate the MIME data to HTML
  +sub mime2html {
  +    my ($mime_part) = @_;
  +
  +    my $archive_url = $lfs_url . "news/" . $section .
  +    isodate2any($mime_part->head, '%Y/%m') . ".html";
       my $news_id;
       if (defined($mime_part->head->get("id"))) {
           $news_id = lc(Get_header($mime_part->head, "id"));
  @@ -256,7 +302,7 @@
           --infile|-i         Parse MIME news database from given file
           --top|-t            Prepend top.html to the output
           --bottom|-b         Append bottom.html to the output
  -        --archive-under|-a  Output news under output_dir/YYYY/MM.html
  +        --archive-under|-a  Output news under output_dir/{,section/}YYYY/MM.html
           --help              Show brief help message
           --man               Full documentation
   
  @@ -282,7 +328,7 @@
   
   =item B<--archive-under output_dir>
   
  -Output news under F<output_dir/[section/]YYYY/MM.html>, where YYYY and MM are numeric
  +Output news under F<output_dir/{,section/}YYYY/MM.html>, where YYYY and MM are numeric
   values for year and month, respectively. By default, B<manage_news.pl>
   outputs the five last news to standard output.
   
  
  
  



More information about the website mailing list