#!/usr/bin/perl -w
# sitegen.pl populates a website's pages with drop-down menus.
# this script relies on these two custom Meta tags:
#
#
# the "group" tag should be in each page
# the "groupTitle" tag only needs to be in
# one of each group's pages.
# This scheme also relies on pages consistently having page titles,
# and unique page titles within each group's pages.
# set group to "NOSHOW" for pages not to appear in the menus.
# also, each named group must have at least one associated page
# so it can be displayed in the menus
# if an argument of "-z" is provided to this script,
# it will remove all its content from the
# set of Web pages
# WARNING: this script replaces pages en masse.
# ALWAYS maintain a full backup of your page set.
# Copyright (c) 2005, P. Lutus, released under the GPL.
# BEGIN user settings
$source = ($ARGV[0])?$ARGV[0]:"/path/to/Web/pages";
$plainTextIndexPath = "/path/siteIndex.txt";
$rightArrowPath = "$source/images/rightarrow.png";
$leftArrowPath = "$source/images/leftarrow.png";
# END user settings
$startBlockTag = "";
$endBlockTag = "";
$idTag = "";
$scriptTag =
"\n";
use File::Find;
sub processDir {
my $path = $File::Find::name;
# if it's a file
if ( -f $path ) {
# if it has an apparent HTML/PHP page suffix
# and doesn't have a tilde at the start of its name
if ( $path =~ /\.(htm[l|]|php)$/ && !( $path =~ /\/\~.*/ ) ) {
push( @fileList, $path );
}
}
}
sub readFile {
local $/;
open( FILE, $_[0] );
my $data = ;
close FILE;
return $data;
}
sub writeFile {
open( FILE, ">$_[0]" );
print FILE $_[1];
close FILE;
}
sub readMetaTag {
my ( $dataRef, $tag ) = @_;
my ($value) =
${$dataRef} =~ /meta\s*name\s*=\s*\"$tag\"\s*content\s*=\s*\"(.*?)\"/is;
return $value || "";
}
sub readPageTitle {
my $dataRef = $_[0];
my ($title) = ${$dataRef} =~ /\s*(.*?)\s*<\/title>/is;
if ( !$title ) {
$path = $_[1];
if ( !$path =~ /\.php$/i ) {
print "Error: HTML page without a title: \"$path\"\n";
}
$title = "No Title";
}
return $title;
}
# relative path for HTML links
sub makeRelPath {
my ( $from, $to ) = @_;
my @fromlist = split( "/", $from );
my @tolist = split( "/", $to );
# are the paths identical?
if ( $from eq $to ) {
$result = pop @tolist;
}
else {
# drop path elements until unequal
while ( $fromlist[0] eq $tolist[0] ) {
shift @fromlist;
shift @tolist;
}
# create relative path from -> to
$result = "../" x $#fromlist;
$result .= join( "/", @tolist );
}
return $result;
}
# use ' members' and ' grouptitle'
# (note leading spaces)
# to assure that these items appear
# before the subgroup list
sub create_siteindex {
my @fileList = @{ $_[0] };
for my $file (@fileList) {
my $data = readFile($file);
my $groupName = readMetaTag( \$data, "group" );
my $groupTitle = readMetaTag( \$data, "groupTitle" );
my $pageTitle = readPageTitle( \$data, $file );
if ( length($groupName) > 0 ) {
$hasgroup++;
if ( $groupName eq "NOSHOW" ) {
# exclude these pages
$noshow++;
}
else {
my $tree = $_[1];
my @items = split( "/", $groupName );
for my $item (@items) {
$tree->{$item} = {} unless defined $tree->{$item};
$tree = $tree->{$item};
}
$tree->{' members'}{$pageTitle} = $file;
if ( length($groupTitle) > 0 ) {
$tree->{' grouptitle'} = $groupTitle;
}
}
}
$total++;
}
}
# just for debugging
sub debug_dump_hash {
my ( $treeRef, $path ) = @_;
for my $key ( sort keys %{$treeRef} ) {
my $value = $treeRef->{$key};
if ( ref($value) eq "HASH" ) {
debug_dump_hash( \%{$value}, "$path|$key" );
}
else {
print "$path|$key = $value\n";
}
}
}
sub makeSiteIndexBlock {
my ( $parentRef, $keysetRef, $treeRef, $i, $sourcePath ) = @_;
my $result = $startBlockTag . "\n" . $idTag . "\n
";
for ( $j = 1 ; $j <= $i ; $j++ ) {
my $destPath = $pagePaths[$j];
my $destName = $groupTitles[$j];
$result .=
"
\n" . $endBlockTag;
}
# process all the member pages of a named group
sub process_group {
my ( $parentRef, $keysetRef, $treeRef, $i ) = @_;
my $groupPath = "";
my $pagePath = "";
for ( $j = 1 ; $j <= $i ; $j++ ) {
$groupPath .= "$groupTitles[$j]|";
$pagePath .= "$pagePaths[$j]|";
}
# $key = page title, $value = page path
for my $key ( @{$keysetRef} ) {
my $value = $treeRef->{$key};
print INDEXFILE "$groupPath$key\t$value\n";
my $pageData = readFile($value);
$changed = 0;
if ($zap) { # if only remove index content
if ( $pageData =~ /$startBlockTag/ ) {
$pageData =~ s/($startBlockTag.*?$endBlockTag\s*)//isg;
$changed = 1;
}
}
else {
# the site index block is unique to each page
# because it uses relative page addressing
# so it must be regenerated for each page
my $topIndexBlock =
makeSiteIndexBlock( $parentRef, $keysetRef, $treeRef, $i,
$value );
my $bottomIndexBlock = $topIndexBlock;
# the bottom index doesn't need the Javascript code
$bottomIndexBlock =~ s/\n//sg;
# get the original blocks from the page data
my ( $originalTop, $originalBottom ) =
$pageData =~ /($startBlockTag.*?$endBlockTag)/isg;
# test: have there been any changes?
# if not, there's no need to rewrite this page
$changed =
( !$originalTop
|| !$originalBottom
|| $topIndexBlock ne $originalTop
|| $bottomIndexBlock ne $originalBottom ) ? 1 : 0;
if ($changed) {
# zap existing blocks
if ( $pageData =~ /$startBlockTag/ ) {
$pageData =~ s/($startBlockTag.*?$endBlockTag\s*)//isg;
}
# put in new blocks
$pageData =~ s/()(\s*)/$1\n$topIndexBlock\n/is;
$pageData =~
s/(.*?)(\s*)(<\/body.*?>)/$1\n$bottomIndexBlock\n$3/is;
}
}
if ($changed) {
writeFile( $value, $pageData );
$changedPages++;
}
}
}
sub process_tree {
my ( $treeRef, $i ) = @_;
for my $key ( sort keys %{$treeRef} ) {
my $value = $treeRef->{$key};
if ( ref($value) eq "HASH" ) {
if ( $key eq " members" ) {
# it's a hash of member pages
@keyset = ( sort keys %{$value} );
my $firstkey = $keyset[0];
my $firstvalue = $value->{$firstkey};
$pageTitles[$i] = $firstkey;
$pagePaths[$i] = $firstvalue;
process_group( $treeRef, \@keyset, $value, $i );
}
else {
# it's a subgroup hash
$groupNames[$i] = $key;
process_tree( \%{$value}, $i + 1 );
}
}
else {
# it contains the group's title
if ( $key eq " grouptitle" ) {
$groupTitles[$i] = $value;
}
}
}
}
# the zap option removes all this script's
# content from the page set
$zap = ( $ARGV[0] && $ARGV[0] eq "-z" ) ? 1 : 0;
@fileList = ();
find( { wanted => \&processDir, no_chdir => 1 }, $source );
%tree = ();
$hasgroup = 0;
$total = 0;
$noshow = 0;
$changedPages = 0;
create_siteindex( \@fileList, \%tree );
@groupTitles = ();
@groupNames = ();
@pageTitles = ();
@pagePaths = ();
# debug_dump_hash(\%tree,"");
open( INDEXFILE, ">$plainTextIndexPath" );
process_tree( \%tree, 0 );
close INDEXFILE;
print "total pages = $total, changed = $changedPages, noshow = $noshow\n";
# use this for "if ./sitegen.pl; then do something; fi
# when pages are changed
exit( $changedPages == 0 );