Nice, relaxing Perl munging. :) Have another change to put into this

changeset - afaik, I can edit changesets with bk. So, rather than wait, I will commit this now, finish up the other code, try to alter the changeset and then push all the changes up. Docs/Support/docbook-fixup.pl: Rewrote to use a more native Perl style. Increase strictness of error checking. Simplified and optimized regular expressions. Fixed several problems with conversion. Still have some minor issues to sort out. Rewrote to accept input from stdin or from filename arg(s) on the command line. Improved speed ~6x. Docs/Support/docbook-split: Rewrote to use a more native Perl style. Increase strictness of error checking. Simplified and optimized. Rewrote to accept input from stdin or from filename arg(s) on the command line.

Nice, relaxing Perl munging. :) Have another change to put into this
changeset - afaik, I can edit changesets with bk. So, rather than wait, I will commit this now, finish up the other code, try to alter the changeset and then push all the changes up. Docs/Support/docbook-fixup.pl: Rewrote to use a more native Perl style. Increase strictness of error checking. Simplified and optimized regular expressions. Fixed several problems with conversion. Still have some minor issues to sort out. Rewrote to accept input from stdin or from filename arg(s) on the command line. Improved speed ~6x. Docs/Support/docbook-split: Rewrote to use a more native Perl style. Increase strictness of error checking. Simplified and optimized. Rewrote to accept input from stdin or from filename arg(s) on the command line.
633d54b6 · unknown · e8291479 · 633d54b6 · 633d54b6
Commit 633d54b6 authored Feb 27, 2002 by unknown
Hide whitespace changes
Inline Side-by-side

Showing with 163 additions and 182 deletions

Docs/Support/docbook-fixup.pl Docs/Support/docbook-fixup.pl +127 -120

Docs/Support/docbook-split Docs/Support/docbook-split +36 -62

No files found.
--- a/Docs/Support/docbook-fixup.pl
+++ b/Docs/Support/docbook-fixup.pl
 #!/usr/bin/perl -w
-# 2002-02-15 zak@mysql.com
-# Use -w to make perl print useful warnings about the script being run
-sub fix_underscore {
+# Fix the output of `makeinfo --docbook` version 4.0c
-  $str = shift;
+# Convert the broken docbook output to well-formed XML that conforms to the O'Reilly idiom
-  $str =~ tr/_/-/;
+# See code for detailed comments
-  return $str;
+# Authors: Arjen Lentz and Zak Greant
-};
-sub strip_emph {
+use strict;
-  $str = shift;
-  $str =~ s{<emphasis>(.+?)</emphasis>}
-           {$1}gs;
-  return $str;
-};
-print STDERR "\n--Post-processing makeinfo output--\n";
+my $data  = '';
+my @apx   = ();
+my $apx   = '';
+my @nodes = ();
+my $nodes = '';
-# 2002-02-15 zak@mysql.com
+msg ("\n-- Post-processing `makeinfo --docbook` output --");
-print STDERR "Discard DTD - ORA can add the appropriate DTD for their flavour of DocBook\n";
+msg ("** Written to work with makeinfo version 4.0c **\n");
-<STDIN>;
-print STDERR "Slurp! In comes the rest of the file. :)\n";
+msg ("Discarding DTD - not required by subsequent scripts");
-$data = join "", <STDIN>;
+# <> is a magic filehandle - either reading lines from stdin or from file(s) specified on the command line
+<>;
-# 2002-02-15 zak@mysql.com
+msg ("Create an XML PI with ISO-8859-1 character encoding");
-print STDERR "Add an XML processing instruction with the right character encoding\n";
+$data = "<?xml version='1.0' encoding='ISO-8859-1'?>";
-$data = "<?xml version='1.0' encoding='ISO-8859-1'?>" . $data;
-# 2002-02-15 zak@mysql.com
+msg ("Get the rest of the data");
-# Less than optimal - should be fixed in makeinfo
+$data = $data . join "", <>;
-print STDERR "Put in missing <bookinfo> and <abstract>\n";
-$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/gs;
-# 2002-02-15 zak@mysql.com
+msg ("Add missing <bookinfo> and <abstract> opening tags");
-print STDERR "Convert existing ampersands to escape sequences \n";
+# Note the absence of the g (global) pattern modified. This situation can only happen once.
-$data =~ s/&(?!\w+;)/&amp;/gs;
+# ...as soon as we find the first instance, we can stop looking.
+$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/;
-# 2002-02-15 zak@mysql.com
+msg ("Removing mailto: from email addresses...");
-# Need to talk to Arjen about what the <n> bits are for
+$data =~ s/mailto://g;
-print STDERR "Rework references of the notation '<n>'\n";
-$data =~ s/<(\d)>/[$1]/gs;
+msg ("Removing INFORMALFIGURE...");
+$data =~ s{<informalfigure>.+?</informalfigure>}
+          {}gs;
+msg ("Convert ampersands to XML escape sequences ");
+$data =~ s/&(?!\w+;)/&amp;/g;
-# 2002-02-15 zak@mysql.com
+msg ("Changing @@ to @...");
-# We might need to encode the high-bit characters to ensure proper representation
+$data =~ s/@@/@/g;
-# print STDERR "Converting high-bit characters to entities\n";
-# $data =~ s/([\200-\400])/&get_entity($1)>/gs;
-# There is no get_entity function yet - no point writing it til we need it :)
-print STDERR "Changing @@ to @...\n";
+msg ("Rework references of the notation '<n>'");
-$data =~ s/@@/@/gs;
+# Need to talk to Arjen about what the <n> bits are for
+$data =~ s/<(\d)>/[$1]/g;
-print STDERR "Changing '_' to '-' in references...\n";
+msg ("Changing '_' to '-' in references...");
-$data =~ s{id=\"(.+?)\"}
+$data =~ s{((?:id|linkend)=\".+?\")}
-          {"id=\"".&fix_underscore($1)."\""}gsex;
+          {&underscore2hyphen($1)}gex;
-$data =~ s{linkend=\"(.+?)\"}
-          {"linkend=\"".&fix_underscore($1)."\""}gsex;
-print STDERR "Changing ULINK to SYSTEMITEM...\n";
+msg ("Changing ULINK to SYSTEMITEM...");
-$data =~ s{<ulink url=\"(.+?)\"></ulink>}
+$data =~ s{<ulink url=\"(.+?)\">\s*</ulink>}
          {<systemitem role=\"url\">$1</systemitem>}gs;
-print STDERR "Removing INFORMALFIGURE...\n";
+msg ("Adding PARA inside ENTRY...");
-$data =~ s{<informalfigure>(.+?)</informalfigure>}
-          {}gs;
-print STDERR "Adding PARA inside ENTRY...\n";
 $data =~ s{<entry>(.*?)</entry>}
          {<entry><para>$1</para></entry>}gs;
-print STDERR "Removing mailto: from email addresses...\n";
+msg ("Fixing spacing problem with titles...");
-$data =~ s{mailto:}
+$data =~ s{(</\w+>)(\w{2,})}
-          {}gs;
+          {$1 $2}gs;
-print STDERR "Fixing spacing problem with titles...\n";
-$data =~ s{</(\w+)>(\w{2,})}
-          {</$1> $2}gs;
-# 2002-02-15 arjen@mysql.com
+msg ("Adding closing / to XREF and COLSPEC tags...");
-print STDERR "Adding closing / to XREF...\n";
+$data =~ s{<(xref|colspec) (.+?)>}
-$data =~ s{<xref (.+?)>}
+          {<$1 $2 />}gs;
-          {<xref $1 />}gs;
-# 2002-02-22 arjen@mysql.com
+# Probably need to strip these
-print STDERR "Adding \"See \" to XREFs that used to be \@xref...\n";
+msg ('Adding "See " to XREFs that used to be @xref...');
-$data =~ s{([\.\'\!\)])[\n ]*<xref }
+$data =~ s{([.'!)])\s*<xref }
          {$1 See <xref }gs;
-# 2002-02-22 arjen@mysql.com
+msg ('Adding "see " to (XREFs) that used to be (@pxref)...');
-print STDERR "Adding \"see \" to (XREFs) that used to be (\@pxref)...\n";
+$data =~ s{([([,;])(\s*)<xref }
-$data =~ s{(\(|[[,;])([\n]*[ ]*)<xref }
          {$1$2see <xref }gs;
-# 2002-01-30 arjen@mysql.com
+msg ("Making first row in table THEAD...");
-print STDERR "Removing COLSPEC...\n";
+$data =~ s{( *)<tbody>(\s*<row>.+?</row>)}
-$data =~ s{\n *<colspec colwidth=\"[0-9]+\*\">}
+          {$1<thead>$2\n$1</thead>\n$1<tbody>}gs;
-          {}gs;
-# 2002-01-31 arjen@mysql.com
-print STDERR "Making first row in table THEAD...\n";
-$data =~ s{([ ]*)<tbody>\n([ ]*<row>(.+?)</row>)}
-          {$1<thead>\n$2\n$1</thead>\n$1<tbody>}gs;
-# 2002-01-31 arjen@mysql.com
+msg ("Removing EMPHASIS inside THEAD...");
-print STDERR "Removing EMPHASIS inside THEAD...\n";
 $data =~ s{<thead>(.+?)</thead>}
-          {"<thead>".&strip_emph($1)."</thead>"}gsex;
+          {"<thead>".&strip_tag($1, 'emphasis')."</thead>"}gsex;
-# 2002-01-31 arjen@mysql.com
+msg ("Removing empty PARA...");
-print STDERR "Removing lf before /PARA in ENTRY...\n";
+$data =~ s{<para>\s*</para>}
-$data =~ s{(<entry><para>(.+?))\n(</para></entry>)}
+          {}gs;
-          {$1$3}gs;
-# 2002-01-31 arjen@mysql.com (2002-02-15 added \n stuff)
-print STDERR "Removing whitespace before /PARA if not on separate line...\n";
-$data =~ s{([^\n ])[ ]+</para>}
-          {$1</para>}gs;
-# 2002-01-31 arjen@mysql.com
+msg ("Removing lf before /PARA in ENTRY...");
-print STDERR "Removing empty PARA in ENTRY...\n";
+$data =~ s{\n(</para></entry>)}
-$data =~ s{<entry><para></para></entry>}
+          {$1}gs;
-          {<entry></entry>}gs;
-# 2002-01-31 arjen@mysql.com
+msg ("Removing whitespace before /PARA if not on separate line...");
-print STDERR "Removing PARA around INDEXENTRY if no text in PARA...\n";
+$data =~ s{(\S+)[\t ]+</para>}
-$data =~ s{<para>((<indexterm role=\"(cp|fn)\">(<(primary|secondary)>[^<]+?</(primary|secondary)>)+?</indexterm>)+?)[\n]*</para>[\n]*}
+          {$1</para>}g;
-          {$1\n}gs;
-# -----
+msg ("Removing PARA around INDEXTERM if no text in PARA...");
+$data =~ s{<para>((?:<indexterm role=\"(?:cp|fn)\">(?:<(primary|secondary)>[^>]+</\2>)+?</indexterm>)+?)\s*</para>}
+          {$1}gs;
-@apx = ("Users", "MySQL Testimonials", "News",
+@apx = ("Users", "MySQL Testimonials", "News", "GPL-license", "LGPL-license");
-        "GPL-license", "LGPL-license");
 foreach $apx (@apx) {
-  print STDERR "Removing appendix $apx...\n";
+    msg ("Removing appendix $apx...");
-  $data =~ s{<appendix id=\"$apx\">(.+?)</appendix>}
+    $data =~ s{<appendix id=\"$apx\">(.+?)</appendix>}
-            {}gs;
+              {}gs;
-  print STDERR " ... Building list of removed nodes ...\n";
+    # Skip to next appendix regex if the regex did not match anything
-  foreach(split "\n", $&) {
+    next unless (defined $&);
-    push @nodes, $2 if(/<(\w+) id=\"(.+?)\">/)
-  };
+    msg ("...Building list of removed nodes...");
-};
+    # Split the last bracketed regex match into an array
+    # Extract the node names from the tags and push them into an array
+    foreach (split "\n", $&) {
+        push @nodes, $1 if /<\w+ id=\"(.+?)\">/
+    }
+}
 # 2002-02-22 arjen@mysql.com (added fix " /" to end of regex, to make it match)
-print STDERR "Fixing references to removed nodes...\n";
+msg ("Fixing references to removed nodes...");
-foreach $node (@nodes) {
+# Merge the list of node names into a set of regex alternations
-  $web = $node;
+$nodes = join "|", @nodes;
-  $web =~ s/[ ]/_/;
-  $web = "http://www.mysql.com/doc/" .
+# Find all references to removed nodes and convert them to absolute URLs
-         (join "/", (split //, $web)[0..1])."/$web.html";
+$data =~ s{<\w+ linkend="($nodes)" />}
-  print STDERR "$node -> $web\n";
+          {&xref2link($1)}ges;
-  $data =~ s{<(\w+) linkend=\"$node\" />}
-            {$web}gs;
-};
 print STDOUT $data;
+exit;
+#
+# Definitions for helper sub-routines
+#
+sub msg {
+    print STDERR shift, "\n";
+}
+sub strip_tag($$) {
+    (my $str, my $tag) = @_;
+    $str =~ s{<$tag>(.+?)</$tag>}{$1}gs;
+    return $str;
+}
+sub underscore2hyphen($) {
+    my $str = shift;
+    $str =~ tr/_/-/;
+    return $str;
+}
+sub xref2link {
+    my $ref = shift;
+    $ref =~ tr/ /_/;
+    $ref =~ s{^((.)(.).+)$}{$2/$3/$1.html};
+    return "http://www.mysql.com/doc/" . $ref;
+}
+# We might need to encode the high-bit characters to ensure proper representation
+# msg ("Converting high-bit characters to entities");
+# $data =~ s/([\200-\400])/&get_entity($1)>/gs;
+# There is no get_entity function yet - no point writing it til we need it :)
--- a/Docs/Support/docbook-split
+++ b/Docs/Support/docbook-split
-#! /usr/local/bin/perl
+#! /usr/bin/perl -w
 # O'Reilly's Perl script to chop mysql.xml into separate ch/apps/index files.
 # The indexes are actually not used, they're created straight from the xrefs.
-use strict;
 # Breaks the MySQL reference manual into chapters, appendices, and indexes.
-my $input_file;
+use strict;
-my $directory;
-my $chap_num;
-my $app_letter;
-my $start_text;
-my $line;
-my $input_file;
-my $output_name;
-$input_file = "mysql.xml";
-$directory="chaps_apps_index";
-$chap_num=1;       # Start chapter numbers at one (there is no preface)
-$app_letter="a";   # Start appendix letters at "a"
-$start_text="";
-$line="";
-open (INPUT_FILE, '<' . $input_file) or die "Cannot open $input_file";
-if (-d $directory) {
-    my $unlinked = unlink <$directory/*>;
-    printf(Removed "%d files\n", $unlinked);
-}
-else {
-    mkdir $directory or die "Cannot make $directory subdirectory";
-}
-while (1) {
+my $app_letter      = "a";                                  # Start appendix letters at "a"
+my $chap_num        = 1;                                    # Start chapter numbers at one (there is no preface)
+my $directory       = "chaps_apps_index";
+my $ext             = ".xml";
+my $line            = "";
+my $output_name     = "";
+my $start_text      = "";
-    # Terminating statement for loop.
+mkdir $directory unless -d $directory;
-    exit if not defined $line;
-    if ($line =~ /(?:.*)(<chapter.*)/i ) {
+while (defined $line) {
+    if ($line =~ /(<chapter.+)/i ) {
        $start_text = $1;
-        $output_name = &make_chapter_name($chap_num);
+        $output_name = sprintf("ch%02d%s", $chap_num, $ext);
-        $chap_num++;
+        ++$chap_num;
        &process_file("chapter");
    }
-    elsif ($line =~ /(?:.*)(<appendix.*)/i ) {
+    elsif ($line =~ /(<appendix.+)/i ) {
        $start_text = $1 ;
-        $output_name = &make_appendix_name($app_letter);
+        $output_name = "app$app_letter$ext";
-        $app_letter++;
+        ++$app_letter;
        &process_file("appendix");
    }
-    elsif ($line =~ /(?:.*)(<index\s+id=")(.*?)(">.*)/i ) {
+    elsif ($line =~ /(<index\s+id=")(.*?)(">.*)/i ) {
        $start_text = $1 . $2 . $3;
-        $output_name = lc($2) . ".xml";
+        $output_name = lc($2) . $ext;
        &process_file("index");
    }
    else {
-        # Automatically skips junk in between chapters, appendices,
+        # Skip junk in between chapters, appendices and indexes.
-        # and indexes.
+        $line = <>;
-        $line = <INPUT_FILE>;
    }
 }
-sub make_chapter_name {
+sub process_file {
-    my $num = shift;
+    my $marker  = shift;
-    my $name = "ch" . sprintf("%02d", $num) . ".xml";
+    my $path    = "$directory/$output_name";
-    return $name;
-}
-sub make_appendix_name {
+    open (OUTPUT_FILE, ">$path") or die "Cannot open $path";
-    my $letter = shift;
-    my $name = "app" . sprintf("%s", $letter) . ".xml";
-    return $name;
-}
-sub process_file {
+    print STDERR "Creating $path\n";
-    my $marker=shift;
-    open (OUTPUT_FILE, '>' . $directory . "/" . $output_name) or
+    # Print out XML PI
-        die "Cannot open $output_name";
+    print OUTPUT_FILE "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
    # Print whatever happened to appear at the end of the previous chapter.
-    print OUTPUT_FILE $start_text . "\n" if $start_text;
+    print OUTPUT_FILE "$start_text\n" if $start_text;
-    while (1) {
-        $line = <INPUT_FILE>;
+    while (defined $line) {
-        exit if not defined $line;
+        $line = <>;
        # Note: Anything after the terminating marker is lost, just like
        # lines in between chapters.
        if ($line =~ /(.*<\/\s*$marker\s*>)/i ) {
-            print OUTPUT_FILE $1 . "\n" if $1;
+            print OUTPUT_FILE "$1\n" if $1;
            close OUTPUT_FILE;
            return;
        }