#!/usr/bin/perl
#
# rm_ids.pl
#
# Eugene Eric Kim <eekim@eekim.com>
# http://www.eekim.com/software/purple/
#
# $Id: rm_ids.pl,v 1.4 2001/05/04 02:46:20 eekim Exp $
#
# Copyright (c) Eugene Eric Kim 2000-2001.  All rights reserved.
# See COPYING for licensing terms.

=head1 NAME

rm_ids.pl - Remove statement IDs and hierarchical addresses from a
purple.dtd XML file.

=head1 SYNOPSIS

Usage:
  rm_ids.pl [-r rules.purple] file.xml

=head1 DESCRIPTION

Parses the XML file conforming to purple.dtd, and removes all SIDs and
hierarchical addresses.  Saves the original XML file to file.xml~.

This script is not very robust.  It doesn't validate the XML file, and
it doesn't handle errors well.  So be careful.

=cut

use strict;
use File::Copy;
use File::IO;
use Getopt::Long;
use XML::DOM;

# Command-line handling: -r names an optional rules file; the first
# remaining argument is the XML file to process.
my $rules_file;
GetOptions('r=s'=>\$rules_file);

my $xml_file = $ARGV[0];
if (!$xml_file) {
  print <<EOM;
Usage:
  $0 [-r rules.purple] file.xml

where file.xml is an XML file conforming to purple.dtd.
EOM
  exit;
}

# The original document is kept as file.xml~ and parsed from there; the
# cleaned document is written back under the original name later on.
# Stop right away if the backup cannot be made, rather than failing
# later with a confusing "file not found" from the parser.
my $backup_file = "$xml_file~";
File::Copy::move($xml_file, $backup_file)
  or die "$0: cannot move $xml_file to $backup_file: $!\n";

# Register the callback that decides how empty elements are printed.
XML::DOM::setTagCompression(\&my_tag_compression);
my $parser = XML::DOM::Parser->new;

# If parsing dies, put the input back under its original name so that a
# malformed document does not leave the user with only the backup file.
my $doc = eval { $parser->parsefile($backup_file) };
if (!defined $doc) {
  my $parse_error = $@;
  File::Copy::move($backup_file, $xml_file)
    or warn "$0: cannot restore $xml_file from $backup_file: $!\n";
  die "$0: cannot parse $xml_file: $parse_error";
}

### element rules

# default values for purple.dtd
my $tag_lastsid = 'lastsid';
my @tags_with_sids = ('h','p','item','example','figure');

# Override the default rules with the values in the rules file, if one
# was given and exists.  Each recognised line has the form NAME=value:
#   LAST_SID=tag              element holding the last statement ID
#   TAGS_WITH_SIDS=tag,tag    elements carrying sid/hid attributes
# Once the file has been opened the defaults are discarded, so a key
# that is missing from the file leaves the corresponding rule empty.
# The builtin three-argument open is used, so no IO::File import is
# needed here.
if (defined $rules_file && -e $rules_file) {
  if (open(my $fh, '<', $rules_file)) {
    undef $tag_lastsid;
    @tags_with_sids = ();
    while (my $line = <$fh>) {
      chomp $line;
      # Values may contain the usual XML name characters (letters,
      # digits, '_', '-', '.', ':'), separated by commas.
      if ($line =~ /^([A-Z_]+)=([A-Za-z0-9_.:,-]+)$/) {
        my $var_name = $1;
        my $var_value = $2;
        if ($var_name eq 'LAST_SID') {
          $tag_lastsid = $var_value;
        }
        elsif ($var_name eq 'TAGS_WITH_SIDS') {
          # Drop empty entries produced by doubled or leading commas.
          @tags_with_sids = grep { length } split(',', $var_value);
        }
      }
    }
    close($fh);
  }
  else {
    # Keep going with the defaults, but tell the user why the rules
    # file had no effect.
    warn "$0: cannot read $rules_file: $!; using default rules\n";
  }
}

### remove SIDs and HIDs

foreach my $tag (@tags_with_sids) {
  remove_ids($tag);
}
remove_lastsid() if ($tag_lastsid);

### print to file

$doc->printToFile("$xml_file");

# fini

### subroutines

# remove_ids($element_name)
#
# Strips the 'sid' and 'hid' attributes from every element in the parsed
# document ($doc) whose tag name is $element_name.
sub remove_ids {
  my ($element_name) = @_;

  for my $element ($doc->getElementsByTagName($element_name)) {
    $element->removeAttribute($_) for ('sid', 'hid');
  }
}

# remove_lastsid()
#
# Removes the first child node of the first element in the parsed
# document ($doc) whose tag name is $tag_lastsid.  Does nothing when the
# document has no such element or when that element has no children,
# instead of dying on an undefined value.
sub remove_lastsid {
  my @nodes = $doc->getElementsByTagName($tag_lastsid);
  return unless @nodes;

  my $node = ($nodes[0]->getChildNodes)[0];
  return unless defined $node;

  $nodes[0]->removeChild($node);
}


# my_tag_compression($tag, $elem)
#
# Callback registered with XML::DOM::setTagCompression.  Returns 1 for
# the 'lastsid' and 'p' tags and 0 for every other tag; per the XML::DOM
# documentation, 1 prints an empty element as <tag></tag> and 0 prints
# it as <tag/>.  The $elem argument is not used.
#
# NOTE(review): the tag names are hard-coded here, so a rules file that
# renames the last-SID element is not reflected in this check.
sub my_tag_compression {
  my $tag = $_[0];

  return ($tag =~ /^(lastsid|p)$/) ? 1 : 0;
}

=head1 AUTHOR

Eugene Eric Kim <eekim@eekim.com>

=cut

