#! /usr/bin/perl

use strict;
use Unicode::String;
use Unicode::Map();
use vars qw($LOCALE_CODE $Map);

# Name of the character set handed to Unicode::Map; convert_old2new() treats
# the decoded old-format names as text in this charset.
$LOCALE_CODE = 'GB2312';

# Shared charset-to-Unicode mapper used by convert_old2new().
# Built with a direct class-method call rather than the indirect-object form
# ("new Class ..."), and checked immediately: without the check, a mapper that
# could not be created would only surface later as an obscure method-call
# failure on the first line containing a name="..." attribute.
$Map = Unicode::Map->new($LOCALE_CODE)
    or die "$0: cannot create Unicode::Map for charset '$LOCALE_CODE'\n";

# Filter loop: read every line from the files named on the command line (or
# from STDIN), rewrite the value of a name="..." attribute, and print the line.
# Lines without such an attribute are echoed unchanged.
#
# The pattern splits a line into:
#   $1  everything up to and including  <whitespace>name="
#   $2  the attribute value (no double quotes inside; \042 is '"')
#   $3  the closing quote and the rest of the line
# Because the leading .* is greedy, a line carrying several name="..."
# attributes has only the last one rewritten.
while (my $line = <>) {
    chomp $line;
    if ($line =~ /^(.*\sname=")([^\042]*)(".*)$/) {
        my ($pre, $name, $post) = ($1, $2, $3);
        print $pre, convert_old2new($name), $post, "\n";
    } else {
        print $line, "\n";
    }
}

exit 0;

# Convert one old-format name (going by the sub's name) into the escaped
# "new" form.
#
# Steps:
#   1. decode_old_format_string() undoes the old storage format.
#   2. $Map->to_unicode() converts that text from the $LOCALE_CODE charset;
#      the result is loaded into a Unicode::String as UTF-16.
#   3. The string is read back as UTF-8 and walked one element at a time
#      (presumably one byte per element -- confirm against the installed
#      Unicode::String version).
#   4. Every element whose code is below 0x20 or above 0x7f, and each of
#      & < > % ", is replaced by "%XX" (two uppercase hex digits); everything
#      else is copied through unchanged.
#
# Takes the old-format string as its only argument and returns the escaped
# string (empty when there is nothing to convert).
sub convert_old2new {
    my ($old_name) = @_;

    # Printable ASCII characters that must be escaped nonetheless.
    my %must_escape = map { $_ => 1 } ('&', '<', '>', '%', '"');

    my $locale_text = decode_old_format_string($old_name);
    my $utf16       = $Map->to_unicode($locale_text);

    my $ustr = Unicode::String->new;
    $ustr->utf16($utf16);
    my $utf8 = $ustr->utf8;

    return join '', map {
        my $code = ord($_);
        ($code < 0x20 || $code > 0x7f || $must_escape{$_})
            ? sprintf("%%%02X", $code)
            : $_;
    } split //, $utf8;
}

# Undo the old storage format of a name and return the resulting string.
#
# The input is decoded in two passes:
#
#   1. Each decimal character reference "&#NNN;" is replaced by the single
#      character produced by pack('C', NNN).  (pack's 'C' template holds one
#      octet, so values above 255 keep only their low 8 bits.)  The
#      substitution runs once, left to right, so a replaced character is never
#      rescanned as the start of another reference.
#
#   2. The result is read as UTF-8 restricted to one- and two-byte sequences:
#        - characters below 0x80 are copied through;
#        - a lead byte 0xC0-0xDF followed by a continuation byte 0x80-0xBF
#          collapses into the one character those two bytes encode (again via
#          pack('C'), i.e. truncated to 8 bits);
#        - anything else -- a stray continuation byte, a byte >= 0xE0, or a
#          lead byte that is NOT followed by a continuation byte (including a
#          lead byte at the very end of the string) -- is dropped.
#
# Only a well-formed two-byte sequence consumes the following byte; a
# malformed one no longer folds bits of an unrelated character into the
# output or reads past the end of the input.
#
# Takes the old-format string; returns the decoded string ('' for undef or
# empty input).
sub decode_old_format_string {
    my $s = shift;
    return '' unless defined $s;

    # Pass 1: "&#NNN;" -> one character.  \043 is '#'.
    $s =~ s/&\043(\d+);/pack('C', $1)/ge;

    # Pass 2: collapse two-byte UTF-8 sequences, drop what cannot be decoded.
    my @c = split //, $s;
    my @r = ();
    my $i = 0;
    while ($i <= $#c) {
        my $n = ord($c[$i]);
        if ($n < 0x80) {
            push @r, $c[$i];
        } elsif ($n >= 0xc0 && $n <= 0xdf && $i < $#c) {
            my $n2 = ord($c[$i + 1]);
            if ($n2 >= 0x80 && $n2 <= 0xbf) {
                push @r, pack('C', (($n & 0x1f) << 6) | ($n2 & 0x3f));
                $i++;    # the continuation byte has been consumed
            }
        }
        $i++;
    }
    return join('', @r);
}

# end of conversion script
