Скачиваний:
45
Добавлен:
14.08.2013
Размер:
1.45 Кб
Скачать
#!perl
# Mati Pentus, 2000 June 16, 2000 July 13
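# Converts Esperanto x-convention digraphs (Cx cx Gx gx Hx hx Jx jx Sx sx
# Ux ux) to the corresponding UTF-8 letters in XML text nodes (but not in
# attributes), and recodes CP866 (DOS Cyrillic) bytes to UTF-8 in both
# text nodes and tags.
# Usage: perl <this script> INPUT.xml OUTPUT.xml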

use strict;
use XML::Parser::Expat;

# Recode, in place in $_, every CP866 (DOS) Cyrillic byte to its two-byte
# UTF-8 sequence.  Takes no arguments; the caller is expected to have
# localized $_ to the text being converted.
#
#   0x80-0xAF  ->  D0 90 .. D0 BF   (U+0410 .. U+043F)
#   0xE0-0xEF  ->  D1 80 .. D1 8F   (U+0440 .. U+044F)
#   0xF0       ->  D0 81            (U+0401, capital IO)
#   0xF1       ->  D1 91            (U+0451, small io)
#
# All other bytes are left untouched.
sub dos2utf {
    s{([\x80-\xaf\xe0-\xf1])}{
        my $byte = ord $1;
          $byte == 0xf0 ? "\xd0\x81"
        : $byte == 0xf1 ? "\xd1\x91"
        : $byte >= 0xe0 ? "\xd1" . chr($byte - 0x60)
        :                 "\xd0" . chr($byte + 0x10)
    }eg;
}

# Encode one Unicode code point as a string of UTF-8 bytes.
#
# The code point is taken from the first argument when one is passed,
# otherwise from $_, so the sub can be used directly as a map expression
# (`map utfchr, 264, 265`).
#
# Returns:
#   code <  0x80          the single ASCII character
#   0x80 <= code < 0x800  the two-byte UTF-8 sequence
#   code >= 0x800         '?' (longer sequences are not implemented)
sub utfchr
{
    my $code = @_ ? $_[0] : $_;

    # ASCII is its own UTF-8 encoding.  Return the character itself rather
    # than the numeric code point that was passed in.
    return chr($code) if $code < 0x80;

    return '?' if $code >= 0x800;

    # Two-byte form 110xxxxx 10yyyyyy: the high five bits go into the lead
    # byte, the low six bits into the continuation byte.
    return chr(0xc0 | ($code >> 6)) . chr(0x80 | ($code & 0x3f));
}

# Replacement table for the substitution in ch(), indexed by the ordinal of
# the byte that precedes an 'x'.  By default a pair maps to itself (the byte
# followed by 'x'); the twelve Esperanto base letters map instead to the
# UTF-8 encoding of the corresponding accented letter.
my @xmap = map { chr($_) . 'x' } 0 .. 255;
{
    my @base_letters = qw(C c G g H h J j S s U u);
    my @code_points  = (264, 265, 284, 285, 292, 293,
                        308, 309, 348, 349, 364, 365);
    for my $i (0 .. $#base_letters) {
        local $_ = $code_points[$i];    # utfchr reads its input from $_
        $xmap[ ord($base_letters[$i]) ] = utfchr();
    }
}

# 'Char' handler: copy a run of character data to the output.
#
# The first argument is the Expat parser object; its original_string()
# supplies the text verbatim as it appeared in the input.  CP866 bytes are
# recoded first, then every "<byte>x" pair is replaced through @xmap, which
# turns the Esperanto digraphs into UTF-8 letters and leaves every other
# pair unchanged.
#
# NOTE(review): XML::Parser documents that one contiguous run of text may be
# delivered in several Char calls; a digraph split across two calls would
# not be converted here -- confirm whether that matters for the inputs used.
sub ch {
    my ($expat) = @_;

    local $_ = $expat->original_string();
    dos2utf();
    s{(.)x}{ $xmap[ ord($1) ] }eg;
    print TO $_;
}

# 'Start' / 'End' handler: copy a tag to the output.
#
# The first argument is the Expat parser object; the tag text is taken
# verbatim from original_string().  Only the CP866 recoding is applied --
# the x-digraph substitution is deliberately not, so element and attribute
# text inside tags keeps any "Cx"-style sequences as they are.
sub dh {
    my ($expat) = @_;

    local $_ = $expat->original_string();
    dos2utf();
    print TO $_;
}

# 'XMLDecl' handler: write a fresh XML declaration to the output.
#
# Only the second argument (the version) is used; any further arguments
# passed by the parser are ignored, so the emitted declaration carries the
# version alone, followed by a blank line.
sub xh {
    my (undef, $version) = @_;

    print TO "<?xml version=\x22$version\x22?>\n\n";
}

# Main program: parse the XML file named by the first command-line argument
# and write the converted document to the file named by the second.
#
# Only the Start, End, XMLDecl and Char events have handlers; nothing is
# registered for other events, so this script writes no output for them.

# Three-argument open with an undefined name would silently open an
# anonymous temporary file, so insist on both file names up front.
die "Usage: $0 INPUT.xml OUTPUT.xml\n" unless @ARGV >= 2;

my $parser = XML::Parser::Expat->new;
$parser->setHandlers(
    'Start'   => \&dh,
    'End'     => \&dh,
    'XMLDecl' => \&xh,
    'Char'    => \&ch,
);

# Three-argument open: the names are taken literally, so characters such as
# a leading '>' or '&' or surrounding whitespace in a file name can no
# longer change how the file is opened.  The handles stay barewords because
# the handlers print to TO by name.
open FROM, '<', $ARGV[0] or die "Failed to open $ARGV[0]\n$!";
open TO,   '>', $ARGV[1] or die "Failed to write $ARGV[1]\n$!";

$parser->parse(*FROM);
print TO "\n";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the close of the output file is checked.
close TO or die "Failed to write $ARGV[1]\n$!";
close FROM;
Соседние файлы в папке ll_src