#!/usr/bin/perl # # Author: Petter Reinholdtsen # Date: 2004-06-01 # # License: GPL v2 or later at your choice # # Test the content of GNU libc locales, detect some common errors. # # The latest version is available from # . use warnings; use strict; use vars qw($locale $warncount $errcount); sub uxx_to_utf8 { my $line = shift; $line =~ s//pack('U',hex($1))/ge; return $line; } sub error { my $msg = shift; $errcount++; print "error: $locale: $msg\n"; } sub warning { my $msg = shift; $warncount++; print "warning: $locale: $msg\n"; } sub check_lc_identification { my @lines = @_; for my $line (@lines) { if ($line =~ m/^category\s+(\S+)$/) { warning "LC_IDENTIFICATION: missing quotes around category standard ref: $1" if ($1 !~ m/\".+\"/); } if ($line =~ m/^email\s+(\S+)\s*$/) { my $email = $1; warning "LC_IDENTIFICATION: obsolete email: $email" if ($email =~ m/"?bug-glibc\@gnu.org"?/); } } } sub check_lc_paper { my @lines = @_; my $height = undef; my $width = undef; for (@lines) { $height = $1 if (m/^height\s*(\d+)\s*$/); $width = $1 if (m/^width\s*(\d+)\s*$/); return if (m/^copy\s+/); # Nothing to check } if (!defined $width || defined $height) { # warning "LC_PAPER: Missing height or width."; return; } if (210 == $width && 297 == $height) { # ISO A4 } elsif (216 == $width && 279 == $height) { # US Letter } else { warning "LC_PAPER: unknown paper size."; } } sub check_lc_measurement { my @lines = @_; for my $line (@lines) { if ($line =~ m/^\s*(measurement)\s+(\S+)\s*$/) { my $value = $2; if (defined $value && $value !~ m/^\d+$/) { warning "LC_MEASUREMENT: measurements should be number 1 or 2."; } elsif ($value < 1 && 2 < $value) { warning "LC_MEASUREMENT: measurements should be 1 or 2."; } } } } sub check_lc_numeric { my @lines = @_; for my $line (@lines) { next if ($line eq "LC_NUMERIC" || $line eq "END LC_NUMERIC"); next if ($line =~ m/^$/); if ($line =~ m/^\s*(grouping)\s+(\S+)\s*$/) { my $value = $2; if ($value =~ m/^-?\d+$/) { # Only digits (or - digits) if ( $value < 1 && $value != -1) { warning "LC_NUMERIC: grouping should positive or -1: $value"; } } else { if (defined $value && $value !~ m/\d+;\d+/) { warning "LC_NUMERIC: grouping should use ; as separator: $value"; } } } elsif ($line =~ m/^\s*(decimal_point)\s+(\S+)\s*$/) { } elsif ($line =~ m/^\s*(thousands_sep)\s+(\S+)\s*$/) { my $sep = $2; # print "P: '$sep'\n"; warning "LC_NUMERIC: Unusual thousands_sep '$sep' [". uxx_to_utf8($sep)."]" unless (grep { $_ eq $sep; } ('""', '""', '""', '""', '""', '""')); } elsif ($line =~ m/^\s*(copy)\s+(\S+)\s*$/) { } else { warning "LC_NUMERIC: Unknown keyword '$line'"; } } } sub check_lc_messages { my @lines = @_; for (@lines) { if (m/^\s*(yesexpr|noexpr)\s+(.+)$/) { my $type = $1; my $regex = uxx_to_utf8($2); unless ($regex =~ m/^"\^/) { error "LC_MESSAGES: $type missing '^' prefix: $regex"; } unless ($regex =~ m/\[.+\]|$.+$/) { error "LC_MESSAGES: $type missing '[.+]|(.+)' content: $regex"; } if ($regex =~ m/\.\*"$/) { warning "LC_MESSAGES: $type have '.*' postfix: $regex"; } if ($regex =~ m/[0-9]/) { warning "LC_MESSAGES: $type have numbers in regex: $regex"; } if ($type eq "yesexpr" && ($regex !~ m/y/ || $regex !~ m/Y/)) { warning "LC_MESSAGES: $type missing 'yY' in content: $regex"; } if ($type eq "noexpr" && ($regex !~ m/n/ || $regex !~ m/N/)) { warning "LC_MESSAGES: $type missing 'nN' in content: $regex"; } } } } sub check_order { my @blocks = @_; my @order= qw(LC_IDENTIFICATION LC_CTYPE LC_COLLATE LC_MONETARY LC_NUMERIC LC_TIME LC_MESSAGES LC_PAPER LC_NAME LC_ADDRESS LC_TELEPHONE LC_MEASUREMENT); my $pos = 0; for my $section (@blocks) { if ($section eq $order[$pos]) { $pos++; } else { $pos = 0; $pos++ while ($order[$pos] ne $section); warning "$section: not following section $order[$pos-1]"; } } } sub check_charset { my @blocks = @_; for my $section (@blocks) { if ($section =~ m/^%\s*[Cc]arset:\s*$/) { return; } } warning "Missing '% Charset: ' info"; } my $section; my %sections; my @blocks = (); for my $filename (@ARGV) { open (FILE, "<$filename") || die "Unable to read $filename"; $locale = $filename; my $buf = undef; my @lines; %sections = (); while () { chomp; s/%.*$//; # Remove comments s/\#.*$//; if (m%^(.+)/$%) { $buf .= $1; next; } if (! defined $buf) { $buf = $_; } else { $buf .= $_; } if (!$section && m/^(LC_.*)\s*$/) { $section = $1; @lines = (); #print "Found section $section\n"; } push(@lines, $buf); if (m/^END (LC_.+)\s*$/) { if (exists $sections{$section}) { warning "duplicate section $section"; } push(@blocks, $section); $sections{$section} = [@lines]; $section = ""; #print "Stored section $section\n"; } undef $buf; } close(FILE); # check_order(@blocks); check_charset(@blocks); check_lc_identification(@{$sections{'LC_IDENTIFICATION'}}); check_lc_messages(@{$sections{'LC_MESSAGES'}}); check_lc_numeric(@{$sections{'LC_NUMERIC'}}); check_lc_paper(@{$sections{'LC_PAPER'}}); check_lc_measurement(@{$sections{'LC_MEASUREMENT'}}); }