#!/usr/bin/perl
#
# Author: Petter Reinholdtsen
# Date: 2004-06-01
#
# License: GPL v2 or later at your choice
#
# Test the content of GNU libc locales, detect some common errors.
#
# The latest version is available from
# <URL:http://www.hungry.com/~pere/linux/glibc/>.

use warnings;
use strict;

use vars qw($locale $warncount $errcount);

sub uxx_to_utf8 {
    my $line = shift;
    $line =~ s/<U([0-9A-Za-z]{4})>/pack('U',hex($1))/ge;
    return $line;
}

sub error {
    my $msg = shift;
    $errcount++;
    print "error: $locale: $msg\n";
}

sub warning {
    my $msg = shift;
    $warncount++;
    print "warning: $locale: $msg\n";
}

sub check_lc_identification {
    my @lines = @_;
    for my $line (@lines) {
        if ($line =~ m/^category\s+(\S+)$/) {
            warning "LC_IDENTIFICATION: missing quotes around category standard ref: $1"
                if ($1 !~ m/\".+\"/);
        }
        if ($line =~ m/^email\s+(\S+)\s*$/) {
            my $email = $1;
            warning "LC_IDENTIFICATION: obsolete email: $email"
                if ($email =~ m/"?bug-glibc\@gnu.org"?/);
        }
    }
}
sub check_lc_paper {
    my @lines = @_;
    my $height = undef;
    my $width = undef;
    for (@lines) {
        $height = $1 if (m/^height\s*(\d+)\s*$/);
        $width = $1 if (m/^width\s*(\d+)\s*$/);
        return if (m/^copy\s+/); # Nothing to check
    }

    if (!defined $width || defined $height) {
#       warning "LC_PAPER: Missing height or width.";
        return;
    }

    if (210 == $width && 297 == $height) {      # ISO A4
    } elsif (216 == $width && 279 == $height) { # US Letter
    } else {
        warning "LC_PAPER: unknown paper size.";
    }
}

sub check_lc_measurement {
    my @lines = @_;
    for my $line (@lines) {
        if ($line =~ m/^\s*(measurement)\s+(\S+)\s*$/) {
            my $value = $2;
            if (defined $value && $value !~ m/^\d+$/) {
                warning "LC_MEASUREMENT: measurements should be number 1 or 2.";
            } elsif  ($value < 1 && 2 < $value) {
                warning "LC_MEASUREMENT: measurements should be 1 or 2.";
            }
        }
    }
}

sub check_lc_numeric {
    my @lines = @_;
    for my $line (@lines) {
        next if ($line eq "LC_NUMERIC" || $line eq "END LC_NUMERIC");
        next if ($line =~ m/^$/);
        if ($line =~ m/^\s*(grouping)\s+(\S+)\s*$/) {
            my $value = $2;
            if ($value =~ m/^-?\d+$/) {
                # Only digits (or - digits)
                if ( $value < 1 && $value != -1) {
                    warning "LC_NUMERIC: grouping should positive or -1: $value";
                }
            } else {
                if (defined $value && $value !~ m/\d+;\d+/) {
                    warning "LC_NUMERIC: grouping should use ; as separator: $value";
                }
            }
        } elsif ($line =~ m/^\s*(decimal_point)\s+(\S+)\s*$/) {
        } elsif ($line =~ m/^\s*(thousands_sep)\s+(\S+)\s*$/) {
            my $sep = $2;
#           print "P: '$sep'\n";
            warning "LC_NUMERIC: Unusual thousands_sep '$sep' [".
                uxx_to_utf8($sep)."]"
                unless (grep { $_ eq $sep; }  ('"<U0020>"',
                                               '"<U0027>"',
                                               '"<U002C>"',
                                               '"<U002E>"',
                                               '"<U00A0>"',
                                               '""'));
        } elsif ($line =~ m/^\s*(copy)\s+(\S+)\s*$/) {
        } else {
            warning "LC_NUMERIC: Unknown keyword '$line'";
        }
    }
}

sub check_lc_messages {
    my @lines = @_;
    for (@lines) {
        if (m/^\s*(yesexpr|noexpr)\s+(.+)$/) {
            my $type  = $1;
            my $regex = uxx_to_utf8($2);
            unless ($regex =~ m/^"\^/) {
                error "LC_MESSAGES: $type missing '^' prefix: $regex";
            }
            unless ($regex =~ m/\[.+\]|\(.+\)/) {
                error "LC_MESSAGES: $type missing '[.+]|(.+)' content: $regex";
            }
            if ($regex =~ m/\.\*"$/) {
                warning "LC_MESSAGES: $type have '.*' postfix: $regex";
            }
            if ($regex =~ m/[0-9]/) {
                warning "LC_MESSAGES: $type have numbers in regex: $regex";
            }
            if ($type eq "yesexpr" && ($regex !~ m/y/ ||
                                       $regex !~ m/Y/)) {
                warning "LC_MESSAGES: $type missing 'yY' in content: $regex";
            }
            if ($type eq "noexpr" && ($regex !~ m/n/ ||
                                      $regex !~ m/N/)) {
                warning "LC_MESSAGES: $type missing 'nN' in content: $regex";
            }
        }
    }
}

sub check_order {
    my @blocks = @_;
    my @order= qw(LC_IDENTIFICATION
                  LC_CTYPE
                  LC_COLLATE
                  LC_MONETARY
                  LC_NUMERIC
                  LC_TIME
                  LC_MESSAGES
                  LC_PAPER
                  LC_NAME
                  LC_ADDRESS
                  LC_TELEPHONE
                  LC_MEASUREMENT);
    my $pos = 0;
    for my $section (@blocks) {
        if ($section eq $order[$pos]) {
            $pos++;
        } else {
            $pos = 0;
            $pos++ while ($order[$pos] ne $section);
            warning "$section: not following section $order[$pos-1]";
        }
    }
}

sub check_charset {
    my @blocks = @_;
    for my $section (@blocks) {
        if ($section =~ m/^%\s*[Cc]arset:\s*$/) {
            return;
        }
    }
    warning "Missing '% Charset: <charset>' info";
}

my $section;
my %sections;
my @blocks = ();
for my $filename (@ARGV) {
    open (FILE, "<$filename") || die "Unable to read $filename";
    $locale = $filename;
    my $buf = undef;
    my @lines;
    %sections = ();
    while (<FILE>) {
        chomp;
        s/%.*$//; # Remove comments
        s/\#.*$//;
        if (m%^(.+)/$%) {
            $buf .= $1;
            next;
        }
        if (! defined $buf) {
            $buf = $_;
        } else {
            $buf .= $_;
        }
        if (!$section && m/^(LC_.*)\s*$/) {
            $section = $1;
            @lines = ();
            #print "Found section $section\n";
        }
        push(@lines, $buf);
        if (m/^END (LC_.+)\s*$/) {
            if (exists $sections{$section}) {
                warning "duplicate section $section";
            }
            push(@blocks, $section);
            $sections{$section} = [@lines];
            $section = "";
            #print "Stored section $section\n";
        }
        undef $buf;
    }
    close(FILE);

#    check_order(@blocks);
    check_charset(@blocks);
    check_lc_identification(@{$sections{'LC_IDENTIFICATION'}});
    check_lc_messages(@{$sections{'LC_MESSAGES'}});
    check_lc_numeric(@{$sections{'LC_NUMERIC'}});
    check_lc_paper(@{$sections{'LC_PAPER'}});
    check_lc_measurement(@{$sections{'LC_MEASUREMENT'}});
}
