# Selima Website Content Management System # GetLang.pm: The subroutine to match the user preferred languages with our available languages. # Copyright (c) 2003-2018 imacat. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Author: imacat # First written: 2003-03-23 package Selima::GetLang; use 5.008; use strict; use warnings; use base qw(Exporter); use vars qw(@EXPORT @EXPORT_OK); BEGIN { @EXPORT = qw(getlang getcharset); @EXPORT_OK = @EXPORT; # Prototype declaration sub getlang(;$); sub getcharset(); sub getlang_real(); sub getlang_filename(); sub getlang_env(); sub getlang_accept(); sub getlang_setenv($); sub getcharset_accept(); sub all_charsets(); } use CGI::Cookie qw(); use Selima::Cache qw(:getlang); use Selima::DataVars qw(:input :l10n :lninfo :output :requri); use Selima::LnInfo; # getlang: Get the appropriate language from the user-agent sub getlang(;$) { local ($_, %_); $_ = $_[0]; # Return the proper data type return defined $_? ln(getlang_real, $_): getlang_real; } # getcharset: Get the appropriate character set from the user-agent. sub getcharset() { local ($_, %_); my $default; # Obtained before return $GetLang_charset if defined $GetLang_charset; $default = getlang(LN_CHARSET); # We have no choice return ($GetLang_charset = $default) if all_charsets < 2; # Parse the character set by the Accept-Charset header return $GetLang_charset if defined($GetLang_charset = getcharset_accept); # Cannot parse -- return the default return ($GetLang_charset = $default); } # getlang_real: The real subroutine sub getlang_real() { local ($_, %_); # Obtained before return $GetLang_lang if defined $GetLang_lang; # Uni-lingual return ($GetLang_lang = $DEFAULT_LANG) if @ALL_LINGUAS == 1; # Check the file name for specified language # No setting environment in this case return $GetLang_lang if defined($GetLang_lang = getlang_filename); # Methods below should set the language in the environment # Check the environment for specified language if (defined($GetLang_lang = getlang_env)) { getlang_setenv $GetLang_lang; return $GetLang_lang; } # Parse the language by the Accept-Language header if (defined($GetLang_lang = getlang_accept)) { getlang_setenv $GetLang_lang; return $GetLang_lang; } # Cannot parse -- return the default $GetLang_lang = $DEFAULT_LANG; getlang_setenv $GetLang_lang; return $GetLang_lang; } # getlang_filename: Check the file name for specified language sub getlang_filename() { local ($_, %_); my $langfile; # Check the file name format return undef unless defined $REQUEST_PATH && $REQUEST_PATH =~ /\.([^\.\/]+)\.[^\.\/]+$/; $langfile = $1; # Check each language for its file name format @_ = grep $langfile = $_, map ln($_, LN_FILENAME), @ALL_LINGUAS; return $_[0] if @_ > 0; # Not found return undef; } # getlang_env: Check the environment for specified language sub getlang_env() { local ($_, %_); %_ = map { $_ => 1 } @ALL_LINGUAS; # Check the query string return $_ if defined $GET && defined($_ = $GET->param("lang")) && exists $_{$_}; # Check the POSTed form return $_ if defined $POST && defined($_ = $POST->param("lang")) && exists $_{$_}; # Check the cookies return $_ if exists $COOKIES{"lang"} && exists $_{$_ = $COOKIES{"lang"}->value}; # Not set return undef; } # getlang_accept: Parse the language by the Accept-Language header # Refer to HTTP/1.1 section 14.4 for this algorism sub getlang_accept() { local ($_, %_); my (@rngs, %rngqf, $defqf, $ln, @attrs, %tagqf, $tag, $match); # Accept-Language not set return undef if !exists $ENV{"HTTP_ACCEPT_LANGUAGE"}; # Split into language ranges $_ = $ENV{"HTTP_ACCEPT_LANGUAGE"}; s/^\s*(.*?)\s*$/$1/; @rngs = split /\s*,\s*/, $_; %rngqf = qw(); foreach my $range (@rngs) { # Split into attributes $range =~ s/^\s*(.*?)\s*$/$1/; @attrs = split /\s*;\s*/, $range; # First piece is the language range $ln = shift @attrs; # Lower-case it $ln = lc $ln; # Find the quality factor foreach my $attr (@attrs) { # A numeric quality factor found $rngqf{$ln} = $1+0 if $attr =~ /^q=([01](?:\.\d{1,3})?)$/; } # Default quality factor to 1 $rngqf{$ln} = 1 if !exists $rngqf{$ln}; } # The default quality factor if (exists $rngqf{"*"}) { $defqf = $rngqf{"*"}; delete $rngqf{"*"}; } else { $defqf = 0; } # Language tags (what we have) %tagqf = qw(); # Calculated quality factor foreach my $ln (@ALL_LINGUAS) { # Language tag, as specified in ISO $tag = ln $ln, LN_NAME; # Matched range of the quality factor undef $match if defined $match; # Language ranges (what the user sent to match us) foreach my $range (keys %rngqf) { # Exactly match or match a prefix if ($tag eq $range || $tag =~ /^\Q$range\E-/) { # Not matched yet if (!defined $match) { $tagqf{$ln} = $rngqf{$range}; # Quality Factor $match = $range; # Record the matched range # A longer match range } elsif (length $range > length $match) { $tagqf{$ln} = $rngqf{$range}; # Quality Factor $match = $range; # Record the matched range } } } # Not matched -- apply a default quality factor $tagqf{$ln} = $defqf if !exists $tagqf{$ln}; } # Drop unacceptable languages foreach my $ln (keys %tagqf) { delete $tagqf{$ln} unless $tagqf{$ln} > 0; } # Nothing acceptable return undef if scalar(keys %tagqf) == 0; # Sort by the quality factor @_ = sort { $tagqf{$b} <=> $tagqf{$a} || ($a eq $DEFAULT_LANG? -1: 0) || ($b eq $DEFAULT_LANG? 1: 0) } keys %tagqf; # A preferred match return $_[0]; } # getlang_setenv: Check the environment for specified language sub getlang_setenv($) { local ($_, %_); $_ = $_[0]; # Set the cookie to keep the result $NEWCOOKIES{"lang"} = new CGI::Cookie(-name=>"lang", -value=>$_) if !exists $COOKIES{"lang"} || $COOKIES{"lang"}->value ne $_; return; } # getcharset_accept: Parse the character set by the Accept-Charset header # Refer to HTTP/1.1 section 14.2 for this algorism sub getcharset_accept() { local ($_, %_); my (@rngs, %rngqf, $defqf, $cs, @attrs, %tagqf, $tag, $default); # Accept-Charset not set return undef if !exists $ENV{"HTTP_ACCEPT_CHARSET"}; $default = getlang(LN_CHARSET); # Split into character set ranges $_ = $ENV{"HTTP_ACCEPT_CHARSET"}; s/^\s*(.*?)\s*$/$1/; @rngs = split /\s*,\s*/, $_; %rngqf = qw(); foreach my $range (@rngs) { # Split into attributes $range =~ s/^\s*(.*?)\s*$/$1/; @attrs = split /\s*;\s*/, $range; # First piece is the character set range $cs = shift @attrs; # Lower-case it $cs = lc $cs; # Find the quality factor foreach my $attr (@attrs) { # A numeric quality factor found $rngqf{$cs} = $1+0 if $attr =~ /^q=([01](?:\.\d{1,3})?)$/; } # Default quality factor to 1 $rngqf{$cs} = 1 if !exists $rngqf{$cs}; } # The default quality factor if (exists $rngqf{"*"}) { $defqf = $rngqf{"*"}; delete $rngqf{"*"}; } else { # Default ISO-8859-1 to 1 $rngqf{"iso-8859-1"} = 1 if !exists $rngqf{"iso-8859-1"}; $defqf = 0; } # Character set tags (what we have) %tagqf = qw(); # Calculated quality factor foreach my $cs (all_charsets) { # Character set tag, as specified in ISO $tag = lc $cs; # Character set ranges (what the user sent to match us) foreach my $range (keys %rngqf) { # Matched $tagqf{$cs} = $rngqf{$range} # Quality Factor if $tag eq $range; } # Not matched -- apply a default quality factor $tagqf{$cs} = $defqf if !exists $tagqf{$cs}; } # Drop unacceptable character sets foreach my $cs (keys %tagqf) { delete $tagqf{$cs} unless $tagqf{$cs} > 0; } # Nothing acceptable return undef if scalar(keys %tagqf) == 0; # Sort by the quality factor @_ = sort { $tagqf{$b} <=> $tagqf{$a} || ($a eq $default? -1: 0) || ($b eq $default? 1: 0) } keys %tagqf; # A preferred match return $_[0]; } # all_charsets: Obtain all the available character sets # Available character sets are the default character set of this # language, and UTF-8 sub all_charsets() { local ($_, %_); return @GetLang_all_charsets if @GetLang_all_charsets > 0; %_ = qw(); $_ = getlang(LN_CHARSET); $_{$_} = 1; $_{"UTF-8"} = 1; @GetLang_all_charsets = keys %_; return @GetLang_all_charsets; } return 1;