304 lines
9.6 KiB
Perl
304 lines
9.6 KiB
Perl
# Selima Website Content Management System
|
|
# GetLang.pm: The subroutine to match the user preferred languages with our available languages.
|
|
|
|
# Copyright (c) 2003-2018 imacat.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Author: imacat <imacat@mail.imacat.idv.tw>
|
|
# First written: 2003-03-23
|
|
|
|
package Selima::GetLang;
|
|
use 5.008;
|
|
use strict;
|
|
use warnings;
|
|
use base qw(Exporter);
|
|
use vars qw(@EXPORT @EXPORT_OK);
|
|
BEGIN {
|
|
@EXPORT = qw(getlang getcharset);
|
|
@EXPORT_OK = @EXPORT;
|
|
# Prototype declaration
|
|
sub getlang(;$);
|
|
sub getcharset();
|
|
sub getlang_real();
|
|
sub getlang_filename();
|
|
sub getlang_env();
|
|
sub getlang_accept();
|
|
sub getlang_setenv($);
|
|
sub getcharset_accept();
|
|
sub all_charsets();
|
|
}
|
|
|
|
use CGI::Cookie qw();
|
|
|
|
use Selima::Cache qw(:getlang);
|
|
use Selima::DataVars qw(:input :l10n :lninfo :output :requri);
|
|
use Selima::LnInfo;
|
|
|
|
# getlang: Get the appropriate language from the user-agent
|
|
sub getlang(;$) {
|
|
local ($_, %_);
|
|
$_ = $_[0];
|
|
# Return the proper data type
|
|
return defined $_? ln(getlang_real, $_): getlang_real;
|
|
}
|
|
|
|
# getcharset: Get the appropriate character set from the user-agent.
|
|
sub getcharset() {
|
|
local ($_, %_);
|
|
my $default;
|
|
# Obtained before
|
|
return $GetLang_charset if defined $GetLang_charset;
|
|
$default = getlang(LN_CHARSET);
|
|
|
|
# We have no choice
|
|
return ($GetLang_charset = $default) if all_charsets < 2;
|
|
# Parse the character set by the Accept-Charset header
|
|
return $GetLang_charset if defined($GetLang_charset = getcharset_accept);
|
|
# Cannot parse -- return the default
|
|
return ($GetLang_charset = $default);
|
|
}
|
|
|
|
# getlang_real: The real subroutine
|
|
sub getlang_real() {
|
|
local ($_, %_);
|
|
# Obtained before
|
|
return $GetLang_lang if defined $GetLang_lang;
|
|
|
|
# Uni-lingual
|
|
return ($GetLang_lang = $DEFAULT_LANG) if @ALL_LINGUAS == 1;
|
|
# Check the file name for specified language
|
|
# No setting environment in this case
|
|
return $GetLang_lang if defined($GetLang_lang = getlang_filename);
|
|
# Methods below should set the language in the environment
|
|
# Check the environment for specified language
|
|
if (defined($GetLang_lang = getlang_env)) {
|
|
getlang_setenv $GetLang_lang;
|
|
return $GetLang_lang;
|
|
}
|
|
# Parse the language by the Accept-Language header
|
|
if (defined($GetLang_lang = getlang_accept)) {
|
|
getlang_setenv $GetLang_lang;
|
|
return $GetLang_lang;
|
|
}
|
|
# Cannot parse -- return the default
|
|
$GetLang_lang = $DEFAULT_LANG;
|
|
getlang_setenv $GetLang_lang;
|
|
return $GetLang_lang;
|
|
}
|
|
|
|
# getlang_filename: Check the file name for specified language
|
|
sub getlang_filename() {
|
|
local ($_, %_);
|
|
my $langfile;
|
|
# Check the file name format
|
|
return undef unless defined $REQUEST_PATH && $REQUEST_PATH =~ /\.([^\.\/]+)\.[^\.\/]+$/;
|
|
$langfile = $1;
|
|
# Check each language for its file name format
|
|
@_ = grep $langfile = $_, map ln($_, LN_FILENAME), @ALL_LINGUAS;
|
|
return $_[0] if @_ > 0;
|
|
# Not found
|
|
return undef;
|
|
}
|
|
|
|
# getlang_env: Check the environment for specified language
|
|
sub getlang_env() {
|
|
local ($_, %_);
|
|
%_ = map { $_ => 1 } @ALL_LINGUAS;
|
|
# Check the query string
|
|
return $_ if defined $GET
|
|
&& defined($_ = $GET->param("lang")) && exists $_{$_};
|
|
# Check the POSTed form
|
|
return $_ if defined $POST
|
|
&& defined($_ = $POST->param("lang")) && exists $_{$_};
|
|
# Check the cookies
|
|
return $_ if exists $COOKIES{"lang"}
|
|
&& exists $_{$_ = $COOKIES{"lang"}->value};
|
|
# Not set
|
|
return undef;
|
|
}
|
|
|
|
# getlang_accept: Parse the language by the Accept-Language header
|
|
# Refer to HTTP/1.1 section 14.4 for this algorism
|
|
sub getlang_accept() {
|
|
local ($_, %_);
|
|
my (@rngs, %rngqf, $defqf, $ln, @attrs, %tagqf, $tag, $match);
|
|
# Accept-Language not set
|
|
return undef if !exists $ENV{"HTTP_ACCEPT_LANGUAGE"};
|
|
|
|
# Split into language ranges
|
|
$_ = $ENV{"HTTP_ACCEPT_LANGUAGE"};
|
|
s/^\s*(.*?)\s*$/$1/;
|
|
@rngs = split /\s*,\s*/, $_;
|
|
%rngqf = qw();
|
|
foreach my $range (@rngs) {
|
|
# Split into attributes
|
|
$range =~ s/^\s*(.*?)\s*$/$1/;
|
|
@attrs = split /\s*;\s*/, $range;
|
|
# First piece is the language range
|
|
$ln = shift @attrs;
|
|
# Lower-case it
|
|
$ln = lc $ln;
|
|
# Find the quality factor
|
|
foreach my $attr (@attrs) {
|
|
# A numeric quality factor found
|
|
$rngqf{$ln} = $1+0 if $attr =~ /^q=([01](?:\.\d{1,3})?)$/;
|
|
}
|
|
# Default quality factor to 1
|
|
$rngqf{$ln} = 1 if !exists $rngqf{$ln};
|
|
}
|
|
# The default quality factor
|
|
if (exists $rngqf{"*"}) {
|
|
$defqf = $rngqf{"*"};
|
|
delete $rngqf{"*"};
|
|
} else {
|
|
$defqf = 0;
|
|
}
|
|
|
|
# Language tags (what we have)
|
|
%tagqf = qw(); # Calculated quality factor
|
|
foreach my $ln (@ALL_LINGUAS) {
|
|
# Language tag, as specified in ISO
|
|
$tag = ln $ln, LN_NAME;
|
|
# Matched range of the quality factor
|
|
undef $match if defined $match;
|
|
# Language ranges (what the user sent to match us)
|
|
foreach my $range (keys %rngqf) {
|
|
# Exactly match or match a prefix
|
|
if ($tag eq $range || $tag =~ /^\Q$range\E-/) {
|
|
# Not matched yet
|
|
if (!defined $match) {
|
|
$tagqf{$ln} = $rngqf{$range}; # Quality Factor
|
|
$match = $range; # Record the matched range
|
|
# A longer match range
|
|
} elsif (length $range > length $match) {
|
|
$tagqf{$ln} = $rngqf{$range}; # Quality Factor
|
|
$match = $range; # Record the matched range
|
|
}
|
|
}
|
|
}
|
|
# Not matched -- apply a default quality factor
|
|
$tagqf{$ln} = $defqf if !exists $tagqf{$ln};
|
|
}
|
|
|
|
# Drop unacceptable languages
|
|
foreach my $ln (keys %tagqf) {
|
|
delete $tagqf{$ln} unless $tagqf{$ln} > 0;
|
|
}
|
|
# Nothing acceptable
|
|
return undef if scalar(keys %tagqf) == 0;
|
|
|
|
# Sort by the quality factor
|
|
@_ = sort { $tagqf{$b} <=> $tagqf{$a}
|
|
|| ($a eq $DEFAULT_LANG? -1: 0)
|
|
|| ($b eq $DEFAULT_LANG? 1: 0) } keys %tagqf;
|
|
# A preferred match
|
|
return $_[0];
|
|
}
|
|
|
|
# getlang_setenv: Check the environment for specified language
|
|
sub getlang_setenv($) {
|
|
local ($_, %_);
|
|
$_ = $_[0];
|
|
# Set the cookie to keep the result
|
|
$NEWCOOKIES{"lang"} = new CGI::Cookie(-name=>"lang", -value=>$_)
|
|
if !exists $COOKIES{"lang"} || $COOKIES{"lang"}->value ne $_;
|
|
return;
|
|
}
|
|
|
|
# getcharset_accept: Parse the character set by the Accept-Charset header
|
|
# Refer to HTTP/1.1 section 14.2 for this algorism
|
|
sub getcharset_accept() {
|
|
local ($_, %_);
|
|
my (@rngs, %rngqf, $defqf, $cs, @attrs, %tagqf, $tag, $default);
|
|
# Accept-Charset not set
|
|
return undef if !exists $ENV{"HTTP_ACCEPT_CHARSET"};
|
|
$default = getlang(LN_CHARSET);
|
|
|
|
# Split into character set ranges
|
|
$_ = $ENV{"HTTP_ACCEPT_CHARSET"};
|
|
s/^\s*(.*?)\s*$/$1/;
|
|
@rngs = split /\s*,\s*/, $_;
|
|
%rngqf = qw();
|
|
foreach my $range (@rngs) {
|
|
# Split into attributes
|
|
$range =~ s/^\s*(.*?)\s*$/$1/;
|
|
@attrs = split /\s*;\s*/, $range;
|
|
# First piece is the character set range
|
|
$cs = shift @attrs;
|
|
# Lower-case it
|
|
$cs = lc $cs;
|
|
# Find the quality factor
|
|
foreach my $attr (@attrs) {
|
|
# A numeric quality factor found
|
|
$rngqf{$cs} = $1+0 if $attr =~ /^q=([01](?:\.\d{1,3})?)$/;
|
|
}
|
|
# Default quality factor to 1
|
|
$rngqf{$cs} = 1 if !exists $rngqf{$cs};
|
|
}
|
|
# The default quality factor
|
|
if (exists $rngqf{"*"}) {
|
|
$defqf = $rngqf{"*"};
|
|
delete $rngqf{"*"};
|
|
} else {
|
|
# Default ISO-8859-1 to 1
|
|
$rngqf{"iso-8859-1"} = 1 if !exists $rngqf{"iso-8859-1"};
|
|
$defqf = 0;
|
|
}
|
|
|
|
# Character set tags (what we have)
|
|
%tagqf = qw(); # Calculated quality factor
|
|
foreach my $cs (all_charsets) {
|
|
# Character set tag, as specified in ISO
|
|
$tag = lc $cs;
|
|
# Character set ranges (what the user sent to match us)
|
|
foreach my $range (keys %rngqf) {
|
|
# Matched
|
|
$tagqf{$cs} = $rngqf{$range} # Quality Factor
|
|
if $tag eq $range;
|
|
}
|
|
# Not matched -- apply a default quality factor
|
|
$tagqf{$cs} = $defqf if !exists $tagqf{$cs};
|
|
}
|
|
|
|
# Drop unacceptable character sets
|
|
foreach my $cs (keys %tagqf) {
|
|
delete $tagqf{$cs} unless $tagqf{$cs} > 0;
|
|
}
|
|
# Nothing acceptable
|
|
return undef if scalar(keys %tagqf) == 0;
|
|
|
|
# Sort by the quality factor
|
|
@_ = sort { $tagqf{$b} <=> $tagqf{$a}
|
|
|| ($a eq $default? -1: 0)
|
|
|| ($b eq $default? 1: 0) } keys %tagqf;
|
|
# A preferred match
|
|
return $_[0];
|
|
}
|
|
|
|
# all_charsets: Obtain all the available character sets
|
|
# Available character sets are the default character set of this
|
|
# language, and UTF-8
|
|
sub all_charsets() {
|
|
local ($_, %_);
|
|
return @GetLang_all_charsets if @GetLang_all_charsets > 0;
|
|
%_ = qw();
|
|
$_ = getlang(LN_CHARSET);
|
|
$_{$_} = 1;
|
|
$_{"UTF-8"} = 1;
|
|
@GetLang_all_charsets = keys %_;
|
|
return @GetLang_all_charsets;
|
|
}
|
|
|
|
return 1;
|