444 lines
14 KiB
Perl
444 lines
14 KiB
Perl
# Selima Website Content Management System
|
|
# DecForm.pm: The subroutines to decode the the input form from various character sets.
|
|
|
|
# Copyright (c) 2004-2018 imacat.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Author: imacat <imacat@mail.imacat.idv.tw>
|
|
# First written: 2004-09-10
|
|
|
|
package Selima::DecForm;
|
|
use 5.008;
|
|
use strict;
|
|
use warnings;
|
|
use base qw(Exporter);
|
|
use vars qw(@EXPORT @EXPORT_OK);
|
|
BEGIN {
|
|
@EXPORT = qw(init_forms decode_forms decode_forms_delay);
|
|
@EXPORT_OK = @EXPORT;
|
|
# Prototype declaration
|
|
sub init_forms();
|
|
sub decode_forms();
|
|
sub decode_forms_delay();
|
|
sub try_decode_form($$);
|
|
sub split_upload($);
|
|
}
|
|
|
|
use Encode qw(decode FB_CROAK);
|
|
use URI::Escape qw(uri_escape);
|
|
use HTML::Entities qw(%entity2char);
|
|
|
|
use Selima::DataVars qw(:env :input :l10n :lninfo);
|
|
use Selima::GetLang;
|
|
use Selima::HTTP;
|
|
use Selima::LnInfo;
|
|
use Selima::Unicode;
|
|
|
|
use vars qw(@TRY);
|
|
@TRY = qw(US-ASCII Big5 GB2312 Shift-JIS UTF-8 ISO-8859-1 ISO-8859-15);
|
|
|
|
# init_forms: Initialize the user-input form data
|
|
sub init_forms() {
|
|
local ($_, %_);
|
|
my ($r, $rawget, $rawpost, $postdelay);
|
|
|
|
# Do not redo
|
|
return if scalar(keys %USER_INPUT) > 0;
|
|
|
|
$r = $IS_MP2? Apache2::RequestUtil->request: Apache->request
|
|
if $IS_MODPERL;
|
|
|
|
# Scan the GET query
|
|
$rawget = $IS_MODPERL? (defined $r->args? $r->args: ""):
|
|
(exists $ENV{"QUERY_STRING"}? $ENV{"QUERY_STRING"}: "");
|
|
$GET = new CGI($rawget);
|
|
|
|
# Scan the POST form
|
|
$postdelay = 0;
|
|
if (($IS_MODPERL? $r->method: $ENV{"REQUEST_METHOD"}) eq "POST") {
|
|
if ($IS_MODPERL) {
|
|
# Ordinary POST form
|
|
if ( !defined($_ = $r->headers_in->get("Content-Type"))
|
|
|| $_ eq "application/x-www-form-urlencoded") {
|
|
if ($IS_MP2) {
|
|
$r->read($rawpost, $r->headers_in->get("Content-Length"));
|
|
} else {
|
|
$rawpost = $r->content;
|
|
}
|
|
$POST = new CGI($rawpost);
|
|
$r->headers_in->set("X-Selima-POST", $rawpost);
|
|
# Others (file uploads, etc) and CGI environment is ready
|
|
# - parse with CGI.pm default parser
|
|
} elsif (exists $ENV{"SERVER_SOFTWARE"}) {
|
|
$POST = new CGI;
|
|
@_ = qw();
|
|
foreach my $name ($POST->param) {
|
|
foreach my $val ($POST->param($name)) {
|
|
push @_, uri_escape($name) . "=" . uri_escape($val);
|
|
}
|
|
}
|
|
$rawpost = join "&", @_;
|
|
# Others (file uploads, etc) and CGI environment is not ready yet
|
|
# - delay the parsing
|
|
} else {
|
|
$rawpost = "";
|
|
$POST = new CGI($rawpost);
|
|
$postdelay = 1;
|
|
}
|
|
} else {
|
|
# POST form was obtained by mod_perl handler before
|
|
if (exists $ENV{"HTTP_X_SELIMA_POST"}) {
|
|
$rawpost = $ENV{"HTTP_X_SELIMA_POST"};
|
|
$POST = new CGI($rawpost);
|
|
# Ordinary POST form -- parse the STDIN
|
|
} elsif ( !exists $ENV{"HTTP_CONTENT_TYPE"}
|
|
|| $ENV{"HTTP_CONTENT_TYPE"} eq "application/x-www-form-urlencoded") {
|
|
$rawpost = <STDIN>;
|
|
$POST = new CGI($rawpost);
|
|
# Others (file uploads, etc) - parse with CGI.pm default parser
|
|
} else {
|
|
$POST = new CGI;
|
|
@_ = qw();
|
|
foreach my $name ($POST->param) {
|
|
foreach my $val ($POST->param($name)) {
|
|
push @_, uri_escape($name) . "=" . uri_escape($val);
|
|
}
|
|
}
|
|
$rawpost = join "&", @_;
|
|
}
|
|
}
|
|
} else {
|
|
$rawpost = "";
|
|
$POST = new CGI($rawpost);
|
|
}
|
|
|
|
# Split the uploaded files
|
|
($POST, $UPLOAD) = split_upload $POST;
|
|
|
|
# Initialize the data deposit
|
|
%USER_INPUT = (
|
|
"GET_RAW" => $GET,
|
|
"GET_UTF8" => $GET,
|
|
"GET_CHARSET" => undef,
|
|
"GET_CSERR" => 1,
|
|
"GET_RAWDATA" => $rawget,
|
|
"GET_KEYS" => [ $GET->param ],
|
|
"POST_RAW" => $POST,
|
|
"POST_UTF8" => $POST,
|
|
"POST_CHARSET" => undef,
|
|
"POST_CSERR" => 1,
|
|
"POST_RAWDATA" => $rawpost,
|
|
"POST_DELAY" => $postdelay,
|
|
"UPLOAD" => $UPLOAD,
|
|
);
|
|
|
|
return;
|
|
}
|
|
|
|
# decode_forms: Decode user FORM input from some character set
|
|
sub decode_forms() {
|
|
local ($_, %_);
|
|
my (@charsets, @charsets_site);
|
|
|
|
# The possible character sets of this website
|
|
@charsets_site = map ln($_, LN_CHARSET), @ALL_LINGUAS;
|
|
|
|
# The GET arguments
|
|
# The character set candidates
|
|
@_ = qw();
|
|
push @_, $GET->param("charset") if defined $GET->param("charset");
|
|
push @_, getlang LN_CHARSET;
|
|
push @_, @charsets_site;
|
|
push @_, @TRY;
|
|
@charsets = qw();
|
|
%_ = qw();
|
|
foreach (@_) {
|
|
if (lc $_ eq "big5") {
|
|
if (!exists $_{"Big5-ETen"}) {
|
|
push @charsets, "Big5-ETen";
|
|
$_{"Big5-ETen"} = 1;
|
|
}
|
|
if (!exists $_{"Big5"}) {
|
|
push @charsets, "Big5";
|
|
$_{"Big5"} = 1;
|
|
}
|
|
if (!exists $_{"CP950"}) {
|
|
push @charsets, "CP950";
|
|
$_{"CP950"} = 1;
|
|
}
|
|
if (!exists $_{"Big5-HKSCS"}) {
|
|
push @charsets, "Big5-HKSCS";
|
|
$_{"Big5-HKSCS"} = 1;
|
|
}
|
|
} elsif (lc $_ eq "big5-hkscs") {
|
|
if (!exists $_{"Big5-HKSCS"}) {
|
|
push @charsets, "Big5-HKSCS";
|
|
$_{"Big5-HKSCS"} = 1;
|
|
}
|
|
} elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") {
|
|
if (!exists $_{"GB2312"}) {
|
|
push @charsets, "GB2312";
|
|
$_{"GB2312"} = 1;
|
|
}
|
|
if (!exists $_{"GB18030"}) {
|
|
push @charsets, "GB18030";
|
|
$_{"GB18030"} = 1;
|
|
}
|
|
} else {
|
|
if (!exists $_{$_}) {
|
|
push @charsets, $_;
|
|
$_{$_} = 1;
|
|
}
|
|
}
|
|
}
|
|
# Check each character set
|
|
foreach my $charset (@charsets) {
|
|
$_ = $USER_INPUT{"GET_RAW"};
|
|
# In this character set
|
|
if (defined($_ = try_decode_form $_, $charset)) {
|
|
$GET = $_;
|
|
$USER_INPUT{"GET_UTF8"} = $_;
|
|
$USER_INPUT{"GET_CHARSET"} = $charset;
|
|
$USER_INPUT{"GET_CSERR"} = 0;
|
|
$USER_INPUT{"GET_KEYS"} = [ $_->param ];
|
|
last;
|
|
}
|
|
}
|
|
|
|
# The POSTed form
|
|
# The character set candidates
|
|
@_ = qw();
|
|
push @_, $POST->param("charset") if defined $POST->param("charset");
|
|
push @_, getlang LN_CHARSET;
|
|
push @_, @charsets_site;
|
|
push @_, @TRY;
|
|
@charsets = qw();
|
|
%_ = qw();
|
|
foreach (@_) {
|
|
if (lc $_ eq "big5") {
|
|
if (!exists $_{"Big5-ETen"}) {
|
|
push @charsets, "Big5-ETen";
|
|
$_{"Big5-ETen"} = 1;
|
|
}
|
|
if (!exists $_{"Big5"}) {
|
|
push @charsets, "Big5";
|
|
$_{"Big5"} = 1;
|
|
}
|
|
if (!exists $_{"CP950"}) {
|
|
push @charsets, "CP950";
|
|
$_{"CP950"} = 1;
|
|
}
|
|
if (!exists $_{"Big5-HKSCS"}) {
|
|
push @charsets, "Big5-HKSCS";
|
|
$_{"Big5-HKSCS"} = 1;
|
|
}
|
|
} elsif (lc $_ eq "big5-hkscs") {
|
|
if (!exists $_{"Big5-HKSCS"}) {
|
|
push @charsets, "Big5-HKSCS";
|
|
$_{"Big5-HKSCS"} = 1;
|
|
}
|
|
} elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") {
|
|
if (!exists $_{"GB2312"}) {
|
|
push @charsets, "GB2312";
|
|
$_{"GB2312"} = 1;
|
|
}
|
|
if (!exists $_{"GB18030"}) {
|
|
push @charsets, "GB18030";
|
|
$_{"GB18030"} = 1;
|
|
}
|
|
} else {
|
|
if (!exists $_{$_}) {
|
|
push @charsets, $_;
|
|
$_{$_} = 1;
|
|
}
|
|
}
|
|
}
|
|
# Check each character set
|
|
foreach my $charset (@charsets) {
|
|
$_ = $USER_INPUT{"POST_RAW"};
|
|
# In this character set
|
|
if (defined($_ = try_decode_form $_, $charset)) {
|
|
$POST = $_;
|
|
$USER_INPUT{"POST_UTF8"} = $_;
|
|
$USER_INPUT{"POST_CHARSET"} = $charset;
|
|
$USER_INPUT{"POST_CSERR"} = 0;
|
|
last;
|
|
}
|
|
}
|
|
|
|
# No valid character set was found
|
|
http_500 "Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter."
|
|
if $USER_INPUT{"GET_CSERR"} || $USER_INPUT{"POST_CSERR"};
|
|
|
|
return;
|
|
}
|
|
|
|
# decode_forms_delay: Initialize and decode multipart/form-data (uploads)
|
|
# for mod_perl. We cannot obtain it before the CGI environment is ready.
|
|
sub decode_forms_delay() {
|
|
local ($_, %_);
|
|
my ($r, $rawpost, @charsets, @charsets_site);
|
|
# Parsing not delayed
|
|
return unless $USER_INPUT{"POST_DELAY"};
|
|
|
|
# Obtain the POST form
|
|
$POST = new CGI;
|
|
@_ = qw();
|
|
foreach my $name ($POST->param) {
|
|
foreach my $val ($POST->param($name)) {
|
|
push @_, uri_escape($name) . "=" . uri_escape($val);
|
|
}
|
|
}
|
|
$rawpost = join "&", @_;
|
|
|
|
# Split the uploaded files
|
|
($POST, $UPLOAD) = split_upload $POST;
|
|
|
|
# Record it
|
|
$USER_INPUT{"POST_RAW"} = $POST;
|
|
$USER_INPUT{"POST_UTF8"} = $POST;
|
|
$USER_INPUT{"POST_CHARSET"} = undef;
|
|
$USER_INPUT{"POST_CSERR"} = 1;
|
|
$USER_INPUT{"POST_RAWDATA"} = $rawpost;
|
|
$USER_INPUT{"UPLOAD"} = $UPLOAD;
|
|
|
|
# The character set candidates
|
|
@_ = qw();
|
|
push @_, $POST->param("charset") if defined $POST->param("charset");
|
|
push @_, getlang LN_CHARSET;
|
|
push @_, map ln($_, LN_CHARSET), @ALL_LINGUAS;
|
|
push @_, @TRY;
|
|
@charsets = qw();
|
|
%_ = qw();
|
|
foreach (@_) {
|
|
if (lc $_ eq "big5") {
|
|
if (!exists $_{"Big5-ETen"}) {
|
|
push @charsets, "Big5-ETen";
|
|
$_{"Big5-ETen"} = 1;
|
|
}
|
|
if (!exists $_{"Big5"}) {
|
|
push @charsets, "Big5";
|
|
$_{"Big5"} = 1;
|
|
}
|
|
if (!exists $_{"CP950"}) {
|
|
push @charsets, "CP950";
|
|
$_{"CP950"} = 1;
|
|
}
|
|
if (!exists $_{"Big5-HKSCS"}) {
|
|
push @charsets, "Big5-HKSCS";
|
|
$_{"Big5-HKSCS"} = 1;
|
|
}
|
|
} elsif (lc $_ eq "big5-hkscs") {
|
|
if (!exists $_{"Big5-HKSCS"}) {
|
|
push @charsets, "Big5-HKSCS";
|
|
$_{"Big5-HKSCS"} = 1;
|
|
}
|
|
} elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") {
|
|
if (!exists $_{"GB2312"}) {
|
|
push @charsets, "GB2312";
|
|
$_{"GB2312"} = 1;
|
|
}
|
|
if (!exists $_{"GB18030"}) {
|
|
push @charsets, "GB18030";
|
|
$_{"GB18030"} = 1;
|
|
}
|
|
} else {
|
|
if (!exists $_{$_}) {
|
|
push @charsets, $_;
|
|
$_{$_} = 1;
|
|
}
|
|
}
|
|
}
|
|
# Check each character set
|
|
foreach my $charset (@charsets) {
|
|
$_ = $USER_INPUT{"POST_RAW"};
|
|
# In this character set
|
|
if (defined($_ = try_decode_form $_, $charset)) {
|
|
$POST = $_;
|
|
$USER_INPUT{"POST_UTF8"} = $_;
|
|
$USER_INPUT{"POST_CHARSET"} = $charset;
|
|
$USER_INPUT{"POST_CSERR"} = 0;
|
|
last;
|
|
}
|
|
}
|
|
|
|
# No valid character set was found
|
|
http_500 "Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter."
|
|
if $USER_INPUT{"GET_CSERR"} || $USER_INPUT{"POST_CSERR"};
|
|
|
|
return;
|
|
}
|
|
|
|
# try_decode_form: Try to decode a CGI form with a specific character set
|
|
sub try_decode_form($$) {
|
|
local ($_, %_);
|
|
my ($SOURCE, $charset, $RESULT, $name, $is_upload);
|
|
($SOURCE, $charset) = @_;
|
|
|
|
# Obtain a copy of the converted result
|
|
$RESULT = new CGI("");
|
|
$is_upload = 0;
|
|
foreach $name ($SOURCE->param) {
|
|
@_ = $SOURCE->param($name);
|
|
return if !defined($name = hcref_decode($charset, $name));
|
|
foreach (@_) {
|
|
s/\x00//g;
|
|
return if !defined($_ = hcref_decode($charset, $_));
|
|
}
|
|
$RESULT->param($name, @_);
|
|
}
|
|
|
|
return $RESULT;
|
|
}
|
|
|
|
# split_upload: Split the uploaded files from the POST form
|
|
sub split_upload($) {
|
|
local ($_, %_);
|
|
my ($FORM, $UPLOAD, $name);
|
|
$FORM = $_[0];
|
|
|
|
$UPLOAD = $FORM;
|
|
$FORM = new CGI("");
|
|
foreach $name ($UPLOAD->param) {
|
|
my (@valp, @valu);
|
|
@valp = qw();
|
|
@valu = qw();
|
|
foreach ($UPLOAD->param($name)) {
|
|
# A scalar value
|
|
if (ref $_ eq "") {
|
|
push @valp, $_;
|
|
# Others (file uploads Fh, etc.)
|
|
} else {
|
|
push @valu, $_;
|
|
}
|
|
}
|
|
# Some scalar found
|
|
if (@valp > 0) {
|
|
# Move to the POST form
|
|
$FORM->param($name, @valp);
|
|
# There are some uploaded files for this column
|
|
if (@valu > 0) {
|
|
$UPLOAD->param($name, @valu);
|
|
# No uploaded file for this column
|
|
} else {
|
|
$UPLOAD->delete($name);
|
|
}
|
|
}
|
|
}
|
|
|
|
return ($FORM, $UPLOAD);
|
|
}
|
|
|
|
return 1;
|