Files
selima-perl/lib/perl5/Selima/DecForm.pm
2026-03-10 21:31:43 +08:00

444 lines
14 KiB
Perl

# Selima Website Content Management System
# DecForm.pm: The subroutines to decode the the input form from various character sets.
# Copyright (c) 2004-2018 imacat.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Author: imacat <imacat@mail.imacat.idv.tw>
# First written: 2004-09-10
package Selima::DecForm;
use 5.008;
use strict;
use warnings;
use base qw(Exporter);
use vars qw(@EXPORT @EXPORT_OK);
BEGIN {
@EXPORT = qw(init_forms decode_forms decode_forms_delay);
@EXPORT_OK = @EXPORT;
# Prototype declaration
sub init_forms();
sub decode_forms();
sub decode_forms_delay();
sub try_decode_form($$);
sub split_upload($);
}
use Encode qw(decode FB_CROAK);
use URI::Escape qw(uri_escape);
use HTML::Entities qw(%entity2char);
use Selima::DataVars qw(:env :input :l10n :lninfo);
use Selima::GetLang;
use Selima::HTTP;
use Selima::LnInfo;
use Selima::Unicode;
use vars qw(@TRY);
@TRY = qw(US-ASCII Big5 GB2312 Shift-JIS UTF-8 ISO-8859-1 ISO-8859-15);
# init_forms: Initialize the user-input form data
sub init_forms() {
local ($_, %_);
my ($r, $rawget, $rawpost, $postdelay);
# Do not redo
return if scalar(keys %USER_INPUT) > 0;
$r = $IS_MP2? Apache2::RequestUtil->request: Apache->request
if $IS_MODPERL;
# Scan the GET query
$rawget = $IS_MODPERL? (defined $r->args? $r->args: ""):
(exists $ENV{"QUERY_STRING"}? $ENV{"QUERY_STRING"}: "");
$GET = new CGI($rawget);
# Scan the POST form
$postdelay = 0;
if (($IS_MODPERL? $r->method: $ENV{"REQUEST_METHOD"}) eq "POST") {
if ($IS_MODPERL) {
# Ordinary POST form
if ( !defined($_ = $r->headers_in->get("Content-Type"))
|| $_ eq "application/x-www-form-urlencoded") {
if ($IS_MP2) {
$r->read($rawpost, $r->headers_in->get("Content-Length"));
} else {
$rawpost = $r->content;
}
$POST = new CGI($rawpost);
$r->headers_in->set("X-Selima-POST", $rawpost);
# Others (file uploads, etc) and CGI environment is ready
# - parse with CGI.pm default parser
} elsif (exists $ENV{"SERVER_SOFTWARE"}) {
$POST = new CGI;
@_ = qw();
foreach my $name ($POST->param) {
foreach my $val ($POST->param($name)) {
push @_, uri_escape($name) . "=" . uri_escape($val);
}
}
$rawpost = join "&", @_;
# Others (file uploads, etc) and CGI environment is not ready yet
# - delay the parsing
} else {
$rawpost = "";
$POST = new CGI($rawpost);
$postdelay = 1;
}
} else {
# POST form was obtained by mod_perl handler before
if (exists $ENV{"HTTP_X_SELIMA_POST"}) {
$rawpost = $ENV{"HTTP_X_SELIMA_POST"};
$POST = new CGI($rawpost);
# Ordinary POST form -- parse the STDIN
} elsif ( !exists $ENV{"HTTP_CONTENT_TYPE"}
|| $ENV{"HTTP_CONTENT_TYPE"} eq "application/x-www-form-urlencoded") {
$rawpost = <STDIN>;
$POST = new CGI($rawpost);
# Others (file uploads, etc) - parse with CGI.pm default parser
} else {
$POST = new CGI;
@_ = qw();
foreach my $name ($POST->param) {
foreach my $val ($POST->param($name)) {
push @_, uri_escape($name) . "=" . uri_escape($val);
}
}
$rawpost = join "&", @_;
}
}
} else {
$rawpost = "";
$POST = new CGI($rawpost);
}
# Split the uploaded files
($POST, $UPLOAD) = split_upload $POST;
# Initialize the data deposit
%USER_INPUT = (
"GET_RAW" => $GET,
"GET_UTF8" => $GET,
"GET_CHARSET" => undef,
"GET_CSERR" => 1,
"GET_RAWDATA" => $rawget,
"GET_KEYS" => [ $GET->param ],
"POST_RAW" => $POST,
"POST_UTF8" => $POST,
"POST_CHARSET" => undef,
"POST_CSERR" => 1,
"POST_RAWDATA" => $rawpost,
"POST_DELAY" => $postdelay,
"UPLOAD" => $UPLOAD,
);
return;
}
# decode_forms: Decode user FORM input from some character set
sub decode_forms() {
local ($_, %_);
my (@charsets, @charsets_site);
# The possible character sets of this website
@charsets_site = map ln($_, LN_CHARSET), @ALL_LINGUAS;
# The GET arguments
# The character set candidates
@_ = qw();
push @_, $GET->param("charset") if defined $GET->param("charset");
push @_, getlang LN_CHARSET;
push @_, @charsets_site;
push @_, @TRY;
@charsets = qw();
%_ = qw();
foreach (@_) {
if (lc $_ eq "big5") {
if (!exists $_{"Big5-ETen"}) {
push @charsets, "Big5-ETen";
$_{"Big5-ETen"} = 1;
}
if (!exists $_{"Big5"}) {
push @charsets, "Big5";
$_{"Big5"} = 1;
}
if (!exists $_{"CP950"}) {
push @charsets, "CP950";
$_{"CP950"} = 1;
}
if (!exists $_{"Big5-HKSCS"}) {
push @charsets, "Big5-HKSCS";
$_{"Big5-HKSCS"} = 1;
}
} elsif (lc $_ eq "big5-hkscs") {
if (!exists $_{"Big5-HKSCS"}) {
push @charsets, "Big5-HKSCS";
$_{"Big5-HKSCS"} = 1;
}
} elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") {
if (!exists $_{"GB2312"}) {
push @charsets, "GB2312";
$_{"GB2312"} = 1;
}
if (!exists $_{"GB18030"}) {
push @charsets, "GB18030";
$_{"GB18030"} = 1;
}
} else {
if (!exists $_{$_}) {
push @charsets, $_;
$_{$_} = 1;
}
}
}
# Check each character set
foreach my $charset (@charsets) {
$_ = $USER_INPUT{"GET_RAW"};
# In this character set
if (defined($_ = try_decode_form $_, $charset)) {
$GET = $_;
$USER_INPUT{"GET_UTF8"} = $_;
$USER_INPUT{"GET_CHARSET"} = $charset;
$USER_INPUT{"GET_CSERR"} = 0;
$USER_INPUT{"GET_KEYS"} = [ $_->param ];
last;
}
}
# The POSTed form
# The character set candidates
@_ = qw();
push @_, $POST->param("charset") if defined $POST->param("charset");
push @_, getlang LN_CHARSET;
push @_, @charsets_site;
push @_, @TRY;
@charsets = qw();
%_ = qw();
foreach (@_) {
if (lc $_ eq "big5") {
if (!exists $_{"Big5-ETen"}) {
push @charsets, "Big5-ETen";
$_{"Big5-ETen"} = 1;
}
if (!exists $_{"Big5"}) {
push @charsets, "Big5";
$_{"Big5"} = 1;
}
if (!exists $_{"CP950"}) {
push @charsets, "CP950";
$_{"CP950"} = 1;
}
if (!exists $_{"Big5-HKSCS"}) {
push @charsets, "Big5-HKSCS";
$_{"Big5-HKSCS"} = 1;
}
} elsif (lc $_ eq "big5-hkscs") {
if (!exists $_{"Big5-HKSCS"}) {
push @charsets, "Big5-HKSCS";
$_{"Big5-HKSCS"} = 1;
}
} elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") {
if (!exists $_{"GB2312"}) {
push @charsets, "GB2312";
$_{"GB2312"} = 1;
}
if (!exists $_{"GB18030"}) {
push @charsets, "GB18030";
$_{"GB18030"} = 1;
}
} else {
if (!exists $_{$_}) {
push @charsets, $_;
$_{$_} = 1;
}
}
}
# Check each character set
foreach my $charset (@charsets) {
$_ = $USER_INPUT{"POST_RAW"};
# In this character set
if (defined($_ = try_decode_form $_, $charset)) {
$POST = $_;
$USER_INPUT{"POST_UTF8"} = $_;
$USER_INPUT{"POST_CHARSET"} = $charset;
$USER_INPUT{"POST_CSERR"} = 0;
last;
}
}
# No valid character set was found
http_500 "Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter."
if $USER_INPUT{"GET_CSERR"} || $USER_INPUT{"POST_CSERR"};
return;
}
# decode_forms_delay: Initialize and decode multipart/form-data (uploads)
# for mod_perl. We cannot obtain it before the CGI environment is ready.
sub decode_forms_delay() {
local ($_, %_);
my ($r, $rawpost, @charsets, @charsets_site);
# Parsing not delayed
return unless $USER_INPUT{"POST_DELAY"};
# Obtain the POST form
$POST = new CGI;
@_ = qw();
foreach my $name ($POST->param) {
foreach my $val ($POST->param($name)) {
push @_, uri_escape($name) . "=" . uri_escape($val);
}
}
$rawpost = join "&", @_;
# Split the uploaded files
($POST, $UPLOAD) = split_upload $POST;
# Record it
$USER_INPUT{"POST_RAW"} = $POST;
$USER_INPUT{"POST_UTF8"} = $POST;
$USER_INPUT{"POST_CHARSET"} = undef;
$USER_INPUT{"POST_CSERR"} = 1;
$USER_INPUT{"POST_RAWDATA"} = $rawpost;
$USER_INPUT{"UPLOAD"} = $UPLOAD;
# The character set candidates
@_ = qw();
push @_, $POST->param("charset") if defined $POST->param("charset");
push @_, getlang LN_CHARSET;
push @_, map ln($_, LN_CHARSET), @ALL_LINGUAS;
push @_, @TRY;
@charsets = qw();
%_ = qw();
foreach (@_) {
if (lc $_ eq "big5") {
if (!exists $_{"Big5-ETen"}) {
push @charsets, "Big5-ETen";
$_{"Big5-ETen"} = 1;
}
if (!exists $_{"Big5"}) {
push @charsets, "Big5";
$_{"Big5"} = 1;
}
if (!exists $_{"CP950"}) {
push @charsets, "CP950";
$_{"CP950"} = 1;
}
if (!exists $_{"Big5-HKSCS"}) {
push @charsets, "Big5-HKSCS";
$_{"Big5-HKSCS"} = 1;
}
} elsif (lc $_ eq "big5-hkscs") {
if (!exists $_{"Big5-HKSCS"}) {
push @charsets, "Big5-HKSCS";
$_{"Big5-HKSCS"} = 1;
}
} elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") {
if (!exists $_{"GB2312"}) {
push @charsets, "GB2312";
$_{"GB2312"} = 1;
}
if (!exists $_{"GB18030"}) {
push @charsets, "GB18030";
$_{"GB18030"} = 1;
}
} else {
if (!exists $_{$_}) {
push @charsets, $_;
$_{$_} = 1;
}
}
}
# Check each character set
foreach my $charset (@charsets) {
$_ = $USER_INPUT{"POST_RAW"};
# In this character set
if (defined($_ = try_decode_form $_, $charset)) {
$POST = $_;
$USER_INPUT{"POST_UTF8"} = $_;
$USER_INPUT{"POST_CHARSET"} = $charset;
$USER_INPUT{"POST_CSERR"} = 0;
last;
}
}
# No valid character set was found
http_500 "Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter."
if $USER_INPUT{"GET_CSERR"} || $USER_INPUT{"POST_CSERR"};
return;
}
# try_decode_form: Try to decode a CGI form with a specific character set
sub try_decode_form($$) {
local ($_, %_);
my ($SOURCE, $charset, $RESULT, $name, $is_upload);
($SOURCE, $charset) = @_;
# Obtain a copy of the converted result
$RESULT = new CGI("");
$is_upload = 0;
foreach $name ($SOURCE->param) {
@_ = $SOURCE->param($name);
return if !defined($name = hcref_decode($charset, $name));
foreach (@_) {
s/\x00//g;
return if !defined($_ = hcref_decode($charset, $_));
}
$RESULT->param($name, @_);
}
return $RESULT;
}
# split_upload: Split the uploaded files from the POST form
sub split_upload($) {
local ($_, %_);
my ($FORM, $UPLOAD, $name);
$FORM = $_[0];
$UPLOAD = $FORM;
$FORM = new CGI("");
foreach $name ($UPLOAD->param) {
my (@valp, @valu);
@valp = qw();
@valu = qw();
foreach ($UPLOAD->param($name)) {
# A scalar value
if (ref $_ eq "") {
push @valp, $_;
# Others (file uploads Fh, etc.)
} else {
push @valu, $_;
}
}
# Some scalar found
if (@valp > 0) {
# Move to the POST form
$FORM->param($name, @valp);
# There are some uploaded files for this column
if (@valu > 0) {
$UPLOAD->param($name, @valu);
# No uploaded file for this column
} else {
$UPLOAD->delete($name);
}
}
}
return ($FORM, $UPLOAD);
}
return 1;