# Selima Website Content Management System # DecForm.pm: The subroutines to decode the the input form from various character sets. # Copyright (c) 2004-2018 imacat. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Author: imacat # First written: 2004-09-10 package Selima::DecForm; use 5.008; use strict; use warnings; use base qw(Exporter); use vars qw(@EXPORT @EXPORT_OK); BEGIN { @EXPORT = qw(init_forms decode_forms decode_forms_delay); @EXPORT_OK = @EXPORT; # Prototype declaration sub init_forms(); sub decode_forms(); sub decode_forms_delay(); sub try_decode_form($$); sub split_upload($); } use Encode qw(decode FB_CROAK); use URI::Escape qw(uri_escape); use HTML::Entities qw(%entity2char); use Selima::DataVars qw(:env :input :l10n :lninfo); use Selima::GetLang; use Selima::HTTP; use Selima::LnInfo; use Selima::Unicode; use vars qw(@TRY); @TRY = qw(US-ASCII Big5 GB2312 Shift-JIS UTF-8 ISO-8859-1 ISO-8859-15); # init_forms: Initialize the user-input form data sub init_forms() { local ($_, %_); my ($r, $rawget, $rawpost, $postdelay); # Do not redo return if scalar(keys %USER_INPUT) > 0; $r = $IS_MP2? Apache2::RequestUtil->request: Apache->request if $IS_MODPERL; # Scan the GET query $rawget = $IS_MODPERL? (defined $r->args? $r->args: ""): (exists $ENV{"QUERY_STRING"}? $ENV{"QUERY_STRING"}: ""); $GET = new CGI($rawget); # Scan the POST form $postdelay = 0; if (($IS_MODPERL? $r->method: $ENV{"REQUEST_METHOD"}) eq "POST") { if ($IS_MODPERL) { # Ordinary POST form if ( !defined($_ = $r->headers_in->get("Content-Type")) || $_ eq "application/x-www-form-urlencoded") { if ($IS_MP2) { $r->read($rawpost, $r->headers_in->get("Content-Length")); } else { $rawpost = $r->content; } $POST = new CGI($rawpost); $r->headers_in->set("X-Selima-POST", $rawpost); # Others (file uploads, etc) and CGI environment is ready # - parse with CGI.pm default parser } elsif (exists $ENV{"SERVER_SOFTWARE"}) { $POST = new CGI; @_ = qw(); foreach my $name ($POST->param) { foreach my $val ($POST->param($name)) { push @_, uri_escape($name) . "=" . uri_escape($val); } } $rawpost = join "&", @_; # Others (file uploads, etc) and CGI environment is not ready yet # - delay the parsing } else { $rawpost = ""; $POST = new CGI($rawpost); $postdelay = 1; } } else { # POST form was obtained by mod_perl handler before if (exists $ENV{"HTTP_X_SELIMA_POST"}) { $rawpost = $ENV{"HTTP_X_SELIMA_POST"}; $POST = new CGI($rawpost); # Ordinary POST form -- parse the STDIN } elsif ( !exists $ENV{"HTTP_CONTENT_TYPE"} || $ENV{"HTTP_CONTENT_TYPE"} eq "application/x-www-form-urlencoded") { $rawpost = ; $POST = new CGI($rawpost); # Others (file uploads, etc) - parse with CGI.pm default parser } else { $POST = new CGI; @_ = qw(); foreach my $name ($POST->param) { foreach my $val ($POST->param($name)) { push @_, uri_escape($name) . "=" . uri_escape($val); } } $rawpost = join "&", @_; } } } else { $rawpost = ""; $POST = new CGI($rawpost); } # Split the uploaded files ($POST, $UPLOAD) = split_upload $POST; # Initialize the data deposit %USER_INPUT = ( "GET_RAW" => $GET, "GET_UTF8" => $GET, "GET_CHARSET" => undef, "GET_CSERR" => 1, "GET_RAWDATA" => $rawget, "GET_KEYS" => [ $GET->param ], "POST_RAW" => $POST, "POST_UTF8" => $POST, "POST_CHARSET" => undef, "POST_CSERR" => 1, "POST_RAWDATA" => $rawpost, "POST_DELAY" => $postdelay, "UPLOAD" => $UPLOAD, ); return; } # decode_forms: Decode user FORM input from some character set sub decode_forms() { local ($_, %_); my (@charsets, @charsets_site); # The possible character sets of this website @charsets_site = map ln($_, LN_CHARSET), @ALL_LINGUAS; # The GET arguments # The character set candidates @_ = qw(); push @_, $GET->param("charset") if defined $GET->param("charset"); push @_, getlang LN_CHARSET; push @_, @charsets_site; push @_, @TRY; @charsets = qw(); %_ = qw(); foreach (@_) { if (lc $_ eq "big5") { if (!exists $_{"Big5-ETen"}) { push @charsets, "Big5-ETen"; $_{"Big5-ETen"} = 1; } if (!exists $_{"Big5"}) { push @charsets, "Big5"; $_{"Big5"} = 1; } if (!exists $_{"CP950"}) { push @charsets, "CP950"; $_{"CP950"} = 1; } if (!exists $_{"Big5-HKSCS"}) { push @charsets, "Big5-HKSCS"; $_{"Big5-HKSCS"} = 1; } } elsif (lc $_ eq "big5-hkscs") { if (!exists $_{"Big5-HKSCS"}) { push @charsets, "Big5-HKSCS"; $_{"Big5-HKSCS"} = 1; } } elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") { if (!exists $_{"GB2312"}) { push @charsets, "GB2312"; $_{"GB2312"} = 1; } if (!exists $_{"GB18030"}) { push @charsets, "GB18030"; $_{"GB18030"} = 1; } } else { if (!exists $_{$_}) { push @charsets, $_; $_{$_} = 1; } } } # Check each character set foreach my $charset (@charsets) { $_ = $USER_INPUT{"GET_RAW"}; # In this character set if (defined($_ = try_decode_form $_, $charset)) { $GET = $_; $USER_INPUT{"GET_UTF8"} = $_; $USER_INPUT{"GET_CHARSET"} = $charset; $USER_INPUT{"GET_CSERR"} = 0; $USER_INPUT{"GET_KEYS"} = [ $_->param ]; last; } } # The POSTed form # The character set candidates @_ = qw(); push @_, $POST->param("charset") if defined $POST->param("charset"); push @_, getlang LN_CHARSET; push @_, @charsets_site; push @_, @TRY; @charsets = qw(); %_ = qw(); foreach (@_) { if (lc $_ eq "big5") { if (!exists $_{"Big5-ETen"}) { push @charsets, "Big5-ETen"; $_{"Big5-ETen"} = 1; } if (!exists $_{"Big5"}) { push @charsets, "Big5"; $_{"Big5"} = 1; } if (!exists $_{"CP950"}) { push @charsets, "CP950"; $_{"CP950"} = 1; } if (!exists $_{"Big5-HKSCS"}) { push @charsets, "Big5-HKSCS"; $_{"Big5-HKSCS"} = 1; } } elsif (lc $_ eq "big5-hkscs") { if (!exists $_{"Big5-HKSCS"}) { push @charsets, "Big5-HKSCS"; $_{"Big5-HKSCS"} = 1; } } elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") { if (!exists $_{"GB2312"}) { push @charsets, "GB2312"; $_{"GB2312"} = 1; } if (!exists $_{"GB18030"}) { push @charsets, "GB18030"; $_{"GB18030"} = 1; } } else { if (!exists $_{$_}) { push @charsets, $_; $_{$_} = 1; } } } # Check each character set foreach my $charset (@charsets) { $_ = $USER_INPUT{"POST_RAW"}; # In this character set if (defined($_ = try_decode_form $_, $charset)) { $POST = $_; $USER_INPUT{"POST_UTF8"} = $_; $USER_INPUT{"POST_CHARSET"} = $charset; $USER_INPUT{"POST_CSERR"} = 0; last; } } # No valid character set was found http_500 "Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter." if $USER_INPUT{"GET_CSERR"} || $USER_INPUT{"POST_CSERR"}; return; } # decode_forms_delay: Initialize and decode multipart/form-data (uploads) # for mod_perl. We cannot obtain it before the CGI environment is ready. sub decode_forms_delay() { local ($_, %_); my ($r, $rawpost, @charsets, @charsets_site); # Parsing not delayed return unless $USER_INPUT{"POST_DELAY"}; # Obtain the POST form $POST = new CGI; @_ = qw(); foreach my $name ($POST->param) { foreach my $val ($POST->param($name)) { push @_, uri_escape($name) . "=" . uri_escape($val); } } $rawpost = join "&", @_; # Split the uploaded files ($POST, $UPLOAD) = split_upload $POST; # Record it $USER_INPUT{"POST_RAW"} = $POST; $USER_INPUT{"POST_UTF8"} = $POST; $USER_INPUT{"POST_CHARSET"} = undef; $USER_INPUT{"POST_CSERR"} = 1; $USER_INPUT{"POST_RAWDATA"} = $rawpost; $USER_INPUT{"UPLOAD"} = $UPLOAD; # The character set candidates @_ = qw(); push @_, $POST->param("charset") if defined $POST->param("charset"); push @_, getlang LN_CHARSET; push @_, map ln($_, LN_CHARSET), @ALL_LINGUAS; push @_, @TRY; @charsets = qw(); %_ = qw(); foreach (@_) { if (lc $_ eq "big5") { if (!exists $_{"Big5-ETen"}) { push @charsets, "Big5-ETen"; $_{"Big5-ETen"} = 1; } if (!exists $_{"Big5"}) { push @charsets, "Big5"; $_{"Big5"} = 1; } if (!exists $_{"CP950"}) { push @charsets, "CP950"; $_{"CP950"} = 1; } if (!exists $_{"Big5-HKSCS"}) { push @charsets, "Big5-HKSCS"; $_{"Big5-HKSCS"} = 1; } } elsif (lc $_ eq "big5-hkscs") { if (!exists $_{"Big5-HKSCS"}) { push @charsets, "Big5-HKSCS"; $_{"Big5-HKSCS"} = 1; } } elsif (lc $_ eq "gb2312" || lc $_ eq "gb18030") { if (!exists $_{"GB2312"}) { push @charsets, "GB2312"; $_{"GB2312"} = 1; } if (!exists $_{"GB18030"}) { push @charsets, "GB18030"; $_{"GB18030"} = 1; } } else { if (!exists $_{$_}) { push @charsets, $_; $_{$_} = 1; } } } # Check each character set foreach my $charset (@charsets) { $_ = $USER_INPUT{"POST_RAW"}; # In this character set if (defined($_ = try_decode_form $_, $charset)) { $POST = $_; $USER_INPUT{"POST_UTF8"} = $_; $USER_INPUT{"POST_CHARSET"} = $charset; $USER_INPUT{"POST_CSERR"} = 0; last; } } # No valid character set was found http_500 "Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter." if $USER_INPUT{"GET_CSERR"} || $USER_INPUT{"POST_CSERR"}; return; } # try_decode_form: Try to decode a CGI form with a specific character set sub try_decode_form($$) { local ($_, %_); my ($SOURCE, $charset, $RESULT, $name, $is_upload); ($SOURCE, $charset) = @_; # Obtain a copy of the converted result $RESULT = new CGI(""); $is_upload = 0; foreach $name ($SOURCE->param) { @_ = $SOURCE->param($name); return if !defined($name = hcref_decode($charset, $name)); foreach (@_) { s/\x00//g; return if !defined($_ = hcref_decode($charset, $_)); } $RESULT->param($name, @_); } return $RESULT; } # split_upload: Split the uploaded files from the POST form sub split_upload($) { local ($_, %_); my ($FORM, $UPLOAD, $name); $FORM = $_[0]; $UPLOAD = $FORM; $FORM = new CGI(""); foreach $name ($UPLOAD->param) { my (@valp, @valu); @valp = qw(); @valu = qw(); foreach ($UPLOAD->param($name)) { # A scalar value if (ref $_ eq "") { push @valp, $_; # Others (file uploads Fh, etc.) } else { push @valu, $_; } } # Some scalar found if (@valp > 0) { # Move to the POST form $FORM->param($name, @valp); # There are some uploaded files for this column if (@valu > 0) { $UPLOAD->param($name, @valu); # No uploaded file for this column } else { $UPLOAD->delete($name); } } } return ($FORM, $UPLOAD); } return 1;