239 lines
7.2 KiB
PHP
239 lines
7.2 KiB
PHP
<?php
|
|
// File name: decform.inc.php
|
|
// Description: PHP subroutine to decode user input FORMs to UTF-8
|
|
// Date: 2002-04-17
|
|
// Author: imacat <imacat@pristine.com.tw>
|
|
// Copyright: Copyright (C) 2002-2007 Pristine Communications
|
|
|
|
// Set the include path
|
|
if (!defined("INCPATH_SET")) {
|
|
require_once dirname(__FILE__) . "/incpath.inc.php";
|
|
}
|
|
// Referenced subroutines
|
|
require_once "monica/getlang.inc.php";
|
|
require_once "monica/http.inc.php";
|
|
require_once "monica/lninfo.inc.php";
|
|
require_once "monica/unicode.inc.php";
|
|
|
|
// Settings
// Character sets that decode_forms() appends to its list of candidates,
// after the requested, current-language and site character sets.
$_DECFORM_TRY = array(
    "US-ASCII",
    "Big5",
    "GB2312",
    "GB18030",
    "Shift-JIS",
    "UTF-8"
);
|
|
|
|
|
|
// decode_forms: Decode user input FORMs to UTF-8
//   Tries a list of candidate character sets on the GET arguments and
//   on the POSTed form (together with the uploaded file names).  The
//   first candidate that decodes everything wins: $_GET, $_POST and
//   $_FILES are overwritten with the decoded copies, and the raw data,
//   decoded data and detected character sets are deposited in
//   $GLOBALS["USER_INPUT"].  Calls http_400() when no candidate fits
//   either the GET arguments or the POSTed form.
//   Only the first completed call does any work.
function decode_forms()
{
    // Don't redo
    static $done;
    if (isset($done)) {
        return;
    }

    global $ALL_LINGUAS;

    // Initialize the data deposit.  The *_CSERR flags start as true and
    // are cleared only when a character set is found.
    $GLOBALS["USER_INPUT"] = array(
        "GET_RAW"       => $_GET,
        "GET_UTF8"      => $_GET,
        "GET_CHARSET"   => null,
        "GET_CSERR"     => true,
        "GET_KEYS"      => array_keys($_GET),
        "POST_RAW"      => $_POST,
        "POST_UTF8"     => $_POST,
        "FILES_RAW"     => $_FILES,
        "FILES_UTF8"    => $_FILES,
        "POST_CHARSET"  => null,
        "POST_CSERR"    => true,
    );
    global $USER_INPUT;

    // The possible character sets of this website
    $charsets_site = array();
    if (is_array($ALL_LINGUAS)) {
        foreach ($ALL_LINGUAS as $lang) {
            $charsets_site[] = ln($lang, LN_CHARSET);
        }
    }

    // The GET arguments
    // The character set candidates
    // NOTE(review): unlike the POST path below, the GET candidates are
    // de-duplicated case-sensitively and get no CP950/GB18030
    // fallbacks -- confirm whether that difference is intended.
    $charsets = _decform_requested_charsets($_GET);
    $charsets[] = getlang(LN_CHARSET);
    $charsets = array_merge($charsets, $charsets_site);
    $charsets = array_merge($charsets, $GLOBALS["_DECFORM_TRY"]);
    $charsets = array_values(array_unique($charsets));
    // Check each character set
    foreach ($charsets as $charset) {
        // Always start from the raw data; a failed attempt may have
        // partially converted its working copy.
        $GET = $USER_INPUT["GET_RAW"];
        // In this character set
        if (_decform_array2u8($GET, $charset)) {
            $_GET = $GET;
            $USER_INPUT["GET_UTF8"] = $GET;
            $USER_INPUT["GET_CHARSET"] = $charset;
            $USER_INPUT["GET_CSERR"] = false;
            break;
        }
    }

    // The POSTed form
    // The character set candidates
    $cands = _decform_requested_charsets($_POST);
    $cands[] = getlang(LN_CHARSET);
    $cands = array_merge($cands, $charsets_site);
    $cands = array_merge($cands, $GLOBALS["_DECFORM_TRY"]);
    // Character sets to try right after the one named in the key
    $fallbacks = array(
        "big5"      => "CP950",
        "gb2312"    => "GB18030",
    );
    // De-duplicate case-insensitively, keeping the first spelling seen
    $charsets = array();
    $lcharsets = array();
    foreach ($cands as $charset) {
        $lcharset = strtolower($charset);
        if (in_array($lcharset, $lcharsets, true)) {
            continue;
        }
        $charsets[] = $charset;
        $lcharsets[] = $lcharset;
        if (array_key_exists($lcharset, $fallbacks)) {
            $fallback = $fallbacks[$lcharset];
            // Don't queue a fallback that is already a candidate
            if (!in_array(strtolower($fallback), $lcharsets, true)) {
                $charsets[] = $fallback;
                $lcharsets[] = strtolower($fallback);
            }
        }
    }
    // Check each character set
    foreach ($charsets as $charset) {
        $POST = $USER_INPUT["POST_RAW"];
        $FILES = $USER_INPUT["FILES_RAW"];
        // In this character set: the form and the file names must
        // both decode.
        if (    _decform_array2u8($POST, $charset)
                && _decform_files2u8($FILES, $charset)) {
            $_POST = $POST;
            $_FILES = $FILES;
            $USER_INPUT["POST_UTF8"] = $POST;
            $USER_INPUT["FILES_UTF8"] = $FILES;
            $USER_INPUT["POST_CHARSET"] = $charset;
            $USER_INPUT["POST_CSERR"] = false;
            break;
        }
    }

    // No valid character set was found
    if ($USER_INPUT["GET_CSERR"] || $USER_INPUT["POST_CSERR"]) {
        http_400("Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter.");
    }

    $done = true;
    return;
}

// _decform_requested_charsets: Obtain the character set requested with
//                              the charset= parameter of a form
//   $FORM: the submitted form ($_GET or $_POST)
//   Returns an array holding the requested character set, or an empty
//   array when there is none.  Only a non-empty string is accepted:
//   the value is user input, and "charset[]=..." arrives as an array
//   that cannot name a character set.
function _decform_requested_charsets($FORM)
{
    if (    array_key_exists("charset", $FORM)
            && is_string($FORM["charset"])
            && $FORM["charset"] !== "") {
        return array($FORM["charset"]);
    }
    return array();
}
|
|
|
|
// _decform_array2u8: Convert an array from a specific character set
//                    to UTF-8
//   $FORM:    the form data, converted in place; nested arrays are
//             converted recursively
//   $charset: the character set to decode from
//   Returns true when every key and every value was decoded to valid
//   unicode text, or false at the first one that was not.  On failure
//   $FORM may be left partially converted.
function _decform_array2u8(&$FORM, $charset)
{
    // Walk a snapshot of the keys, since keys are renamed on the way
    foreach (array_keys($FORM) as $key) {
        // The column name comes first
        $decodedKey = h_decode($key, $charset);
        // Not in this character set, or not valid unicode text
        if (is_null($decodedKey) || !is_valid_unicode($decodedKey)) {
            return false;
        }
        // Move the value to the decoded key
        if ($key != $decodedKey) {
            $FORM[$decodedKey] =& $FORM[$key];
            unset($FORM[$key]);
            $key = $decodedKey;
        }

        // An array of values: decode it as a whole
        if (is_array($FORM[$key])) {
            if (!_decform_array2u8($FORM[$key], $charset)) {
                return false;
            }
            continue;
        }

        // A scalar value
        // Remove "\x00".  This will cause error with sql_esctext() and sql_esclike().
        $text = h_decode(str_replace("\x00", "", $FORM[$key]), $charset);
        // Not in this character set, or not valid unicode text
        if (is_null($text) || !is_valid_unicode($text)) {
            return false;
        }
        $FORM[$key] = $text;
    }

    // Everything OK
    return true;
}
|
|
|
|
// _decform_files2u8: Convert the POSTed files from a specific character set
//                    to UTF-8
//   $FILES:   the uploaded files, in the layout of $_FILES, converted
//             in place
//   $charset: the character set to decode from
//   Returns true when every column name and every file name was
//   decoded to valid unicode text, or false at the first one that was
//   not.  On failure $FILES may be left partially converted.
//   Only the column names and the "name" entries are decoded; "type",
//   "tmp_name", "error" and "size" are left untouched.
function _decform_files2u8(&$FILES, $charset)
{
    // Walk a snapshot of the keys, since keys are renamed on the way
    foreach (array_keys($FILES) as $col) {
        // Try to decode the column name first
        $colu8 = h_decode($col, $charset);
        // Not in this character set, or not valid unicode text
        if (is_null($colu8) || !is_valid_unicode($colu8)) {
            return false;
        }
        // Change the key.  Compared as strings, so that an integer
        // key decoded to the same digits is not renamed.
        if ((string) $col !== (string) $colu8) {
            $FILES[$colu8] = $FILES[$col];
            unset($FILES[$col]);
            $col = $colu8;
        }

        // A scalar leaf reached by the recursion below (a type, size,
        // ... of a multiple-file upload).  There is nothing to decode.
        if (!is_array($FILES[$col])) {
            continue;
        }

        // A singular file
        if (    array_key_exists("tmp_name", $FILES[$col])
                && !is_array($FILES[$col]["tmp_name"])) {
            if (    isset($FILES[$col]["name"])
                    && !_decform_filenames2u8($FILES[$col]["name"], $charset)) {
                return false;
            }
            continue;
        }

        // An array of files
        // Decode the column names nested below this one.  This keeps
        // the keys of "name", "type", "tmp_name", "error" and "size"
        // in step with each other.
        if (!_decform_files2u8($FILES[$col], $charset)) {
            return false;
        }
        // A multiple-file upload keeps its file names as an array
        // under "name".  Decode them, too.
        if (    array_key_exists("tmp_name", $FILES[$col])
                && isset($FILES[$col]["name"])
                && is_array($FILES[$col]["name"])) {
            if (!_decform_filenames2u8($FILES[$col]["name"], $charset)) {
                return false;
            }
        }
    }

    // Everything OK
    return true;
}

// _decform_filenames2u8: Convert a file name, or a nested array of
//                        file names, to UTF-8
//   $names:   a file name or an array of file names, converted in
//             place; array keys are left as they are
//   $charset: the character set to decode from
//   Returns true on success, or false at the first file name that
//   could not be decoded to valid unicode text.
function _decform_filenames2u8(&$names, $charset)
{
    // An array of file names
    if (is_array($names)) {
        foreach (array_keys($names) as $k) {
            if (!_decform_filenames2u8($names[$k], $charset)) {
                return false;
            }
        }
        return true;
    }

    // A singular file name
    // Remove "\x00".  This will cause error with sql_esctext() and sql_esclike().
    $val = h_decode(str_replace("\x00", "", $names), $charset);
    // Not in this character set, or not valid unicode text
    if (is_null($val) || !is_valid_unicode($val)) {
        return false;
    }
    $names = $val;
    return true;
}
|
|
|
|
?>
|