// Copyright: Copyright (C) 2002-2007 Pristine Communications

// Set the include path
if (!defined("INCPATH_SET")) {
    require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/getlang.inc.php";
require_once "monica/http.inc.php";
require_once "monica/lninfo.inc.php";
require_once "monica/unicode.inc.php";

// Settings
// Fallback character sets, tried in this order after the requested
// character set, the current language's and the site's character sets.
$_DECFORM_TRY = array(
    "US-ASCII",
    "Big5",
    "GB2312",
    "GB18030",
    "Shift-JIS",
    "UTF-8"
);

// decode_forms: Decode user input FORMs to UTF-8
//   Tries a list of candidate character sets against the GET arguments
//   and against the POSTed form plus uploaded file names.  On success
//   $_GET, $_POST and $_FILES are replaced with their decoded copies,
//   and the raw data, the decoded data and the detected character sets
//   are recorded in $GLOBALS["USER_INPUT"].  If no candidate decodes
//   the GET arguments, or none decodes the POSTed form, http_400() is
//   called.  Once a call has run to completion, later calls return
//   immediately.
function decode_forms()
{
    // Don't redo
    static $done;
    if (isset($done)) {
        return;
    }
    global $ALL_LINGUAS;

    // Initialize the data deposit.  The *_CSERR flags start as true and
    // are cleared only when a character set decodes successfully.
    $GLOBALS["USER_INPUT"] = array(
        "GET_RAW"       => $_GET,
        "GET_UTF8"      => $_GET,
        "GET_CHARSET"   => null,
        "GET_CSERR"     => true,
        "GET_KEYS"      => array_keys($_GET),
        "POST_RAW"      => $_POST,
        "POST_UTF8"     => $_POST,
        "FILES_RAW"     => $_FILES,
        "FILES_UTF8"    => $_FILES,
        "POST_CHARSET"  => null,
        "POST_CSERR"    => true,
    );
    global $USER_INPUT;

    // The possible character sets of this website
    $charsets_site = array();
    foreach ($ALL_LINGUAS as $lang) {
        $charsets_site[] = ln($lang, LN_CHARSET);
    }

    // The GET arguments
    // The character set candidates
    $charsets = array();
    // Only a plain string is a usable character set name; "charset[]=x"
    // would arrive as an array and must be ignored.
    if (array_key_exists("charset", $_GET) && is_string($_GET["charset"])) {
        $charsets[] = $_GET["charset"];
    }
    $charsets[] = getlang(LN_CHARSET);
    $charsets = array_merge($charsets, $charsets_site, $GLOBALS["_DECFORM_TRY"]);
    $charsets = array_values(array_unique($charsets));
    // Check each character set
    foreach ($charsets as $charset) {
        // Always start from the raw data; a failed attempt may have
        // partially converted the working copy.
        $GET = $USER_INPUT["GET_RAW"];
        // In this character set
        if (_decform_array2u8($GET, $charset)) {
            $_GET = $GET;
            $USER_INPUT["GET_UTF8"] = $GET;
            $USER_INPUT["GET_CHARSET"] = $charset;
            $USER_INPUT["GET_CSERR"] = false;
            break;
        }
    }

    // The POSTed form
    // The character set candidates
    $cands = array();
    if (array_key_exists("charset", $_POST) && is_string($_POST["charset"])) {
        $cands[] = $_POST["charset"];
    }
    $cands[] = getlang(LN_CHARSET);
    $cands = array_merge($cands, $charsets_site, $GLOBALS["_DECFORM_TRY"]);
    // Remove duplicates case-insensitively, and follow Big5 and GB2312
    // with their supersets CP950 and GB18030.
    $charsets = array();
    $lcharsets = array();
    foreach ($cands as $charset) {
        $lcharset = strtolower($charset);
        if (in_array($lcharset, $lcharsets, true)) {
            continue;
        }
        $charsets[] = $charset;
        $lcharsets[] = $lcharset;
        switch ($lcharset) {
        case "big5":
            if (!in_array("cp950", $lcharsets, true)) {
                $charsets[] = "CP950";
                $lcharsets[] = "cp950";
            }
            break;
        case "gb2312":
            if (!in_array("gb18030", $lcharsets, true)) {
                $charsets[] = "GB18030";
                $lcharsets[] = "gb18030";
            }
            break;
        }
    }
    // Check each character set
    foreach ($charsets as $charset) {
        $POST = $USER_INPUT["POST_RAW"];
        $FILES = $USER_INPUT["FILES_RAW"];
        // In this character set: the form and the file names must both decode
        if (    _decform_array2u8($POST, $charset)
                && _decform_files2u8($FILES, $charset)) {
            $_POST = $POST;
            $_FILES = $FILES;
            $USER_INPUT["POST_UTF8"] = $POST;
            $USER_INPUT["FILES_UTF8"] = $FILES;
            $USER_INPUT["POST_CHARSET"] = $charset;
            $USER_INPUT["POST_CSERR"] = false;
            break;
        }
    }

    // No valid character set was found
    if ($USER_INPUT["GET_CSERR"] || $USER_INPUT["POST_CSERR"]) {
        http_400("Unable to detect the character set of your submitted information. Please specify the input character set with charset= parameter.");
    }

    $done = true;
    return;
}

// _decform_array2u8: Convert an array from a specific character set
//                    to UTF-8
//   $FORM is converted in place, keys and values, recursing into nested
//   arrays.  Returns true if everything was decoded, or false as soon
//   as h_decode() returns null or is_valid_unicode() rejects a result.
//   On false $FORM may be left partially converted, so callers should
//   pass a scratch copy.
function _decform_array2u8(&$FORM, $charset)
{
    // Convert each column value
    foreach (array_keys($FORM) as $col) {
        // Try to decode the column name first
        $colu8 = h_decode($col, $charset);
        // Not in this character set, or not a piece of valid unicode text
        if (is_null($colu8) || !is_valid_unicode($colu8)) {
            return false;
        }
        // Change the key.  The loose comparison is deliberate: an
        // integer key and its decoded string form must count as the
        // same key, or the unset() below would drop the value.
        if ($col != $colu8) {
            $FORM[$colu8] =& $FORM[$col];
            unset($FORM[$col]);
            $col = $colu8;
        }

        // An array of values
        if (is_array($FORM[$col])) {
            // Fail to decode this array
            if (!_decform_array2u8($FORM[$col], $charset)) {
                return false;
            }

        // A scalar value
        } else {
            // Remove "\x00".  It causes errors with sql_esctext() and
            // sql_esclike().
            $val = str_replace("\x00", "", $FORM[$col]);
            $val = h_decode($val, $charset);
            // Not in this character set, or not a piece of valid unicode text
            if (is_null($val) || !is_valid_unicode($val)) {
                return false;
            }
            $FORM[$col] = $val;
        }
    }
    // Everything OK
    return true;
}

// _decform_files2u8: Convert the POSTed files from a specific character set
//                    to UTF-8
//   $FILES has the layout of $_FILES and is converted in place: the
//   field names (keys) and the client-supplied "name" of every file.
//   Three shapes of entry are handled:
//     - no "tmp_name" key: treated as a nested group of entries;
//     - scalar "tmp_name": a singular file;
//     - array "tmp_name": a multiple-file upload, where PHP stores
//       name/type/tmp_name/error/size as parallel arrays.
//   Returns true if everything was decoded, false otherwise.  On false
//   $FILES may be left partially converted.
function _decform_files2u8(&$FILES, $charset)
{
    // Convert each filename
    foreach (array_keys($FILES) as $col) {
        // Try to decode the column name first
        $colu8 = h_decode($col, $charset);
        // Not in this character set, or not a piece of valid unicode text
        if (is_null($colu8) || !is_valid_unicode($colu8)) {
            return false;
        }
        // Change the key
        if ($col != $colu8) {
            $FILES[$colu8] =& $FILES[$col];
            unset($FILES[$col]);
            $col = $colu8;
        }

        // Not an upload entry: nothing to look into
        if (!is_array($FILES[$col])) {
            continue;
        }

        // A nested group of files
        if (!array_key_exists("tmp_name", $FILES[$col])) {
            // Fail to decode this array
            if (!_decform_files2u8($FILES[$col], $charset)) {
                return false;
            }

        // A multiple-file upload
        } elseif (is_array($FILES[$col]["tmp_name"])) {
            foreach (array_keys($FILES[$col]) as $attr) {
                if (!is_array($FILES[$col][$attr])) {
                    continue;
                }
                // Keys are decoded in every attribute so that they stay
                // parallel; values are decoded only for the file names.
                if (!_decform_filetree2u8($FILES[$col][$attr], $charset,
                        $attr === "name")) {
                    return false;
                }
            }

        // A singular file
        } else {
            // Remove "\x00".  It causes errors with sql_esctext() and
            // sql_esclike().
            $val = str_replace("\x00", "", $FILES[$col]["name"]);
            $val = h_decode($val, $charset);
            // Not in this character set, or not a piece of valid unicode text
            if (is_null($val) || !is_valid_unicode($val)) {
                return false;
            }
            $FILES[$col]["name"] = $val;
        }
    }
    // Everything OK
    return true;
}

// _decform_filetree2u8: Convert one attribute array of a multiple-file
//                       upload from a specific character set to UTF-8
//   $TREE is one of the parallel arrays of a multiple-file upload entry
//   and is converted in place.  The keys are always decoded; the scalar
//   values are decoded only when $is_names is true, so that temporary
//   paths, sizes and error codes are left untouched.  Returns true if
//   everything was decoded, false otherwise.
function _decform_filetree2u8(&$TREE, $charset, $is_names)
{
    foreach (array_keys($TREE) as $idx) {
        // Try to decode the key first
        $idxu8 = h_decode($idx, $charset);
        // Not in this character set, or not a piece of valid unicode text
        if (is_null($idxu8) || !is_valid_unicode($idxu8)) {
            return false;
        }
        // Change the key
        if ($idx != $idxu8) {
            $TREE[$idxu8] =& $TREE[$idx];
            unset($TREE[$idx]);
            $idx = $idxu8;
        }

        // A deeper level of the same attribute
        if (is_array($TREE[$idx])) {
            if (!_decform_filetree2u8($TREE[$idx], $charset, $is_names)) {
                return false;
            }

        // A file name
        } elseif ($is_names) {
            // Remove "\x00", as is done for a singular file
            $val = str_replace("\x00", "", $TREE[$idx]);
            $val = h_decode($val, $charset);
            // Not in this character set, or not a piece of valid unicode text
            if (is_null($val) || !is_valid_unicode($val)) {
                return false;
            }
            $TREE[$idx] = $val;
        }
    }
    // Everything OK
    return true;
}

?>