Files
selima-perl/lib/php/monica/unused.inc.php
2026-03-10 21:31:43 +08:00

468 lines
15 KiB
PHP

<?php
// File name: unused.inc.php
// Description: PHP subroutines that are not used and not loaded
// Date: 2007-08-10
// Author: imacat <imacat@pristine.com.tw>
// Copyright: Copyright (C) 2007-2008 Pristine Communications
// Set the include path
if (!defined("INCPATH_SET")) {
require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/addcol.inc.php";
require_once "monica/errhndl.inc.php";
require_once "monica/hires.inc.php";
require_once "monica/sql.inc.php";
require_once "monica/unicode.inc.php";
require_once "monica/zh2py.inc.php";
//
// cnvtmap.inc.php
//
// cnvtmap.inc.php has been removed.  It contained only this unused
// mb_encode_numericentity_cnvtmap().
// mb_encode_numericentity_cnvtmap: Build the conversion map (cnvtmap) for
//   mb_encode_numericentity() that covers every code point iconv() reports
//   an error for when converting it from UTF-8 to the given character set.
//   This is an occasional maintenance subroutine.  It probes 65536 * 32
//   code points one by one, so do not call it regularly; save the result.
//   $charset: the target character set name, passed to iconv()
//   $archive: when true, also echo the map as PHP source code
//   return: a flat array holding 4 entries (start, end, offset, mask)
//           for each range of consecutive code points
function mb_encode_numericentity_cnvtmap($charset, $archive = false)
{
    // Lift the execution time limit while we work; it is put back at the end
    $savedLimit = ini_get("max_execution_time");
    ini_set("max_execution_time", 0);

    // Probe every code point and remember those that raise an error.
    // NOTE(review): this relies on null_error_handler() recording the
    // message in $GLOBALS["php_errormsg"] -- it is defined outside of
    // this file, confirm there.
    $failed = array();
    $limit = 65536 * 32;
    for ($ord = 0; $ord < $limit; $ord++) {
        $utf8 = iconv("UTF-32LE", "UTF-8", pack("V", $ord));
        $GLOBALS["php_errormsg"] = null;
        set_error_handler("null_error_handler");
        iconv("UTF-8", $charset, $utf8);
        restore_error_handler();
        if ($GLOBALS["php_errormsg"] !== null) {
            $failed[] = $ord;
        }
    }

    // Fold the consecutive code points into (start, end) ranges
    $ranges = array();
    foreach ($failed as $ord) {
        $n = count($ranges);
        if ($n > 0 && $ranges[$n - 1][1] + 1 == $ord) {
            // Directly follows the previous one: extend the current range
            $ranges[$n - 1][1] = $ord;
        } else {
            // A gap (or the very first code point): open a new range
            $ranges[] = array($ord, $ord);
        }
    }

    // Flatten the ranges into the cnvtmap: start, end, offset, mask
    $cnvtmap = array();
    foreach ($ranges as $range) {
        $cnvtmap[] = $range[0];
        $cnvtmap[] = $range[1];
        $cnvtmap[] = 0x000000;
        $cnvtmap[] = 0xFFFFFF;
    }

    // Print it as PHP source, one range per line, so that it can be archived
    if ($archive) {
        echo "\$CNVTMAP[\"$charset\"] = array(\n";
        foreach ($ranges as $range) {
            printf(" 0x%06X, 0x%06X, 0x%06X, 0x%06X,\n",
                $range[0], $range[1], 0x000000, 0xFFFFFF);
        }
        echo ");\n";
    }

    // Put the original execution time limit back
    ini_set("max_execution_time", $savedLimit);
    return $cnvtmap;
}
// mb_encode_numericentity_invalid_cnvtmap: Build the conversion map
//   (cnvtmap) for mb_encode_numericentity() that covers every code point
//   below 65536 for which a PostgreSQL "SELECT '<character>'" query fails.
//   This is an occasional maintenance subroutine.  It issues one query
//   per code point, so do not call it regularly; save the result.
//   $archive: when true, also echo the map as PHP source code
//   return: a flat array holding 4 entries (start, end, offset, mask)
//           for each range of consecutive code points
function mb_encode_numericentity_invalid_cnvtmap($archive = false)
{
    // Lift the execution time limit while we work; it is put back at the end
    $savedLimit = ini_get("max_execution_time");
    ini_set("max_execution_time", 0);

    // Find the characters that the database refuses to accept.
    // pg_query() is called without a connection argument, so it runs on
    // the default (last opened) connection.
    $rejected = array();
    for ($ord = 0; $ord < 65536; $ord++) {
        $utf8 = iconv("UTF-32LE", "UTF-8", pack("V", $ord));
        $GLOBALS["php_errormsg"] = null;
        // Keep the warning of a failed query quiet
        set_error_handler("null_error_handler");
        $result = pg_query("SELECT '" . sql_esctext($utf8) . "';\n");
        restore_error_handler();
        if ($result === false) {
            $rejected[] = $ord;
        }
    }

    // Fold the consecutive code points into (start, end) ranges
    $ranges = array();
    foreach ($rejected as $ord) {
        $n = count($ranges);
        if ($n > 0 && $ranges[$n - 1][1] + 1 == $ord) {
            // Directly follows the previous one: extend the current range
            $ranges[$n - 1][1] = $ord;
        } else {
            // A gap (or the very first code point): open a new range
            $ranges[] = array($ord, $ord);
        }
    }

    // Flatten the ranges into the cnvtmap: start, end, offset, mask
    $cnvtmap = array();
    foreach ($ranges as $range) {
        $cnvtmap[] = $range[0];
        $cnvtmap[] = $range[1];
        $cnvtmap[] = 0x000000;
        $cnvtmap[] = 0xFFFFFF;
    }

    // Print it as PHP source, one range per line, so that it can be archived
    if ($archive) {
        echo "\$CNVTMAP[\"invalid\"] = array(\n";
        foreach ($ranges as $range) {
            printf(" 0x%06X, 0x%06X, 0x%06X, 0x%06X,\n",
                $range[0], $range[1], 0x000000, 0xFFFFFF);
        }
        echo ");\n";
    }

    // Put the original execution time limit back
    ini_set("max_execution_time", $savedLimit);
    return $cnvtmap;
}
//
// unicode.inc.php
//
// rest_hcerefs: Restore the HTML character entity references stored in
//   the database, in every string column of every table.
//   This is an occasional maintenance subroutine.  Do not call it
//   regularly.  It replaces ALL HTML character entity references; stop
//   if you want to preserve any of them.
//   Every table is exclusively locked, and every table is expected to
//   have an "sn" column that identifies its rows.
//   Progress and the elapsed time are printed to the standard output.
function rest_hcerefs()
{
    // Lift the execution time limit while we work; it is put back at the end
    $savedLimit = ini_get("max_execution_time");
    ini_set("max_execution_time", 0);
    $started = time_hires();

    // Take an exclusive lock on every table
    $tables = sql_tables();
    $locks = array();
    foreach ($tables as $name) {
        $locks[$name] = LOCK_EX;
    }
    sql_lock($locks);

    // First pass: collect the UPDATE statements, table by table
    $updates = array();
    foreach ($tables as $table) {
        $result = sql_query("SELECT * FROM $table;\n");
        // Walk through the records
        for ($left = sql_num_rows($result); $left > 0; $left--) {
            $row = sql_fetch_assoc($result);
            $decoded = $row;
            $cols = new AddCol($table, ADDCOL_UPDATE);
            foreach (sql_cols($table) as $col) {
                // Only string columns can hold character references
                if (is_string($decoded[$col])) {
                    // a_hcref2char() reads the character references
                    $decoded[$col] = a_hcref2char($decoded[$col]);
                    $cols->addstr($col, $decoded[$col], $row[$col]);
                }
            }
            // Nothing changed in this record
            if (!$cols->modified()) {
                continue;
            }
            printf("%s - %s\n", $table, $row["sn"]);
            $updates[] = "UPDATE $table " . $cols->ret()
                . " WHERE sn=" . $row["sn"] . ";\n";
        }
    }

    // Second pass: apply all the collected updates between
    // sql_begin() and sql_commit()
    sql_begin();
    foreach ($updates as $update) {
        sql_query($update);
    }
    sql_commit();

    // Put the original execution time limit back
    ini_set("max_execution_time", $savedLimit);
    $finished = time_hires();
    printf("[%s] Done. %0.10f seconds elapsed\n", date("Y-m-d H:i:s"), $finished-$started);
    return;
}
//
// zh2py.inc.php
//
// The SQLite version - we are not using it
// test_zh2py_sqlite: Run tests on the speed of GDBM vs. SQLite
//   Defines _ZH2PY_SQLITE_DB (default "/tmp/zh2py.db") when it is not
//   defined yet, builds the SQLite database from the GDBM one when the
//   file is missing, and then runs four suites of (number of phrases,
//   number of repetitions) through test_zh2py_sqlite_onetest().
//   No phrase is used twice.  The warm-up and the suites consume 13
//   phrases altogether, which is exactly the size of the phrase list;
//   enlarging the suites without adding phrases would loop forever.
function test_zh2py_sqlite()
{
    // Settings
    if (!defined("_ZH2PY_SQLITE_DB")) {
        define("_ZH2PY_SQLITE_DB", "/tmp/zh2py.db");
    }
    // Forget any existing handle, so that the database is opened afresh
    $GLOBALS["_ZH2PY_SQLITE"] = null;
    if (!file_exists(_ZH2PY_SQLITE_DB)) {
        zh2pydb_gdbm2sqlite();
    }
    $phrases = explode(" ", "臺北大塞車 我是依瑪貓 廚王爭霸戰 甜心酥餅 一口接一口 蒙大拿牛仔妹 恐龍入侵台灣 綠巨人玉米醬 小魚的故事 玉山銀行 我的一顆心 牛伯伯沙茶醬 王建民大勝利");
    // The indices of the phrases that are already used
    $used = array();
    // Warm up: run both implementations once, so that the cost of
    // opening the databases is not counted in the timed suites
    $idx = rand(0, count($phrases) - 1);
    $used[] = $idx;
    zh2pys($phrases[$idx]);
    zh2pys_sqlite($phrases[$idx]);
    // Each suite: (number of phrases, number of repetitions)
    $suites = array(
        array(1, 1),
        array(5, 1),
        array(1, 4),
        array(5, 4),
    );
    foreach ($suites as $suite) {
        // Draw random phrases that are not used yet
        $testphrases = array();
        while (count($testphrases) < $suite[0]) {
            $idx = rand(0, count($phrases) - 1);
            if (!in_array($idx, $used, true)) {
                $used[] = $idx;
                $testphrases[] = $phrases[$idx];
            }
        }
        test_zh2py_sqlite_onetest($testphrases, $suite[1]);
    }
    return;
}
// test_zh2py_sqlite_onetest: Run one GDBM vs. SQLite test suite
//   Converts every phrase $count times with zh2pys(), then does the same
//   with zh2pys_sqlite(), and prints the time each of them took.
//   $phrases: the list of phrases to convert
//   $count: how many times the whole list is converted
function test_zh2py_sqlite_onetest($phrases, $count)
{
    printf("=== Phrase %s for %d times ...\n", join(", ", $phrases), $count);
    $total = count($phrases);

    // Time zh2pys()
    $started = time_hires();
    for ($round = $count; $round > 0; $round--) {
        $k = 0;
        while ($k < $total) {
            zh2pys($phrases[$k]);
            $k++;
        }
    }
    printf("%-16s %0.10f seconds elapsed.\n", "zh2pys():", time_hires()-$started);

    // Time zh2pys_sqlite()
    $started = time_hires();
    for ($round = $count; $round > 0; $round--) {
        $k = 0;
        while ($k < $total) {
            zh2pys_sqlite($phrases[$k]);
            $k++;
        }
    }
    printf("%-16s %0.10f seconds elapsed.\n", "zh2pys_sqlite():", time_hires()-$started);
    return;
}
// zh2pydb_gdbm2sqlite: Initialize the zh2py SQLite database from the GDBM database
//   Any existing SQLite database file is removed and rebuilt from
//   scratch.  Each GDBM record maps a character to its pinyins joined
//   with "|"; every pinyin becomes one row (ch, ord, pinyin), where ord
//   is its position in that list.
//   On return the global $_ZH2PY_SQLITE holds the handle of the newly
//   built database.  Failures are reported with trigger_error().
function zh2pydb_gdbm2sqlite()
{
    global $_ZH2PY, $_ZH2PY_SQLITE;
    // Open the GDBM source database, unless it is already open
    if (is_null($_ZH2PY)) {
        $_ZH2PY = dba_open(_ZH2PY_DB, "r", "gdbm");
        if ($_ZH2PY === false) {
            trigger_error("Failed dba_open().\n" . _ZH2PY_DB, E_USER_ERROR);
        }
    }
    // Close the current SQLite database, if any.
    // Assign null rather than unset(): unset() on a variable imported
    // with "global" only destroys the local alias, so the new handle
    // below would be stored in a local variable and the real global
    // would keep pointing at the closed handle.
    if (!is_null($_ZH2PY_SQLITE)) {
        sqlite_close($_ZH2PY_SQLITE);
        $_ZH2PY_SQLITE = null;
    }
    // Always start from an empty database file
    if (file_exists(_ZH2PY_SQLITE_DB)) {
        unlink(_ZH2PY_SQLITE_DB);
    }
    // Create the new database and its table
    $error = null;
    $_ZH2PY_SQLITE = sqlite_open(_ZH2PY_SQLITE_DB, 0666, $error);
    if ($_ZH2PY_SQLITE === false) {
        trigger_error("Failed sqlite_open().\n$error", E_USER_ERROR);
    }
    $error = null;
    $create = "CREATE TABLE zh2py (ch varchar(3) NOT NULL, ord int NOT NULL, pinyin varchar(7) NOT NULL);\n";
    $r = sqlite_exec($_ZH2PY_SQLITE, $create, $error);
    if ($r === false) {
        trigger_error("Failed sqlite_exec().\n$create\n$error", E_USER_ERROR);
    }
    // Copy every character, one row for each of its pinyins
    $char = dba_firstkey($_ZH2PY);
    while ($char !== false) {
        $pinyins = explode("|", dba_fetch($char, $_ZH2PY));
        for ($i = 0; $i < count($pinyins); $i++) {
            $error = null;
            $insert = "INSERT INTO zh2py (ch, ord, pinyin)"
                . " VALUES ('" . sqlite_escape_string($char) . "', $i, '" . sqlite_escape_string($pinyins[$i]) . "');\n";
            $r = sqlite_exec($_ZH2PY_SQLITE, $insert, $error);
            if ($r === false) {
                trigger_error("Failed sqlite_exec().\n$insert\n$error", E_USER_ERROR);
            }
        }
        $char = dba_nextkey($_ZH2PY);
    }
    // Index the characters only after all the rows are in
    $error = null;
    $create = "CREATE INDEX zh2py_char ON zh2py (ch);\n";
    $r = sqlite_exec($_ZH2PY_SQLITE, $create, $error);
    if ($r === false) {
        trigger_error("Failed sqlite_exec().\n$create\n$error", E_USER_ERROR);
    }
    return;
}
// zh2pys_sqlite: Convert Chinese to pinyin, return all possible pinyins
//   The text is turned into one printf pattern, with a "%s" for each
//   Chinese character, and the pattern is then filled in with every
//   combination of pinyins of those characters.
//   $chinese: the text to convert
//   return: an array of the converted texts, one for each combination;
//           or an empty string when the text is empty
function zh2pys_sqlite($chinese)
{
    // Bounce the empty text
    if ($chinese == "") {
        return "";
    }

    // Build the printf pattern piece by piece, and collect the Chinese
    // characters in their order of appearance
    $chars = array();
    $pinyin = "";
    $previous = null;
    foreach (_zh2py_sqlite_split_text($chinese) as $piece) {
        if ($piece["is_chinese"]) {
            // A Chinese piece: one "%s" per character, separated by spaces
            $text = "";
            $length = mb_strlen($piece["text"]);
            for ($pos = 0; $pos < $length; $pos++) {
                $chars[] = mb_substr($piece["text"], $pos, 1);
                $text .= ($pos == 0? "%s": " %s");
            }
        } else {
            // A non-Chinese piece: escape the printf metacharacter
            $text = str_replace("%", "%%", $piece["text"]);
        }
        // Put a space between two pieces, unless there is already
        // white space on either side of the boundary
        if (   !is_null($previous)
            && !preg_match("/\s$/", $previous)
            && !preg_match("/^\s/", $text)) {
            $pinyin .= " ";
        }
        $pinyin .= $text;
        $previous = $text;
    }

    // Fill in the pattern with each possible combination of pinyins
    $pinyins = array();
    $combinations = _zh2py_sqlite_chars2py($chars);
    for ($i = 0; $i < count($combinations); $i++) {
        $pinyins[] = vsprintf($pinyin, $combinations[$i]);
    }
    return $pinyins;
}
// _zh2py_sqlite_split_text: Split text into Chinese or non-Chinese pieces
//   A character counts as Chinese when it has at least one row in the
//   zh2py table.  The SQLite database is opened on first use.
//   $text: the text to split; the first character is looked up
//          unconditionally, so it should not be empty
//   return: an array of pieces in their original order, each an array
//           with "is_chinese" (boolean) and "text" (the piece itself)
function _zh2py_sqlite_split_text($text)
{
    global $_ZH2PY_SQLITE;
    // Open the database on first use
    if (is_null($_ZH2PY_SQLITE)) {
        $error = null;
        $_ZH2PY_SQLITE = sqlite_open(_ZH2PY_SQLITE_DB, 0666, $error);
        if ($_ZH2PY_SQLITE === false) {
            trigger_error("Failed sqlite_open().\n$error", E_USER_ERROR);
        }
    }

    // Break the text into characters
    $chars = array();
    $length = mb_strlen($text);
    for ($pos = 0; $pos < $length; $pos++) {
        $chars[] = mb_substr($text, $pos, 1);
    }

    // Seed the first, still empty, piece with the status of the first
    // character, so that the loop below never sees a change on it
    $error = null;
    $select = "SELECT pinyin FROM zh2py WHERE ch='"
        . sqlite_escape_string($chars[0]) . "' LIMIT 1;\n";
    $result = sqlite_query($select, $_ZH2PY_SQLITE, SQLITE_ASSOC, $error);
    if ($result === false) {
        trigger_error("Failed sqlite_query().\n$select\n$error", E_USER_ERROR);
    }
    $pieces = array(
        array(
            "is_chinese" => sqlite_num_rows($result) > 0,
            "text" => "",
        ),
    );
    // The index of the piece that is currently growing
    $last = 0;

    foreach ($chars as $char) {
        $error = null;
        $select = "SELECT pinyin FROM zh2py WHERE ch='"
            . sqlite_escape_string($char) . "' LIMIT 1;\n";
        $result = sqlite_query($select, $_ZH2PY_SQLITE, SQLITE_ASSOC, $error);
        if ($result === false) {
            trigger_error("Failed sqlite_query().\n$select\n$error", E_USER_ERROR);
        }
        $is_chinese = sqlite_num_rows($result) > 0;
        if ($is_chinese !== $pieces[$last]["is_chinese"]) {
            // The Chinese status changed: start a new piece
            $pieces[] = array(
                "is_chinese" => $is_chinese,
                "text" => $char,
            );
            $last++;
        } else {
            // Same status: the current piece grows
            $pieces[$last]["text"] .= $char;
        }
    }
    return $pieces;
}
// _zh2py_sqlite_chars2py: Look up a series of Chinese characters
//   and return all possible pinyins
//   Works recursively: the pinyins of the first character are combined
//   with every combination of the remaining characters.
//   $chars: the list of characters to look up
//   return: an array of combinations, each an array that holds one
//           pinyin per character, in the order of the characters.
//           An empty list yields one empty combination.  A character
//           without any row in the zh2py table yields no combination
//           at all.
function _zh2py_sqlite_chars2py($chars)
{
    global $_ZH2PY_SQLITE;
    // No more characters to work with
    if (count($chars) == 0) {
        return array(array());
    }
    // Fetch the pinyins of the first character, in their stored order
    $char = array_shift($chars);
    $error = null;
    $select = "SELECT pinyin FROM zh2py"
        . " WHERE ch='" . sqlite_escape_string($char) . "'"
        . " ORDER BY ord;\n";
    $result = sqlite_query($select, $_ZH2PY_SQLITE, SQLITE_ASSOC, $error);
    if ($result === false) {
        trigger_error("Failed sqlite_query().\n$select\n$error", E_USER_ERROR);
    }
    $count = sqlite_num_rows($result);
    for ($i = 0, $pinyins = array(); $i < $count; $i++) {
        $row = sqlite_fetch_array($result, SQLITE_ASSOC);
        $pinyins[] = $row["pinyin"];
    }
    // Recurse on the remaining characters.  The call is spelled exactly
    // as the function is declared instead of relying on PHP function
    // names being case-insensitive.
    $follows = _zh2py_sqlite_chars2py($chars);
    // Prepend each pinyin of this character to each following combination
    $results = array();
    foreach ($pinyins as $pinyin) {
        foreach ($follows as $follow) {
            $results[] = array_merge(array($pinyin), $follow);
        }
    }
    return $results;
}
?>