Initial commit.
This commit is contained in:
467
lib/php/monica/unused.inc.php
Normal file
467
lib/php/monica/unused.inc.php
Normal file
@@ -0,0 +1,467 @@
|
||||
<?php
|
||||
// File name: unused.inc.php
|
||||
// Description: PHP subroutines that are not used and not loaded
|
||||
// Date: 2007-08-10
|
||||
// Author: imacat <imacat@pristine.com.tw>
|
||||
// Copyright: Copyright (C) 2007-2008 Pristine Communications
|
||||
|
||||
// Set the include path
|
||||
if (!defined("INCPATH_SET")) {
|
||||
require_once dirname(__FILE__) . "/incpath.inc.php";
|
||||
}
|
||||
// Referenced subroutines
|
||||
require_once "monica/addcol.inc.php";
|
||||
require_once "monica/errhndl.inc.php";
|
||||
require_once "monica/hires.inc.php";
|
||||
require_once "monica/sql.inc.php";
|
||||
require_once "monica/unicode.inc.php";
|
||||
require_once "monica/zh2py.inc.php";
|
||||
|
||||
//
|
||||
// cnvtmap.inc.php
|
||||
//
|
||||
// cnvtmap.inc.php is removed now. It contained only this unused
|
||||
// mb_encode_numericentity_cnvtmap().
|
||||
// mb_encode_numericentity_cnvtmap: Obtain the cnvtmap of a character set
|
||||
// to be used in mb_encode_numericentity()
|
||||
// This is an occasional maintenance subroutine. Do not call it
|
||||
// regularly. The result should be saved.
|
||||
function mb_encode_numericentity_cnvtmap($charset, $archive = false)
{
    // Probe every code point from 0 up to (but not including) 65536 * 32
    // and collect those that iconv() cannot convert from UTF-8 to $charset.
    //
    // $charset: name of the target character set, passed to iconv().
    // $archive: when true, also echo the map as PHP source assigning
    //           $CNVTMAP["<charset>"].
    // Returns:  a flat array of quadruples (range start, range end,
    //           0x000000, 0xFFFFFF), one quadruple per run of consecutive
    //           unconvertible code points.

    // Preserve the original timeout
    $timeout = ini_get("max_execution_time");
    ini_set("max_execution_time", 0);

    // Find the characters that do not fit into that character set.
    // The null handler is installed once around the whole sweep so that
    // both iconv() calls are covered; building the UTF-8 form already fails
    // for values that are not encodable code points.
    set_error_handler("null_error_handler");
    $ords = array();
    for ($i = 0; $i < 65536 * 32; $i++) {
        $c = iconv("UTF-32LE", "UTF-8", pack("V", $i));
        // Not expressible in UTF-8 at all: nothing to test against $charset
        if ($c === false) {
            continue;
        }
        $GLOBALS["php_errormsg"] = null;
        $converted = iconv("UTF-8", $charset, $c);
        // iconv() reports failure through its return value; the error
        // message check is kept for handlers that record it there.
        if ($converted === false || !is_null($GLOBALS["php_errormsg"])) {
            $ords[] = $i;
        }
    }
    restore_error_handler();

    // Get the ranges: merge consecutive code points into (start, end) pairs
    $ranges = array();
    $prev = -2;
    foreach ($ords as $ord) {
        if ($ord != $prev + 1) {
            // A new range
            $ranges[] = array($ord, $ord);
        } else {
            // Adjust the end point
            $ranges[count($ranges) - 1][1] = $ord;
        }
        $prev = $ord;
    }

    // Convert the ranges to cnvtmap for mb_encode_numericentity()
    $cnvtmap = array();
    foreach ($ranges as $range) {
        $cnvtmap[] = $range[0];
        $cnvtmap[] = $range[1];
        $cnvtmap[] = 0x000000;
        $cnvtmap[] = 0xFFFFFF;
    }

    // Output it in a format suitable to be archived
    if ($archive) {
        echo "\$CNVTMAP[\"$charset\"] = array(\n";
        for ($i = 0; $i < count($cnvtmap); $i += 4) {
            printf(" 0x%06X, 0x%06X, 0x%06X, 0x%06X,\n",
                $cnvtmap[$i], $cnvtmap[$i+1], $cnvtmap[$i+2], $cnvtmap[$i+3]);
        }
        echo ");\n";
    }

    // Restore the timeout
    ini_set("max_execution_time", $timeout);
    return $cnvtmap;
}
|
||||
|
||||
// mb_encode_numericentity_invalid_cnvtmap: Obtain the cnvtmap of invalid unicode characters
|
||||
// This is an occasional maintenance subroutine. Do not call it
|
||||
// regularly. The result should be saved.
|
||||
function mb_encode_numericentity_invalid_cnvtmap($archive = false)
{
    // Probe code points 0..65535 by sending each one to the database in a
    // "SELECT '<char>';" statement, and collect those for which pg_query()
    // fails.
    //
    // $archive: when true, also echo the map as PHP source assigning
    //           $CNVTMAP["invalid"].
    // Returns:  a flat array of quadruples (range start, range end,
    //           0x000000, 0xFFFFFF), one quadruple per run of consecutive
    //           rejected code points.

    // Preserve the original timeout
    $timeout = ini_get("max_execution_time");
    ini_set("max_execution_time", 0);

    // Find the characters that the database rejects.
    // The null handler is installed once around the whole sweep so that the
    // iconv() call is covered too (it fails for the surrogate range).
    set_error_handler("null_error_handler");
    $ords = array();
    for ($i = 0; $i < 65536; $i++) {
        // NOTE(review): when iconv() cannot encode the code point, $c is
        // false and the probe below is built from that value, exactly as
        // before -- confirm this is the intended handling.
        $c = iconv("UTF-32LE", "UTF-8", pack("V", $i));
        $GLOBALS["php_errormsg"] = null;
        $result = pg_query("SELECT '" . sql_esctext($c) . "';\n");
        if ($result === false) {
            $ords[] = $i;
        } else {
            // Release the result now instead of holding 65536 of them
            pg_free_result($result);
        }
    }
    restore_error_handler();

    // Get the ranges: merge consecutive code points into (start, end) pairs
    $ranges = array();
    $prev = -2;
    foreach ($ords as $ord) {
        if ($ord != $prev + 1) {
            // A new range
            $ranges[] = array($ord, $ord);
        } else {
            // Adjust the end point
            $ranges[count($ranges) - 1][1] = $ord;
        }
        $prev = $ord;
    }

    // Convert the ranges to cnvtmap for mb_encode_numericentity()
    $cnvtmap = array();
    foreach ($ranges as $range) {
        $cnvtmap[] = $range[0];
        $cnvtmap[] = $range[1];
        $cnvtmap[] = 0x000000;
        $cnvtmap[] = 0xFFFFFF;
    }

    // Output it in a format suitable to be archived
    if ($archive) {
        echo "\$CNVTMAP[\"invalid\"] = array(\n";
        for ($i = 0; $i < count($cnvtmap); $i += 4) {
            printf(" 0x%06X, 0x%06X, 0x%06X, 0x%06X,\n",
                $cnvtmap[$i], $cnvtmap[$i+1], $cnvtmap[$i+2], $cnvtmap[$i+3]);
        }
        echo ");\n";
    }

    // Restore the timeout
    ini_set("max_execution_time", $timeout);
    return $cnvtmap;
}
|
||||
|
||||
|
||||
//
|
||||
// unicode.inc.php
|
||||
//
|
||||
// rest_hcerefs: Restore HTML character entities references in the database
|
||||
// This is an occasional maintenance subroutine. Do not call it
|
||||
// regularly. Also this will replace all HTML character entities
|
||||
// references. Stop if you want to preserve any of them.
|
||||
function rest_hcerefs()
{
    // Run a_hcref2char() over every string column of every row of every
    // table returned by sql_tables(), and write back the rows that changed.
    // Prints "<table> - <sn>" for each changed row and a timing line at the
    // end. Takes no arguments and returns nothing.

    // Preserve the original timeout
    $timeout = ini_get("max_execution_time");
    ini_set("max_execution_time", 0);
    $t0 = time_hires();

    // Lock the tables
    $tables = sql_tables();
    $locks = array();
    foreach ($tables as $table) {
        $locks[$table] = LOCK_EX;
    }
    sql_lock($locks);

    // Collect all the UPDATE statements first; they are run together below
    $sqls = array();
    // Loop each table
    foreach ($tables as $table) {
        $select = "SELECT * FROM $table;\n";
        $result = sql_query($select);
        $count = sql_num_rows($result);
        // The column list belongs to the table, not to the row, so look it
        // up once per table (and only when there are rows to process).
        // NOTE(review): assumes sql_cols() returns the same list for the
        // same table while the tables are locked -- confirm.
        $tablecols = ($count > 0) ? sql_cols($table) : array();
        // Loop each record
        for ($i = 0; $i < $count; $i++) {
            $cur = sql_fetch_assoc($result);
            $new = $cur;
            $cols = new AddCol($table, ADDCOL_UPDATE);
            // Loop each column
            foreach ($tablecols as $col) {
                // Skip non-string (numbers, boolean) columns
                if (!is_string($new[$col])) {
                    continue;
                }
                // Read the character references with a_hcref2char()
                $new[$col] = a_hcref2char($new[$col]);
                $cols->addstr($col, $new[$col], $cur[$col]);
            }
            if ($cols->modified()) {
                printf("%s - %s\n", $table, $cur["sn"]);
                $sqls[] = "UPDATE $table " . $cols->ret()
                    . " WHERE sn=" . $cur["sn"] . ";\n";
            }
        }
    }

    // Update it
    sql_begin();
    foreach ($sqls as $sql) {
        sql_query($sql);
    }
    sql_commit();

    // Restore the timeout
    ini_set("max_execution_time", $timeout);
    $t1 = time_hires();
    printf("[%s] Done. %0.10f seconds elapsed\n", date("Y-m-d H:i:s"), $t1-$t0);
    return;
}
|
||||
|
||||
|
||||
//
|
||||
// zh2py.inc.php
|
||||
//
|
||||
// The SQLite version - we are not using it
|
||||
|
||||
// test_zh2py_sqlite: Run tests on the speed of GDBM vs. SQLite
|
||||
function test_zh2py_sqlite()
{
    // Compare zh2pys() against zh2pys_sqlite() on randomly chosen phrases.
    // Defines _ZH2PY_SQLITE_DB as "/tmp/zh2py.db" when it is not defined
    // yet, builds that database with zh2pydb_gdbm2sqlite() when the file is
    // missing, then runs four suites through test_zh2py_sqlite_onetest().
    // Takes no arguments and returns nothing.

    // Settings
    if (!defined("_ZH2PY_SQLITE_DB")) {
        define("_ZH2PY_SQLITE_DB", "/tmp/zh2py.db");
    }
    $GLOBALS["_ZH2PY_SQLITE"] = null;

    if (!file_exists(_ZH2PY_SQLITE_DB)) {
        zh2pydb_gdbm2sqlite();
    }
    $phrases = explode(" ", "臺北大塞車 我是依瑪貓 廚王爭霸戰 甜心酥餅 一口接一口 蒙大拿牛仔妹 恐龍入侵台灣 綠巨人玉米醬 小魚的故事 玉山銀行 我的一顆心 牛伯伯沙茶醬 王建民大勝利");
    // Shuffle once and hand phrases out from the front, so that no phrase
    // is used twice and the selection can never loop forever.
    shuffle($phrases);

    // Open the connection first
    $warmup = array_shift($phrases);
    zh2pys($warmup);
    zh2pys_sqlite($warmup);

    // Each suite is (number of phrases, number of repetitions)
    $suites = array(
        array(1, 1),
        array(5, 1),
        array(1, 4),
        array(5, 4),
    );
    foreach ($suites as $suite) {
        if (count($phrases) < $suite[0]) {
            trigger_error("Not enough unused test phrases for a suite of "
                . $suite[0] . ".", E_USER_ERROR);
            return;
        }
        // Take the next unused phrases off the shuffled pool
        $testphrases = array_splice($phrases, 0, $suite[0]);
        test_zh2py_sqlite_onetest($testphrases, $suite[1]);
    }

    return;
}
|
||||
|
||||
// test_zh2py_sqlite_onetest: Run one GDBM vs. SQLite test suite
|
||||
function test_zh2py_sqlite_onetest($phrases, $count)
{
    // Convert every phrase in $phrases $count times with zh2pys(), then
    // again with zh2pys_sqlite(), and print the elapsed seconds of each.
    //
    // $phrases: array of phrases to convert.
    // $count:   number of passes over $phrases.
    printf("=== Phrase %s for %d times ...\n", implode(", ", $phrases), $count);

    // First contender: zh2pys()
    $started = time_hires();
    for ($pass = 0; $pass < $count; $pass++) {
        foreach ($phrases as $phrase) {
            zh2pys($phrase);
        }
    }
    $elapsed = time_hires() - $started;
    printf("%-16s %0.10f seconds elapsed.\n", "zh2pys():", $elapsed);

    // Second contender: zh2pys_sqlite()
    $started = time_hires();
    for ($pass = 0; $pass < $count; $pass++) {
        foreach ($phrases as $phrase) {
            zh2pys_sqlite($phrase);
        }
    }
    $elapsed = time_hires() - $started;
    printf("%-16s %0.10f seconds elapsed.\n", "zh2pys_sqlite():", $elapsed);

    return;
}
|
||||
|
||||
// zh2pydb_gdbm2sqlite: Initialize the zh2py SQLite database from the GDBM database
|
||||
function zh2pydb_gdbm2sqlite()
{
    // Rebuild the SQLite file _ZH2PY_SQLITE_DB from the GDBM database
    // _ZH2PY_DB: every GDBM key becomes one zh2py row per "|"-separated
    // value, with "ord" holding the position of the value in that list.
    // Leaves the new SQLite handle in the global $_ZH2PY_SQLITE.
    // Takes no arguments and returns nothing; failures are reported with
    // trigger_error(E_USER_ERROR).
    global $_ZH2PY, $_ZH2PY_SQLITE;

    // Start the GDBM database
    if (is_null($_ZH2PY)) {
        $_ZH2PY = dba_open(_ZH2PY_DB, "r", "gdbm");
        if ($_ZH2PY === false) {
            // Stop before the existing SQLite file is deleted: without the
            // source there is nothing to refill it from.
            $_ZH2PY = null;
            trigger_error("Failed dba_open().\n" . _ZH2PY_DB, E_USER_ERROR);
            return;
        }
    }

    // Drop any existing SQLite database.
    // Assign null rather than unset(): unset() on a variable imported with
    // "global" only removes the local name, which would leave the closed
    // handle in the global and store the new handle in a local variable.
    if (!is_null($_ZH2PY_SQLITE)) {
        sqlite_close($_ZH2PY_SQLITE);
        $_ZH2PY_SQLITE = null;
    }
    if (file_exists(_ZH2PY_SQLITE_DB)) {
        unlink(_ZH2PY_SQLITE_DB);
    }

    // Start the new SQLite database
    $error = null;
    $_ZH2PY_SQLITE = sqlite_open(_ZH2PY_SQLITE_DB, 0666, $error);
    if ($_ZH2PY_SQLITE === false) {
        trigger_error("Failed sqlite_open().\n$error", E_USER_ERROR);
    }
    $error = null;
    $create = "CREATE TABLE zh2py (ch varchar(3) NOT NULL, ord int NOT NULL, pinyin varchar(7) NOT NULL);\n";
    $r = sqlite_exec($_ZH2PY_SQLITE, $create, $error);
    if ($r === false) {
        trigger_error("Failed sqlite_exec().\n$create\n$error", E_USER_ERROR);
    }

    // Copy every key of the GDBM database
    $char = dba_firstkey($_ZH2PY);
    while ($char !== false) {
        $pinyins = explode("|", dba_fetch($char, $_ZH2PY));
        for ($i = 0; $i < count($pinyins); $i++) {
            $error = null;
            $insert = "INSERT INTO zh2py (ch, ord, pinyin)"
                . " VALUES ('" . sqlite_escape_string($char) . "', $i, '" . sqlite_escape_string($pinyins[$i]) . "');\n";
            $r = sqlite_exec($_ZH2PY_SQLITE, $insert, $error);
            if ($r === false) {
                trigger_error("Failed sqlite_exec().\n$insert\n$error", E_USER_ERROR);
            }
        }
        $char = dba_nextkey($_ZH2PY);
    }

    // Index the character column once all rows are in
    $error = null;
    $create = "CREATE INDEX zh2py_char ON zh2py (ch);\n";
    $r = sqlite_exec($_ZH2PY_SQLITE, $create, $error);
    if ($r === false) {
        trigger_error("Failed sqlite_exec().\n$create\n$error", E_USER_ERROR);
    }
    return;
}
|
||||
|
||||
// zh2pys_sqlite: Convert Chinese to pinyin, return all possible pinyins
|
||||
function zh2pys_sqlite($chinese)
{
    // Return every candidate pinyin rendering of $chinese.
    //
    // $chinese: the text to convert.
    // Returns:  "" for empty input; otherwise an array of strings, one per
    //           combination returned by _zh2py_sqlite_chars2py().

    // Bounce the empty text
    if ($chinese == "") {
        return "";
    }

    // Turn each piece into a fragment of a printf format: one "%s" slot per
    // Chinese character, literal text (with "%" escaped) for everything else
    $letters = array();
    $fragments = array();
    foreach (_zh2py_sqlite_split_text($chinese) as $piece) {
        if (!$piece["is_chinese"]) {
            // Escape the printf metacharacter
            $fragments[] = str_replace("%", "%%", $piece["text"]);
            continue;
        }
        $slots = array();
        $length = mb_strlen($piece["text"]);
        for ($pos = 0; $pos < $length; $pos++) {
            $letters[] = mb_substr($piece["text"], $pos, 1);
            $slots[] = "%s";
        }
        $fragments[] = implode(" ", $slots);
    }

    // Glue the fragments together, separated by a space unless one of the
    // two neighbours already brings whitespace at the joint
    $format = $fragments[0];
    for ($k = 1; $k < count($fragments); $k++) {
        $spaced = preg_match("/\s$/", $fragments[$k-1])
            || preg_match("/^\s/", $fragments[$k]);
        if (!$spaced) {
            $format .= " ";
        }
        $format .= $fragments[$k];
    }

    // Fill the format with each possible combination of pinyins
    $pinyins = array();
    foreach (_zh2py_sqlite_chars2py($letters) as $combination) {
        $pinyins[] = vsprintf($format, $combination);
    }

    return $pinyins;
}
|
||||
|
||||
// _zh2py_sqlite_split_text: Split text into Chinese or non-Chinese pieces
|
||||
function _zh2py_sqlite_split_text($text)
{
    // Cut $text into runs of consecutive characters that are all found, or
    // all not found, in the zh2py table.
    //
    // $text:   the text to split.
    // Returns: an array of pieces, each an array with the keys
    //          "is_chinese" (whether its characters are in the table) and
    //          "text" (the run itself). Empty text gives a single
    //          non-Chinese piece with empty text.
    global $_ZH2PY_SQLITE;

    // Start the database
    if (is_null($_ZH2PY_SQLITE)) {
        $error = null;
        $_ZH2PY_SQLITE = sqlite_open(_ZH2PY_SQLITE_DB, 0666, $error);
        if ($_ZH2PY_SQLITE === false) {
            trigger_error("Failed sqlite_open().\n$error", E_USER_ERROR);
        }
    }

    // Walk the characters; each one is looked up exactly once
    $pieces = array();
    $length = mb_strlen($text);
    for ($i = 0; $i < $length; $i++) {
        $char = mb_substr($text, $i, 1);
        $error = null;
        $select = "SELECT pinyin FROM zh2py"
            . " WHERE ch='" . sqlite_escape_string($char) . "'"
            . " LIMIT 1;\n";
        $result = sqlite_query($select, $_ZH2PY_SQLITE, SQLITE_ASSOC, $error);
        if ($result === false) {
            trigger_error("Failed sqlite_query().\n$select\n$error", E_USER_ERROR);
        }
        $is_chinese = sqlite_num_rows($result) > 0;

        $last = count($pieces) - 1;
        if ($last < 0 || $pieces[$last]["is_chinese"] != $is_chinese) {
            // First character, or Chinese status changed: start a new piece
            $pieces[] = array(
                "is_chinese" => $is_chinese,
                "text" => $char,
            );
        } else {
            // Append to the current piece
            $pieces[$last]["text"] .= $char;
        }
    }

    // Nothing to split: keep returning one empty, non-Chinese piece, but
    // without reading a first character that does not exist
    if (count($pieces) == 0) {
        $pieces[] = array(
            "is_chinese" => false,
            "text" => "",
        );
    }
    return $pieces;
}
|
||||
|
||||
// _zh2py_sqlite_chars2py: Look up a series of Chinese characters
|
||||
// and return all possible pinyins
|
||||
function _zh2py_sqlite_chars2py($chars)
{
    // Look up the pinyins of each character in $chars and return every
    // combination that takes one pinyin per character.
    //
    // $chars:  array of single characters.
    // Returns: an array of combinations, each an array with one pinyin per
    //          input character, in input order. The first character varies
    //          slowest. An empty $chars gives array(array()).
    global $_ZH2PY_SQLITE;

    // Fetch the candidate pinyins of every character, in input order
    $candidates = array();
    foreach ($chars as $char) {
        $error = null;
        $select = "SELECT pinyin FROM zh2py"
            . " WHERE ch='" . sqlite_escape_string($char) . "'"
            . " ORDER BY ord;\n";
        $result = sqlite_query($select, $_ZH2PY_SQLITE, SQLITE_ASSOC, $error);
        if ($result === false) {
            trigger_error("Failed sqlite_query().\n$select\n$error", E_USER_ERROR);
        }
        $found = array();
        $total = sqlite_num_rows($result);
        for ($n = 0; $n < $total; $n++) {
            $row = sqlite_fetch_array($result, SQLITE_ASSOC);
            $found[] = $row["pinyin"];
        }
        $candidates[] = $found;
    }

    // Grow the combinations one character at a time: every combination
    // built so far is extended by each pinyin of the next character
    $combinations = array(array());
    foreach ($candidates as $options) {
        $extended = array();
        foreach ($combinations as $prefix) {
            foreach ($options as $option) {
                $next = $prefix;
                $next[] = $option;
                $extended[] = $next;
            }
        }
        $combinations = $extended;
    }

    return $combinations;
}
|
||||
|
||||
?>
|
||||
Reference in New Issue
Block a user