// Copyright: Copyright (C) 2004-2007 Pristine Communications

// Set the include path
if (!defined("INCPATH_SET")) {
    require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/chkwrite.inc.php";
require_once "monica/mkalldir.inc.php";
require_once "monica/xfileio.inc.php";

// Settings
if (!defined("DBMDIR")) {
    define("DBMDIR", dirname(dirname(dirname(__FILE__))) . "/" . php_uname("m"));
}
// The source table (one pinyin followed by its characters per line)
define("_PINYIN_PINYIN2BIG5", dirname(__FILE__) . "/pinyin2big5");
// The cached character-to-pinyin DB file
define("_PINYIN_B52PY_DB", DBMDIR . "/b52py.db");
// Either a DBA handle or an array, depending on _PINYIN_DBTYPE
$_PINYIN_DB = null;
define("_PINYIN_DBTYPE_DBA", 1);
define("_PINYIN_DBTYPE_ARRAY", 2);
// Supplementary entries added on top of the pinyin2big5 table.
// NOTE(review): b52py() looks characters up as 2-byte substrings, so
//   these literal keys can only match if this file is stored in the
//   same double-byte encoding as the input (presumably Big5) -- confirm
//   the encoding of this file.
// NOTE(review): the 1-byte keys " " and "#" can never equal a 2-byte
//   lookup key; they may originally have been full-width characters
//   -- confirm against the original file.
$_PINYIN_SUP = array(
    " " => " ",
    "#" => "#",
    "﹟" => "#",
    "兙" => "shi2 ke4",
    "兛" => "qian1 ke4",
    "兞" => "mao2 ke4",
    // A stray backslash used to follow this character and escaped the
    // closing quote, which broke the whole array literal.
    "兝" => "fen1 ke4",
    "兡" => "bai3 ke4",
    "兣" => "li2 ke4",
    "嗧" => "jia1 lun2",
    "瓩" => "qian1 wa3",
    "糎" => "li2 mi3",
);

// b52py: Convert Traditional Chinese to Pinyin
//   $big5:  the text to convert.  Runs of bytes 0x01-0x7F are kept
//           as they are.  Runs starting with a byte 0x80-0xFE are read
//           as 2-byte characters, each replaced with its pinyin when
//           one is known and kept unchanged otherwise.
//   return: the converted text.  Converted characters are separated
//           with single spaces, and a space is inserted between
//           adjacent pieces unless one is already there.  An empty
//           string is returned when nothing can be split from the input.
function b52py($big5)
{
    // Initialize the database
    _init_b52py();

    // Split into pieces: alternating single-byte and double-byte runs.
    // The split stops at the first byte that fits neither kind.
    $pieces = array();
    $remains = $big5;
    while (preg_match('/^([\x01-\x7F]+|(?:[\x80-\xFE].)+)(.*)$/s', $remains, $m)) {
        $pieces[] = $m[1];
        $remains = $m[2];
    }
    // Nothing to convert -- avoid reading the undefined $pieces[0]
    if (count($pieces) == 0) {
        return "";
    }

    // Process each piece
    for ($i = 0; $i < count($pieces); $i++) {
        // Only the double-byte pieces are converted
        if (!preg_match('/^[\x80-\xFF]/', $pieces[$i])) {
            continue;
        }
        $words = array();
        $len = strlen($pieces[$i]);
        for ($j = 0; $j < $len; $j += 2) {
            $words[] = _b52py_lookup(substr($pieces[$i], $j, 2));
        }
        $pieces[$i] = implode(" ", $words);
    }

    // Join the pieces
    $pinyin = $pieces[0];
    for ($i = 1; $i < count($pieces); $i++) {
        // Insert a space, unless either side already supplies one
        if (    !preg_match('/\s$/', $pieces[$i-1])
                && !preg_match('/^\s/', $pieces[$i])) {
            $pinyin .= " ";
        }
        $pinyin .= $pieces[$i];
    }

    return $pinyin;
}

// _b52py_lookup: Look up one character in the initialized database
//   $char:  the (2-byte) character to look up
//   return: its pinyin, or $char itself when there is no entry
function _b52py_lookup($char)
{
    global $_PINYIN_DB;
    switch (_PINYIN_DBTYPE) {
    // Using a DB file
    case _PINYIN_DBTYPE_DBA:
        if (dba_exists($char, $_PINYIN_DB)) {
            return dba_fetch($char, $_PINYIN_DB);
        }
        return $char;
    // Using an array
    case _PINYIN_DBTYPE_ARRAY:
        if (array_key_exists($char, $_PINYIN_DB)) {
            return $_PINYIN_DB[$char];
        }
        return $char;
    }
    return $char;
}

// _b52py_read_entries: Read the pinyin2big5 table
//   return: a list of array(pinyin, array of characters), in file
//           order.  Lines starting with "#" and lines without any
//           non-space character are skipped.  The first
//           space-separated item of a line is the pinyin, the rest
//           are its characters.
function _b52py_read_entries()
{
    $entries = array();
    $lines = explode("\n", xfread(_PINYIN_PINYIN2BIG5));
    foreach ($lines as $line) {
        // Skip comments
        if (substr($line, 0, 1) == "#") {
            continue;
        }
        // Skip empty lines
        if (!preg_match('/\S/', $line)) {
            continue;
        }
        $chars = explode(" ", $line);
        // First item is pinyin
        $pinyin = array_shift($chars);
        $entries[] = array($pinyin, $chars);
    }
    return $entries;
}

// _init_b52py: Initialize the Big5 to Pinyin database
//   Runs once: it defines _PINYIN_DBTYPE and sets $_PINYIN_DB, and
//   returns immediately when _PINYIN_DBTYPE is already defined.
//   A gdbm DB file at _PINYIN_B52PY_DB is used when it can be opened;
//   an empty but writable (or newly-created) one is filled from the
//   pinyin2big5 table and $_PINYIN_SUP.  In every other case -- no dba
//   support, not a regular file, unreadable, read-only and empty,
//   not creatable, or dba_open() failing -- the table is loaded into
//   an in-memory array instead.
function _init_b52py()
{
    global $_PINYIN_DB, $_PINYIN_SUP;

    // Already initialized
    if (defined("_PINYIN_DBTYPE")) {
        return;
    }

    // Try to obtain a DBA handle; false means "use an array"
    $db = false;
    if (function_exists("dba_open") && in_array("gdbm", dba_handlers())) {
        if (file_exists(_PINYIN_B52PY_DB)) {
            // Must be a readable regular file
            if (is_file(_PINYIN_B52PY_DB) && is_readable(_PINYIN_B52PY_DB)) {
                $writable = is_writable(_PINYIN_B52PY_DB);
                $db = dba_open(_PINYIN_B52PY_DB, $writable? "w": "r", "gdbm");
                // Already has data -- use it as it is
                if ($db !== false && dba_firstkey($db) !== false) {
                    $_PINYIN_DB = $db;
                    define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
                    return;
                }
                // No data, and we cannot fill it in
                if ($db !== false && !$writable) {
                    dba_close($db);
                    $db = false;
                }
            }

        // File does not exist
        } else {
            // check_writable() is expected to return null when the
            // file is creatable
            $error = check_writable(_PINYIN_B52PY_DB);
            if (is_null($error)) {
                mkalldir(dirname(_PINYIN_B52PY_DB));
                $db = dba_open(_PINYIN_B52PY_DB, "c", "gdbm");
            }
        }
    }

    // Initialize the database
    $entries = _b52py_read_entries();

    // Initialize it as a DB file
    if ($db !== false) {
        $_PINYIN_DB = $db;
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
        // NOTE(review): existing keys are never overwritten here, so
        //   the first pinyin listed for a character wins, and the
        //   table wins over $_PINYIN_SUP.  The array backend below
        //   does the opposite.  Kept as it was -- confirm which one
        //   is intended.
        foreach ($entries as $entry) {
            foreach ($entry[1] as $char) {
                if (!dba_exists($char, $_PINYIN_DB)) {
                    dba_insert($char, $entry[0], $_PINYIN_DB);
                }
            }
        }
        // Special meta characters
        foreach (array_keys($_PINYIN_SUP) as $char) {
            if (!dba_exists($char, $_PINYIN_DB)) {
                dba_insert($char, $_PINYIN_SUP[$char], $_PINYIN_DB);
            }
        }

    // Initialize it as an array
    } else {
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_ARRAY);
        $_PINYIN_DB = array();
        // Later entries overwrite earlier ones
        foreach ($entries as $entry) {
            foreach ($entry[1] as $char) {
                $_PINYIN_DB[$char] = $entry[0];
            }
        }
        // Special meta characters
        foreach (array_keys($_PINYIN_SUP) as $char) {
            $_PINYIN_DB[$char] = $_PINYIN_SUP[$char];
        }
    }

    return;
}

?>