<?php
// File name: pinyin.inc.php
// Description: PHP subroutines to convert Traditional Chinese to Pinyin
// Date: 2004-04-14
// Author: imacat <imacat@pristine.com.tw>
// Copyright: Copyright (C) 2004-2007 Pristine Communications

// Set the include path, unless INCPATH_SET says it has been set already
// (presumably incpath.inc.php defines INCPATH_SET -- confirm)
if (defined("INCPATH_SET") === false) {
    require_once dirname(__FILE__) . "/incpath.inc.php";
}
|
||
// Referenced subroutines
|
||
require_once "monica/chkwrite.inc.php";
|
||
require_once "monica/mkalldir.inc.php";
|
||
require_once "monica/xfileio.inc.php";
|
||
|
||
// Settings

// Kinds of storage the conversion table can live in
define("_PINYIN_DBTYPE_DBA", 1);
define("_PINYIN_DBTYPE_ARRAY", 2);

// Directory of the DBM files.  An including script may define DBMDIR
// beforehand; the default is named after the machine type and sits two
// directories above the one holding this file
defined("DBMDIR")
    or define("DBMDIR", dirname(dirname(dirname(__FILE__))) . "/" . php_uname("m"));

// The source mapping file and the DBM file built from it
define("_PINYIN_PINYIN2BIG5", dirname(__FILE__) . "/pinyin2big5");
define("_PINYIN_B52PY_DB", DBMDIR . "/b52py.db");

// The conversion table; filled in by _init_b52py() on first use
$_PINYIN_DB = null;
|
||
// Supplementary conversions that _init_b52py() adds to the table next to
// the entries read from the pinyin2big5 file: a few symbols, and single
// characters whose reading takes two syllables.
// The key for "fen1 ke4" used to end in a backslash, which escaped the
// closing quote and broke the array literal.
// NOTE(review): that backslash is presumably a leftover from a Big5-encoded
// source, where the second byte of that character is 0x5C -- confirm the
// encoding this file is stored in before deploying.
$_PINYIN_SUP = array(
    " " => " ",
    "#" => "#",
    "﹟" => "#",
    "兙" => "shi2 ke4",
    "兛" => "qian1 ke4",
    "兞" => "mao2 ke4",
    "兝" => "fen1 ke4",
    "兡" => "bai3 ke4",
    "兣" => "li2 ke4",
    "嗧" => "jia1 lun2",
    "瓩" => "qian1 wa3",
    "糎" => "li2 mi3",
);
|
||
|
||
// b52py: Convert Traditional Chinese to Pinyin
//   $big5:  the text to convert, as a byte string in which every byte
//           from 0x80 to 0xFE starts a two-byte character
//   return: the text with each two-byte character replaced by its table
//           entry; characters without an entry are kept as they are.
//           Returns "" when nothing at the start of $big5 can be parsed
//           (for example, when $big5 is empty).
function b52py($big5)
{
    // Initialize the database
    _init_b52py();

    // Split into pieces: alternating runs of single-byte (0x01-0x7F) text
    // and runs of two-byte characters.  \G pins every match to the end of
    // the previous one, so the scan stops at the first byte that fits
    // neither kind of run (a 0x00 or 0xFF byte, or a lead byte without a
    // second byte), and whatever follows is left out of the result.
    if (!preg_match_all("/\\G(?:[\\x01-\\x7F]+|(?:[\\x80-\\xFE].)+)/s", $big5, $m)) {
        return "";
    }
    $pieces = $m[0];

    // Process each piece
    foreach ($pieces as $i => $piece) {
        // Single-byte pieces are passed through untouched
        if (!preg_match("/^[\\x80-\\xFF]/", $piece)) {
            continue;
        }
        // Two-byte piece: convert it character by character
        $words = array();
        for ($j = 0, $len = strlen($piece); $j < $len; $j += 2) {
            $words[] = _b52py_lookup(substr($piece, $j, 2));
        }
        $pieces[$i] = implode(" ", $words);
    }

    // Join the pieces, with a space wherever two neighbours would
    // otherwise touch without any whitespace in between
    $pinyin = $pieces[0];
    for ($i = 1, $total = count($pieces); $i < $total; $i++) {
        if (   !preg_match('/\s$/', $pieces[$i - 1])
            && !preg_match('/^\s/', $pieces[$i])) {
            $pinyin .= " ";
        }
        $pinyin .= $pieces[$i];
    }

    return $pinyin;
}

// _b52py_lookup: Find the table entry of one two-byte character
//   $char:  the two bytes of the character
//   return: the entry stored for $char, or $char itself if there is none
function _b52py_lookup($char)
{
    global $_PINYIN_DB;
    switch (_PINYIN_DBTYPE) {
    // Using a DB file
    case _PINYIN_DBTYPE_DBA:
        if (dba_exists($char, $_PINYIN_DB)) {
            return dba_fetch($char, $_PINYIN_DB);
        }
        break;

    // Using an array
    case _PINYIN_DBTYPE_ARRAY:
        if (array_key_exists($char, $_PINYIN_DB)) {
            return $_PINYIN_DB[$char];
        }
        break;
    }
    return $char;
}
|
||
|
||
// _init_b52py: Initialize the Big5 to Pinyin database
//   Runs once; later calls return at once because _PINYIN_DBTYPE is
//   already defined.  Sets $_PINYIN_DB and defines _PINYIN_DBTYPE as:
//     _PINYIN_DBTYPE_DBA    $_PINYIN_DB is a DBA handle on _PINYIN_B52PY_DB.
//                           An existing file with data is used as is; an
//                           empty writable file or a newly created one is
//                           filled from the mapping file first.
//     _PINYIN_DBTYPE_ARRAY  $_PINYIN_DB is an array built from the mapping
//                           file.  Used whenever the DBM file cannot be
//                           used: DBA functions missing, the path is not a
//                           readable regular file, it is read-only and
//                           empty, it cannot be created, or dba_open()
//                           fails.
//   NOTE(review): when a character is listed more than once, the DBA
//   table keeps its first mapping and the array keeps its last; likewise
//   $_PINYIN_SUP only fills gaps in the DBA table but overrides entries of
//   the array.  Both behaviours are kept as they were -- confirm which one
//   is intended.
function _init_b52py()
{
    global $_PINYIN_DB, $_PINYIN_SUP;
    // Already initialized
    if (defined("_PINYIN_DBTYPE")) {
        return;
    }

    // Decide the action to do
    $handle = false;    // The DBA handle; false means "use an array"
    $fill = true;       // Whether the DBM file still needs its data

    // No DBA support
    if (!function_exists("dba_open")) {
        $handle = false;

    // File exists
    } elseif (file_exists(_PINYIN_B52PY_DB)) {
        // Only a readable regular file can be opened
        if (is_file(_PINYIN_B52PY_DB) && is_readable(_PINYIN_B52PY_DB)) {
            $mode = is_writable(_PINYIN_B52PY_DB)? "w": "r";
            $handle = dba_open(_PINYIN_B52PY_DB, $mode, "gdbm");
            if ($handle !== false) {
                // OK - there is data in it
                if (dba_firstkey($handle) !== false) {
                    $fill = false;
                // No data, and read only - it cannot be initialized
                } elseif ($mode == "r") {
                    dba_close($handle);
                    $handle = false;
                }
                // No data, but writable - initialize the data below
            }
        }

    // File does not exist, but is creatable
    } elseif (is_null(check_writable(_PINYIN_B52PY_DB))) {
        mkalldir(dirname(_PINYIN_B52PY_DB));
        $handle = dba_open(_PINYIN_B52PY_DB, "c", "gdbm");
    }

    // Special meta characters
    $sup = is_array($_PINYIN_SUP)? $_PINYIN_SUP: array();

    // Use the DB file
    if ($handle !== false) {
        $_PINYIN_DB = $handle;
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
        if (!$fill) {
            return;
        }
        // Initialize the data; existing keys are left alone
        foreach (_b52py_read_pairs() as $pair) {
            if (!dba_exists($pair[0], $_PINYIN_DB)) {
                dba_insert($pair[0], $pair[1], $_PINYIN_DB);
            }
        }
        foreach ($sup as $char => $pinyin) {
            if (!dba_exists($char, $_PINYIN_DB)) {
                dba_insert($char, $pinyin, $_PINYIN_DB);
            }
        }
        return;
    }

    // Use an array
    $_PINYIN_DB = array();
    define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_ARRAY);
    foreach (_b52py_read_pairs() as $pair) {
        $_PINYIN_DB[$pair[0]] = $pair[1];
    }
    foreach ($sup as $char => $pinyin) {
        $_PINYIN_DB[$char] = $pinyin;
    }
    return;
}

// _b52py_read_pairs: Read the mapping file _PINYIN_PINYIN2BIG5
//   Lines starting with "#" and lines without any non-space character are
//   skipped.  On every other line the first space-separated item is the
//   pinyin and each following item is a character that reads so.
//   return: array of array(character, pinyin), in file order
function _b52py_read_pairs()
{
    $pairs = array();
    foreach (explode("\n", xfread(_PINYIN_PINYIN2BIG5)) as $line) {
        // Skip comments and empty lines
        if (substr($line, 0, 1) == "#" || !preg_match('/\S/', $line)) {
            continue;
        }
        $chars = explode(" ", $line);
        // First item is pinyin
        $pinyin = array_shift($chars);
        foreach ($chars as $char) {
            // Repeated or trailing spaces leave empty items behind
            if ($char === "") {
                continue;
            }
            $pairs[] = array($char, $pinyin);
        }
    }
    return $pairs;
}
|
||
|
||
?>
|