Initial commit.
This commit is contained in:
225
lib/php/monica/pinyin.inc.php
Normal file
225
lib/php/monica/pinyin.inc.php
Normal file
@@ -0,0 +1,225 @@
|
||||
<?php
|
||||
// File name: pinyin.inc.php
|
||||
// Description: PHP subroutines to convert Traditional Chinese to Pinyin
|
||||
// Date: 2004-04-14
|
||||
// Author: imacat <imacat@pristine.com.tw>
|
||||
// Copyright: Copyright (C) 2004-2007 Pristine Communications
|
||||
|
||||
// Set the include path
|
||||
if (!defined("INCPATH_SET")) {
|
||||
require_once dirname(__FILE__) . "/incpath.inc.php";
|
||||
}
|
||||
// Referenced subroutines
|
||||
require_once "monica/chkwrite.inc.php";
|
||||
require_once "monica/mkalldir.inc.php";
|
||||
require_once "monica/xfileio.inc.php";
|
||||
|
||||
// Settings

// Values that _init_b52py() stores in the _PINYIN_DBTYPE constant to record
// which kind of lookup table $_PINYIN_DB currently holds.
define("_PINYIN_DBTYPE_DBA", 1);
define("_PINYIN_DBTYPE_ARRAY", 2);

// The lookup table itself: a DBA handle or a plain array, set up lazily by
// _init_b52py().  It stays null until the first conversion is requested.
$_PINYIN_DB = null;

// Directory that holds the DB files.  An including script may define DBMDIR
// beforehand; otherwise it is the directory three levels above this file,
// followed by the machine type reported by php_uname("m").
defined("DBMDIR")
    || define("DBMDIR", dirname(dirname(dirname(__FILE__))) . "/" . php_uname("m"));

// The pinyin-to-Big5 source table that sits next to this file, and the DB
// file that is built from it.
define("_PINYIN_PINYIN2BIG5", dirname(__FILE__) . "/pinyin2big5");
define("_PINYIN_B52PY_DB", DBMDIR . "/b52py.db");
|
||||
// Supplementary readings that _init_b52py() merges with the entries read
// from the pinyin2big5 table.
//
// b52py() looks characters up as raw two-byte Big5 slices, so the CJK unit
// characters below are keyed by their Big5 byte pairs, written as escapes.
// That keeps the table independent of the encoding this source file is saved
// in.  It also avoids the classic Big5 pitfall of a trail byte 0x5C (a
// backslash, as in 0xA25C) swallowing the closing quote of a string literal.
//
// NOTE(review): the first three keys are kept exactly as found.  They were
// presumably full-width / small-form characters in the Big5 original and may
// have been altered by transcoding -- confirm against the original file.
$_PINYIN_SUP = array(
    " " => " ",
    "#" => "#",
    "﹟" => "#",
    "\xA2\x59" => "shi2 ke4",   // 兙
    "\xA2\x5A" => "qian1 ke4",  // 兛
    "\xA2\x5B" => "mao2 ke4",   // 兞
    "\xA2\x5C" => "fen1 ke4",   // 兝
    "\xA2\x5D" => "bai3 ke4",   // 兡
    "\xA2\x5E" => "li2 ke4",    // 兣
    "\xA2\x5F" => "jia1 lun2",  // 嗧
    "\xA2\x60" => "qian1 wa3",  // 瓩
    "\xA2\x61" => "li2 mi3",    // 糎
);
|
||||
|
||||
// b52py: Convert Traditional Chinese to Pinyin
//
// Parameters:
//   $big5   The text to convert, as a string of Big5 bytes.
// Returns:
//   The text with every two-byte character that is found in the lookup
//   table replaced by its pinyin.  Readings inside one run of two-byte
//   characters are joined with single spaces.  A space is also inserted
//   between adjacent runs unless one side already has whitespace there.
//   Characters without an entry and ASCII text are passed through
//   unchanged.  Scanning stops at the first byte that is neither ASCII
//   (0x01-0x7F) nor the lead byte (0x80-0xFE) of a complete two-byte
//   character; whatever follows is dropped.  An empty string is returned
//   when there is nothing to convert.
function b52py($big5)
{
    global $_PINYIN_DB;

    // Nothing to convert -- do not bother opening the database
    if ($big5 === null || $big5 === "") {
        return "";
    }

    // Initialize the database
    _init_b52py();

    // Split into maximal runs of ASCII bytes or of two-byte characters.
    // \G pins every match to the end of the previous one, so the scan ends
    // at the first byte that fits neither class instead of skipping it.
    preg_match_all("/\\G(?:[\\x01-\\x7F]+|(?:[\\x80-\\xFE].)+)/s", $big5, $m);
    $pieces = $m[0];
    $total = count($pieces);
    // The very first byte was not convertible
    if ($total == 0) {
        return "";
    }

    // Convert the two-byte runs; both backends share the same loop and
    // differ only in how a single character is looked up
    $use_dba = (_PINYIN_DBTYPE == _PINYIN_DBTYPE_DBA);
    for ($i = 0; $i < $total; $i++) {
        // ASCII runs are kept as they are
        if (!preg_match("/^[\\x80-\\xFF]/", $pieces[$i])) {
            continue;
        }
        $words = array();
        $length = strlen($pieces[$i]);
        for ($j = 0; $j < $length; $j += 2) {
            $char = substr($pieces[$i], $j, 2);
            if ($use_dba) {
                $found = dba_exists($char, $_PINYIN_DB);
            } else {
                $found = array_key_exists($char, $_PINYIN_DB);
            }
            if (!$found) {
                // Unknown character: keep the original bytes
                $words[] = $char;
            } elseif ($use_dba) {
                $words[] = dba_fetch($char, $_PINYIN_DB);
            } else {
                $words[] = $_PINYIN_DB[$char];
            }
        }
        $pieces[$i] = implode(" ", $words);
    }

    // Glue the runs back together
    $pinyin = $pieces[0];
    for ($i = 1; $i < $total; $i++) {
        // Insert a space, unless either side already provides whitespace
        if (   !preg_match("/\s$/", $pieces[$i-1])
            && !preg_match("/^\s/", $pieces[$i])) {
            $pinyin .= " ";
        }
        $pinyin .= $pieces[$i];
    }

    return $pinyin;
}
|
||||
|
||||
// _init_b52py: Initialize the Big5 to Pinyin database
//
// Picks the lookup backend on the first call and records the choice in the
// _PINYIN_DBTYPE constant; later calls return immediately.
//   _PINYIN_DBTYPE_DBA    $_PINYIN_DB is a DBA handle on the GDBM file
//                         _PINYIN_B52PY_DB.  The file is created and/or
//                         filled from the source table when it is new or
//                         empty and writable.
//   _PINYIN_DBTYPE_ARRAY  $_PINYIN_DB is an array built in memory.  This is
//                         the fallback whenever the DB file cannot be used:
//                         it is not a readable regular file, it is empty
//                         and read-only, it cannot be created, the DBA
//                         extension is not loaded, or dba_open() fails.
function _init_b52py()
{
    global $_PINYIN_DB;

    // Already initialized
    if (defined("_PINYIN_DBTYPE")) {
        return;
    }

    // Try to get a usable DB handle
    $handle = false;
    $populate = false;
    if (!function_exists("dba_open")) {
        // No DBA extension -- only the array backend is possible

    } elseif (file_exists(_PINYIN_B52PY_DB)) {
        // Something that is not a readable regular file cannot be opened
        if (is_file(_PINYIN_B52PY_DB) && is_readable(_PINYIN_B52PY_DB)) {
            $writable = is_writable(_PINYIN_B52PY_DB);
            $handle = dba_open(_PINYIN_B52PY_DB, $writable ? "w" : "r", "gdbm");
            // No data
            if ($handle !== false && dba_firstkey($handle) === false) {
                if ($writable) {
                    // Fill it now
                    $populate = true;
                } else {
                    // Read-only and empty -- of no use
                    dba_close($handle);
                    $handle = false;
                }
            }
        }

    // File does not exist -- create it when that is possible
    } elseif (is_null(check_writable(_PINYIN_B52PY_DB))) {
        mkalldir(dirname(_PINYIN_B52PY_DB));
        $handle = dba_open(_PINYIN_B52PY_DB, "c", "gdbm");
        $populate = ($handle !== false);
    }

    // Initialize it as an array
    if ($handle === false) {
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_ARRAY);
        $_PINYIN_DB = array();
        // Later entries overwrite earlier ones
        // NOTE(review): the DB file backend below keeps the FIRST reading of
        // a character instead, so the two backends can disagree on
        // characters that are listed more than once -- confirm which one is
        // intended.
        foreach (_pinyin_read_pairs() as $pair) {
            $_PINYIN_DB[$pair[0]] = $pair[1];
        }
        return;
    }

    // Initialize it as a DB file
    define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
    $_PINYIN_DB = $handle;
    if ($populate) {
        // Entries that are already stored are never replaced
        foreach (_pinyin_read_pairs() as $pair) {
            if (!dba_exists($pair[0], $_PINYIN_DB)) {
                dba_insert($pair[0], $pair[1], $_PINYIN_DB);
            }
        }
    }

    return;
}

// _pinyin_read_pairs: Read all character/pinyin pairs in loading order
//
// Parses the source table _PINYIN_PINYIN2BIG5.  Lines starting with "#" and
// lines without any non-blank character are skipped.  On the other lines the
// first space-separated item is the pinyin and every further item is a
// character with that reading.  The entries of $_PINYIN_SUP follow at the end.
// xfread() is assumed to return the whole file content as one string.
// Returns:
//   A list of array(character, pinyin) pairs.
function _pinyin_read_pairs()
{
    global $_PINYIN_SUP;

    $pairs = array();
    foreach (explode("\n", xfread(_PINYIN_PINYIN2BIG5)) as $line) {
        // Tolerate CR-LF line ends; 0x0D is never part of a two-byte character
        $line = rtrim($line, "\r");
        // Skip comments and empty lines
        if (substr($line, 0, 1) === "#" || !preg_match("/\S/", $line)) {
            continue;
        }
        $chars = explode(" ", $line);
        // First item is pinyin
        $pinyin = array_shift($chars);
        foreach ($chars as $char) {
            // Consecutive or trailing spaces leave empty items behind
            if ($char === "") {
                continue;
            }
            $pairs[] = array($char, $pinyin);
        }
    }

    // Special meta characters
    foreach ($_PINYIN_SUP as $char => $pinyin) {
        $pairs[] = array((string) $char, $pinyin);
    }

    return $pairs;
}
|
||||
|
||||
?>
|
||||
Reference in New Issue
Block a user