Files
selima-perl/lib/php/monica/pinyin.inc.php
2026-03-10 21:31:43 +08:00

226 lines
6.8 KiB
PHP
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// File name: pinyin.inc.php
// Description: PHP subroutines to convert Traditional Chinese to Pinyin
// Date: 2004-04-14
// Author: imacat <imacat@pristine.com.tw>
// Copyright: Copyright (C) 2004-2007 Pristine Communications
// Set the include path
if (!defined("INCPATH_SET")) {
require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/chkwrite.inc.php";
require_once "monica/mkalldir.inc.php";
require_once "monica/xfileio.inc.php";
// Settings
// DBMDIR is the directory that holds the DB file.  It may be defined by
// the including script beforehand; otherwise it defaults to a directory
// three levels above this file, named after the machine type that
// php_uname("m") reports.
if (!defined("DBMDIR")) {
define("DBMDIR", dirname(dirname(dirname(__FILE__))) . "/" . php_uname("m"));
}
// The text table, next to this file, that _init_b52py() reads to build
// the lookup data.  Each data line is a pinyin reading followed by
// space-separated characters; lines starting with "#" are comments.
define("_PINYIN_PINYIN2BIG5", dirname(__FILE__) . "/pinyin2big5");
// The DB file that _init_b52py() opens with the "gdbm" handler
define("_PINYIN_B52PY_DB", DBMDIR . "/b52py.db");
// The lookup data, set by _init_b52py(): either an opened dba handle or
// an array that maps characters to pinyin
$_PINYIN_DB = null;
// The possible values of _PINYIN_DBTYPE, which _init_b52py() defines to
// tell which of the two forms $_PINYIN_DB holds
define("_PINYIN_DBTYPE_DBA", 1);
define("_PINYIN_DBTYPE_ARRAY", 2);
$_PINYIN_SUP = array(
" " => " ",
"" => "#",
"" => "#",
"" => "shi2 ke4",
"" => "qian1 ke4",
"" => "mao2 ke4",
"\" => "fen1 ke4",
"" => "bai3 ke4",
"" => "li2 ke4",
"" => "jia1 lun2",
"" => "qian1 wa3",
"" => "li2 mi3",
);
// b52py: Convert Traditional Chinese to Pinyin
//   $big5:  A Big5-encoded byte string, possibly mixed with ASCII text
//   return: The text with every Big5 character that is found in the
//           lookup data replaced with its pinyin.  ASCII text and
//           characters that are not found are passed through unchanged.
//           The characters of a Big5 run are joined with single spaces,
//           and a space is inserted between two adjacent runs unless
//           there is already white space at that boundary.  An empty
//           (or null) input yields an empty string.
function b52py($big5)
{
    // Initialize the database
    _init_b52py();
    // Nothing to convert -- there would be no first piece to start with
    if ($big5 === null || $big5 === "") {
        return "";
    }
    // Split into pieces in one pass: runs of single-byte text, and runs
    // of double-byte characters (a lead byte plus whatever byte follows).
    // The two alternatives cover every possible byte, so nothing is
    // dropped: a NUL stays in its single-byte run, and a lead byte that
    // is truncated at the end of the text ends its run as a lone byte.
    if (!preg_match_all("/[\\x00-\\x7F]+|(?:[\\x80-\\xFF].?)+/s", $big5, $m)) {
        // The regular expression engine failed -- return the text as is
        // rather than losing it
        return $big5;
    }
    $pieces = $m[0];
    // Process each piece
    foreach ($pieces as $i => $piece) {
        // Only Big5 pieces are converted
        if (ord($piece[0]) < 0x80) {
            continue;
        }
        $words = array();
        $length = strlen($piece);
        for ($j = 0; $j < $length; $j += 2) {
            $char = substr($piece, $j, 2);
            $reading = _b52py_lookup($char);
            // Keep the characters we do not know
            $words[] = is_null($reading)? $char: $reading;
        }
        $pieces[$i] = implode(" ", $words);
    }
    // Join the pieces
    $pinyin = $pieces[0];
    $total = count($pieces);
    for ($i = 1; $i < $total; $i++) {
        // Insert a space
        if (   !preg_match("/\s$/", $pieces[$i-1])
            && !preg_match("/^\s/", $pieces[$i])) {
            $pinyin .= " ";
        }
        $pinyin .= $pieces[$i];
    }
    return $pinyin;
}
// _b52py_lookup: Look up the pinyin of a single Big5 character
//   $char:  The bytes of one character
//   return: The pinyin, or null if the character is not in the lookup
//           data.  _init_b52py() must have been run first.
function _b52py_lookup($char)
{
    global $_PINYIN_DB;
    switch (_PINYIN_DBTYPE) {
    // Using a DB file
    case _PINYIN_DBTYPE_DBA:
        if (dba_exists($char, $_PINYIN_DB)) {
            return dba_fetch($char, $_PINYIN_DB);
        }
        return null;
    // Using an array
    case _PINYIN_DBTYPE_ARRAY:
        if (array_key_exists($char, $_PINYIN_DB)) {
            return $_PINYIN_DB[$char];
        }
        return null;
    }
    return null;
}
// _init_b52py: Initialize the Big5 to Pinyin database
//   Sets the global $_PINYIN_DB and defines _PINYIN_DBTYPE.  When the DB
//   file _PINYIN_B52PY_DB can be opened, $_PINYIN_DB is its dba handle
//   and _PINYIN_DBTYPE is _PINYIN_DBTYPE_DBA; a new or empty DB file is
//   filled from the text table first.  Otherwise $_PINYIN_DB is an array
//   that holds the whole table and _PINYIN_DBTYPE is
//   _PINYIN_DBTYPE_ARRAY.  Calling it again does nothing.
function _init_b52py()
{
    global $_PINYIN_DB, $_PINYIN_SUP;
    // Already initialized
    if (defined("_PINYIN_DBTYPE")) {
        return;
    }
    list($handle, $is_filled) = _pinyin_open_b52py_db();
    // No usable DB file -- keep the table in an array
    if ($handle === false) {
        $_PINYIN_DB = _pinyin_read_table($_PINYIN_SUP);
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_ARRAY);
        return;
    }
    // Using a DB file
    $_PINYIN_DB = $handle;
    define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
    // No data - initialize the data
    if (!$is_filled) {
        foreach (_pinyin_read_table($_PINYIN_SUP) as $char => $pinyin) {
            // Array keys that look like integers come back as integers
            $char = (string) $char;
            if (!dba_exists($char, $_PINYIN_DB)) {
                dba_insert($char, $pinyin, $_PINYIN_DB);
            }
        }
    }
    return;
}
// _pinyin_open_b52py_db: Open the DB file, if it can be used
//   return: array($handle, $is_filled).  $handle is the opened dba
//           handle, or false if the data must be kept in an array
//           instead.  $is_filled tells whether the DB file already
//           holds data.
function _pinyin_open_b52py_db()
{
    $unusable = array(false, false);
    // The dba extension is not available
    if (!function_exists("dba_open")) {
        return $unusable;
    }
    // File does not exist
    if (!file_exists(_PINYIN_B52PY_DB)) {
        // Not creatable
        if (!is_null(check_writable(_PINYIN_B52PY_DB))) {
            return $unusable;
        }
        // Creatable
        mkalldir(dirname(_PINYIN_B52PY_DB));
        $handle = dba_open(_PINYIN_B52PY_DB, "c", "gdbm");
        // dba_open() returns false on failure, for example when the
        // gdbm handler is not compiled in
        if ($handle === false) {
            return $unusable;
        }
        return array($handle, false);
    }
    // Not a file, or not readable
    if (!is_file(_PINYIN_B52PY_DB) || !is_readable(_PINYIN_B52PY_DB)) {
        return $unusable;
    }
    // Open it read only when it is not writable
    $is_writable = is_writable(_PINYIN_B52PY_DB);
    $handle = dba_open(_PINYIN_B52PY_DB, $is_writable? "w": "r", "gdbm");
    if ($handle === false) {
        return $unusable;
    }
    // OK
    if (dba_firstkey($handle) !== false) {
        return array($handle, true);
    }
    // No data, and we cannot fill it in
    if (!$is_writable) {
        dba_close($handle);
        return $unusable;
    }
    // No data, but we can fill it in
    return array($handle, false);
}
// _pinyin_read_table: Read the text table into an array
//   $extra: An array of supplementary character => pinyin entries
//   return: An array that maps each character to its pinyin.  The first
//           reading listed for a character wins, and the supplementary
//           entries only fill in the characters that the table does not
//           have, so that the DB file and the array hold the same data.
function _pinyin_read_table($extra)
{
    $table = array();
    $lines = explode("\n", xfread(_PINYIN_PINYIN2BIG5));
    foreach ($lines as $line) {
        // Tolerate CRLF line endings.  A carriage return is never the
        // second byte of a Big5 character.
        $line = rtrim($line, "\r");
        // Skip comments
        if (substr($line, 0, 1) == "#") {
            continue;
        }
        // Skip empty lines
        if (!preg_match("/\S/", $line)) {
            continue;
        }
        $chars = explode(" ", $line);
        // First item is pinyin
        $pinyin = array_shift($chars);
        foreach ($chars as $char) {
            // Repeated or trailing spaces produce empty items
            if ($char !== "" && !array_key_exists($char, $table)) {
                $table[$char] = $pinyin;
            }
        }
    }
    // Special meta characters
    if (is_array($extra)) {
        foreach ($extra as $char => $pinyin) {
            if (!array_key_exists($char, $table)) {
                $table[$char] = $pinyin;
            }
        }
    }
    return $table;
}
?>