Initial commit.

This commit is contained in:
2026-03-10 21:25:26 +08:00
commit 78739bf725
3089 changed files with 472990 additions and 0 deletions

View File

@@ -0,0 +1,225 @@
<?php
// File name: pinyin.inc.php
// Description: PHP subroutines to convert Traditional Chinese to Pinyin
// Date: 2004-04-14
// Author: imacat <imacat@pristine.com.tw>
// Copyright: Copyright (C) 2004-2007 Pristine Communications
// Set the include path
// Load incpath.inc.php from this file's own directory unless INCPATH_SET
// is already defined.
// NOTE(review): presumably incpath.inc.php extends include_path and
// defines INCPATH_SET -- confirm in that file.
if (!defined("INCPATH_SET")) {
require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
// check_writable(), mkalldir() and xfread() are called by _init_b52py()
// below.
// NOTE(review): presumably each is provided by the like-named file
// here -- confirm.
require_once "monica/chkwrite.inc.php";
require_once "monica/mkalldir.inc.php";
require_once "monica/xfileio.inc.php";
// Settings
// DBMDIR: directory that holds the database file.  Unless the caller
// defined it already, it is the directory three levels above this file
// plus a subdirectory named after the machine type (php_uname("m")).
if (!defined("DBMDIR")) {
define("DBMDIR", dirname(dirname(dirname(__FILE__))) . "/" . php_uname("m"));
}
// Path of the source table, located next to this file.  _init_b52py()
// reads it as text lines of the form "pinyin char char ..."; lines
// starting with "#" and blank lines are skipped.
define("_PINYIN_PINYIN2BIG5", dirname(__FILE__) . "/pinyin2big5");
// Path of the Big5-to-Pinyin database file (opened with the "gdbm"
// DBA handler).
define("_PINYIN_B52PY_DB", DBMDIR . "/b52py.db");
// The database itself: null until _init_b52py() runs, then either a DBA
// handle or a PHP array mapping a Big5 character to its Pinyin.
$_PINYIN_DB = null;
// Possible values of _PINYIN_DBTYPE, which _init_b52py() defines once it
// has chosen the storage: a DBA database file, or an in-memory array.
define("_PINYIN_DBTYPE_DBA", 1);
define("_PINYIN_DBTYPE_ARRAY", 2);
// Supplementary table: additional character => Pinyin entries that
// _init_b52py() adds after loading the main pinyin2big5 table.  In array
// mode these entries overwrite any entry loaded from the table; in DBA
// mode they are inserted only for keys not already in the database.
// The value "#" is used for two of the keys; the remaining values are
// two-syllable Pinyin readings.
// NOTE(review): the keys are presumably 2-byte Big5 characters.  In some
// views of this file they show up empty or garbled, and one of them ends
// in a backslash byte directly before the closing quote -- verify the
// file's encoding (and that it still parses) before editing this table.
$_PINYIN_SUP = array(
" " => " ",
"" => "#",
"" => "#",
"" => "shi2 ke4",
"" => "qian1 ke4",
"" => "mao2 ke4",
"\" => "fen1 ke4",
"" => "bai3 ke4",
"" => "li2 ke4",
"" => "jia1 lun2",
"" => "qian1 wa3",
"" => "li2 mi3",
);
// b52py: Convert Traditional Chinese to Pinyin
//   Input:  $big5  the text to convert, as a Big5-encoded byte string.
//   Return: the converted text.  Every 2-byte character that is found in
//           the Big5-to-Pinyin database is replaced with its Pinyin, and
//           characters that are not found are kept as they are.  The
//           characters of one run are separated with single spaces.
//           Single-byte (ASCII) text is copied through unchanged.
//           An empty string is returned when there is nothing to convert.
function b52py($big5)
{
    // Split into pieces: alternating runs of single-byte text
    // (0x01-0x7F) and runs of 2-byte characters (lead byte 0x80-0xFE
    // followed by any byte).  Splitting stops at the first byte that
    // fits neither pattern (a NUL, a 0xFF lead byte, or a lead byte with
    // nothing after it); whatever follows is not converted.
    $pieces = array();
    $remains = $big5;
    while (preg_match("/^([\\x01-\\x7F]+|(?:[\\x80-\\xFE].)+)(.*)$/s", $remains, $m)) {
        $pieces[] = $m[1];
        $remains = $m[2];
    }
    $total = count($pieces);
    // Nothing to convert: return early instead of reading $pieces[0],
    // and do not open (or create) the database for nothing.
    if ($total == 0) {
        return "";
    }

    // Initialize the database
    _init_b52py();

    // Process each piece
    for ($i = 0; $i < $total; $i++) {
        // Only the Big5 pieces are converted
        if (!preg_match("/^[\\x80-\\xFF]/", $pieces[$i])) {
            continue;
        }
        $words = array();
        $length = strlen($pieces[$i]);
        // Walk the piece two bytes (one Big5 character) at a time
        for ($j = 0; $j < $length; $j += 2) {
            $words[] = _b52py_lookup(substr($pieces[$i], $j, 2));
        }
        $pieces[$i] = implode(" ", $words);
    }

    // Join the pieces, inserting a space between two neighbours unless
    // one is already there on either side of the boundary
    $pinyin = $pieces[0];
    for ($i = 1; $i < $total; $i++) {
        if (   !preg_match("/\s$/", $pieces[$i-1])
            && !preg_match("/^\s/", $pieces[$i])) {
            $pinyin .= " ";
        }
        $pinyin .= $pieces[$i];
    }
    return $pinyin;
}

// _b52py_lookup: Look up the Pinyin of one Big5 character
//   Input:  $char  a 2-byte Big5 character.
//   Return: its Pinyin from the database, or $char itself when the
//           database has no entry for it.
//   The database must have been initialized with _init_b52py() first.
function _b52py_lookup($char)
{
    global $_PINYIN_DB;
    switch (_PINYIN_DBTYPE) {
    // Using a DB file
    case _PINYIN_DBTYPE_DBA:
        if (dba_exists($char, $_PINYIN_DB)) {
            return dba_fetch($char, $_PINYIN_DB);
        }
        break;
    // Using an array
    case _PINYIN_DBTYPE_ARRAY:
        if (array_key_exists($char, $_PINYIN_DB)) {
            return $_PINYIN_DB[$char];
        }
        break;
    }
    return $char;
}
// _init_b52py: Initialize the Big5 to Pinyin database
//   Chooses the storage, defines _PINYIN_DBTYPE accordingly and leaves
//   the ready-to-use database in $_PINYIN_DB.  It runs only once; later
//   calls return immediately.
//
//   Storage is chosen as follows:
//   - The DBA database file _PINYIN_B52PY_DB, when it can be opened and
//     already holds data.  It is used as it is.
//   - The same file, filled from the source table, when it is writable
//     but empty, or does not exist yet and can be created.
//   - An in-memory array filled from the source table in every other
//     case: the path is not a regular readable file, it is read-only and
//     empty, it cannot be created, the DBA extension is missing, or
//     dba_open() failed.
//
//   NOTE(review): the two storages resolve duplicate characters
//   differently, and this is kept as it was.  The DBA file keeps the
//   first Pinyin seen for a character; the array keeps the last one, and
//   $_PINYIN_SUP overrides the table only in the array.
function _init_b52py()
{
    global $_PINYIN_DB, $_PINYIN_SUP;
    // Already initialized
    if (defined("_PINYIN_DBTYPE")) {
        return;
    }

    // Try to obtain a DBA handle; false means "use an array instead"
    $handle = false;
    if (!function_exists("dba_open")) {
        // DBA extension not available
    } elseif (file_exists(_PINYIN_B52PY_DB)) {
        // Only a readable regular file can be a database
        if (is_file(_PINYIN_B52PY_DB) && is_readable(_PINYIN_B52PY_DB)) {
            $writable = is_writable(_PINYIN_B52PY_DB);
            $handle = dba_open(_PINYIN_B52PY_DB, $writable? "w": "r", "gdbm");
            // dba_open() returns false on failure
            if ($handle !== false) {
                // Has data - use it as it is
                if (dba_firstkey($handle) !== false) {
                    $_PINYIN_DB = $handle;
                    define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
                    return;
                }
                // No data and read only - it cannot be filled
                if (!$writable) {
                    dba_close($handle);
                    $handle = false;
                }
            }
        }
    // File does not exist
    } else {
        // check_writable() returns null when the file is creatable
        $error = check_writable(_PINYIN_B52PY_DB);
        if (is_null($error)) {
            mkalldir(dirname(_PINYIN_B52PY_DB));
            $handle = dba_open(_PINYIN_B52PY_DB, "c", "gdbm");
        }
    }

    // Settle the storage
    if ($handle !== false) {
        $_PINYIN_DB = $handle;
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_DBA);
    } else {
        $_PINYIN_DB = array();
        define("_PINYIN_DBTYPE", _PINYIN_DBTYPE_ARRAY);
    }

    // Fill the database from the source table
    $lines = explode("\n", xfread(_PINYIN_PINYIN2BIG5));
    foreach ($lines as $line) {
        // Tolerate CRLF line endings.  0x0D is never part of a Big5
        // character, so this cannot damage the last character.
        $line = rtrim($line, "\r");
        // Skip comments
        if (substr($line, 0, 1) == "#") {
            continue;
        }
        // Skip empty lines
        if (!preg_match("/\S/", $line)) {
            continue;
        }
        $chars = explode(" ", $line);
        // First item is pinyin
        $pinyin = array_shift($chars);
        foreach ($chars as $char) {
            // Doubled or trailing spaces produce empty items
            if ($char === "") {
                continue;
            }
            _b52py_store($char, $pinyin);
        }
    }
    // Special meta characters
    foreach (array_keys($_PINYIN_SUP) as $char) {
        _b52py_store($char, $_PINYIN_SUP[$char]);
    }
    return;
}

// _b52py_store: Store one character in the Big5 to Pinyin database
//   Input:  $char    the character (the key).
//           $pinyin  its Pinyin.
//   In a DBA file an existing entry is kept; in an array it is replaced.
//   _PINYIN_DBTYPE and $_PINYIN_DB must have been set already.
function _b52py_store($char, $pinyin)
{
    global $_PINYIN_DB;
    if (_PINYIN_DBTYPE == _PINYIN_DBTYPE_DBA) {
        if (!dba_exists($char, $_PINYIN_DB)) {
            dba_insert($char, $pinyin, $_PINYIN_DB);
        }
    } else {
        $_PINYIN_DB[$char] = $pinyin;
    }
}
?>