Files
selima-perl/lib/php/monica/gb2312.inc.php
2026-03-10 21:31:43 +08:00

135 lines
3.8 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// File name: gb2312.inc.php
// Description: PHP subroutine to deal with GB2312 characters
// Date: 2004-04-27
// Author: imacat <imacat@pristine.com.tw>
// Copyright: Copyright (C) 2004-2007 Pristine Communications
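// Encoding rules:
// Row (section) number: 1-94
// Cell (position) number: 1-94
// Byte code: high bit set + (row number + 0x20 (32)), high bit set + (cell number + 0x20 (32))
// Symbols: rows 1-9
// Level-1 characters: rows 16-55, frequently-used hanzi
// Level-2 characters: rows 56-87, less frequently-used hanzi
// Unassigned rows: 10-15 and 88-94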
// Set the include path
if (!defined("INCPATH_SET")) {
require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/xfileio.inc.php";
// Settings
// Path of the cache file for the character table: init_gb2312() eval()s
// this file when it exists, and otherwise writes the freshly generated
// table to it through xfupdate().
// NOTE(review): the content of this file is executed as PHP code and it
// lives under /tmp, which is presumably writable by other local users --
// confirm, and consider a location only this application can write to.
define("_GB2312_VAR", "/tmp/gb2312.var");
// The character table; stays null until init_gb2312() fills it in
$_GB2312 = null;
// init_gb2312: Initialize the GB2312 characters array
//  Fills the global $_GB2312 with three lists of 2-byte GB2312 strings,
//  keyed "符號" (symbols, rows 1-9), "一級字" (level-1 characters,
//  rows 16-55) and "二級字" (level-2 characters, rows 56-87).  These are
//  the keys that gb2312_punc_chars(), gb2312_feq_chars() and
//  gb2312_nonfeq_chars() look up.  Does nothing when $_GB2312 is already
//  set.  Takes no arguments and returns nothing.
function init_gb2312()
{
    global $_GB2312;
    // The keys the accessor functions read
    $keys = array("符號", "一級字", "二級字");
    // Already initialized
    if (!is_null($_GB2312)) {
        return;
    }
    // Initialize from the existing table
    if (file_exists(_GB2312_VAR)) {
        // The cache file holds a PHP assignment to $_GB2312; eval() runs it
        // in this scope, where $_GB2312 is bound to the global.
        // NOTE(review): this executes whatever is in the cache file.  Kept
        // for compatibility with the existing cache format, but the file
        // should live where only this application can write -- confirm.
        eval(xfread(_GB2312_VAR));
        // Trust the cache only when it has every key the accessors need.
        // A cache written with different keys would otherwise make every
        // accessor return null forever.
        $complete = is_array($_GB2312);
        foreach ($keys as $key) {
            $complete = $complete && isset($_GB2312[$key]);
        }
        if ($complete) {
            return;
        }
        // Stale or damaged cache: discard it and rebuild below
        $_GB2312 = null;
    }
    // Initialize the characters
    $_GB2312 = array(
        // Symbols
        "符號" => _gb2312_gen_chars(1, 9),
        // Level-1 characters
        "一級字" => _gb2312_gen_chars(16, 55),
        // Level-2 characters
        "二級字" => _gb2312_gen_chars(56, 87),
    );
    // Save the result as PHP code that the eval() above can load next time
    $string = "\$_GB2312 = " . var_export($_GB2312, true) . ";\n";
    xfupdate(_GB2312_VAR, $string);
}
// _gb2312_gen_chars: Generate a range of GB2312 characters
//  $start: the first row (section) number to generate
//  $end:   the last row (section) number to generate, inclusive
//  Returns a list of 2-byte strings, one per assigned cell, in row and
//  cell order.  Each byte is (number + 0x20) with the high bit set.
//  Returns an empty list when $start is greater than $end.
//  Cells that are unused in rows 2, 4-9 and 55 are skipped, including
//  when the range starts in such a row (a range starting at row 2 or
//  row 9 no longer yields the unused leading cells of that row).
//  Generation stops after row 9 and after row 55 even when $end is
//  larger, as it always has.
function _gb2312_gen_chars($start, $end)
{
    // Assigned cell ranges (first, last) of the rows that have unused cells
    $assigned = array(
        // 02,01-02,16  02,67-02,68  02,79-02,80  02,93-02,94 are unused
        2 => array(array(17, 66), array(69, 78), array(81, 92)),
        // 04,84-04,94 are unused
        4 => array(array(1, 83)),
        // 05,87-05,94 are unused
        5 => array(array(1, 86)),
        // 06,25-06,32  06,57-06,94 are unused
        6 => array(array(1, 24), array(33, 56)),
        // 07,34-07,48  07,82-07,94 are unused
        7 => array(array(1, 33), array(49, 81)),
        // 08,27-08,36  08,74-08,94 are unused
        8 => array(array(1, 26), array(37, 73)),
        // 09,01-09,03  09,80-09,94 are unused
        9 => array(array(4, 79)),
        // 55,90-55,94 are unused level-1 cells
        55 => array(array(1, 89)),
    );
    // Every other row uses all of its 94 cells
    $whole_row = array(array(1, 94));

    $chars = array();
    for ($sec = $start; $sec <= $end; $sec++) {
        $ranges = isset($assigned[$sec]) ? $assigned[$sec] : $whole_row;
        // The lead byte is the same for the whole row
        $lead = (($sec + 0x20) | 0x80) << 8;
        foreach ($ranges as $range) {
            for ($pos = $range[0]; $pos <= $range[1]; $pos++) {
                // "n" packs the 16-bit code big-endian: lead byte first
                $chars[] = pack("n", $lead | (($pos + 0x20) | 0x80));
            }
        }
        // Row 9 ends the symbols and row 55 ends the level-1 characters;
        // nothing beyond them is generated in the same call
        if ($sec == 9 || $sec == 55) {
            break;
        }
    }
    return $chars;
}
// gb2312_punc_chars: Return the GB2312 punctuation characters
//  Calls init_gb2312() first, then returns the "符號" entry of the
//  global $_GB2312 table.
function gb2312_punc_chars()
{
    global $_GB2312;
    init_gb2312();
    $symbols = $_GB2312["符號"];
    return $symbols;
}
// gb2312_feq_chars: Return the GB2312 frequently-used characters
//  Calls init_gb2312() first, then returns the "一級字" entry of the
//  global $_GB2312 table.
function gb2312_feq_chars()
{
    global $_GB2312;
    init_gb2312();
    $level1 = $_GB2312["一級字"];
    return $level1;
}
// gb2312_nonfeq_chars: Return the GB2312 non-frequently-used characters
//  Calls init_gb2312() first, then returns the "二級字" entry of the
//  global $_GB2312 table.
function gb2312_nonfeq_chars()
{
    global $_GB2312;
    init_gb2312();
    $level2 = $_GB2312["二級字"];
    return $level2;
}
?>