<?php
// Copyright: Copyright (C) 2002-2007 Pristine Communications
// This file is encoded in UTF-8 (Unicode).
//
// Converts plain text (e.g. a textarea submission) into HTML: URLs and
// e-mail addresses become links, the rest is HTML-escaped, and line breaks
// and runs of spaces are preserved.

// Set the include path
if (!defined("INCPATH_SET")) {
    require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/htmlchar.inc.php";
require_once "monica/markabbr.inc.php";
require_once "monica/urlregex.inc.php";

// The URL regular-expression pattern: an alternation of every supported
// URL type, wrapped in a non-capturing group so that it can be embedded in
// a larger pattern without shifting the capture-group numbering.
define("_A2HTML_URLREGEX_COMBINED", "(?:" . URLREGEX_EMAIL
    . "|" . URLREGEX_FTP . "|" . URLREGEX_FILE
    . "|" . URLREGEX_HTTP . "|" . URLREGEX_HTTPS
    . "|" . URLREGEX_GOPHER . "|" . URLREGEX_MAILTO
    . "|" . URLREGEX_NEWS . "|" . URLREGEX_NNTP
    . "|" . URLREGEX_TELNET . "|" . URLREGEX_WAIS
    . "|" . URLREGEX_PROSPERO . ")");

// The substitution ID configuration
// (not referenced by the functions in this file; kept for compatibility)
define("_A2HTML_SUBST_ID_LEN", 5);
define("_A2HTML_SUBST_ID_MIN", pow(10, _A2HTML_SUBST_ID_LEN - 1));
define("_A2HTML_SUBST_ID_MAX", pow(10, _A2HTML_SUBST_ID_LEN) - 1);

// The punctuation information
// Opening quotation mark => its closing counterpart.  A URL that directly
// follows an opening mark and ends with the matching closing mark is
// treated as quoted, and the closing mark is not made part of the link.
$_A2HTML_QUOTES = array(
    "\"" => "\"",
    "'" => "'",
    "`" => "'",
    "(" => ")",
    "[" => "]",
    "{" => "}",
    "<" => ">",
    "「" => "」",
    "『" => "』",
    "〈" => "〉",
    "《" => "》",
    "【" => "】",
    "〔" => "〕",
    "”" => "”",
    "“" => "”",
    "‘" => "’",
    "‵" => "′",
);
// Sentence punctuation that is stripped from the end of a URL.  "..." is
// listed before "." so that a whole ellipsis is removed in one step.
// NOTE(review): the second ",", ":", ";", "!", "?" entries duplicate the
// ASCII ones above; presumably they were full-width forms originally.
// Confirm against the upstream file.
$_A2HTML_DOTS = array(
    "...", ".", ",", ":", ";", "!", "?",
    "。", ",", ":", ";", "、", "…", "!", "?",
);

// a2html: Convert a textarea input into HTML content
//   $source: the plain text to convert
//   Returns the HTML fragment, with URLs and e-mail addresses linked, the
//   remaining text passed through h_abbr(), and a <br /> inserted before
//   every line break.
function a2html($source)
{
    // The punctuation information
    global $_A2HTML_QUOTES, $_A2HTML_DOTS;

    // Loop-invariant patterns: the first splits a piece of text into
    // "before", "first URL" and "after"; the second checks that a trimmed
    // candidate is still a complete URL.
    $splitPattern = "/^(.*?)(" . _A2HTML_URLREGEX_COMBINED . ")(.*)$/s";
    $wholePattern = "/^" . _A2HTML_URLREGEX_COMBINED . "$/";

    // Find and convert all the URLs
    // Process line by line.  PCRE has been seen to crash when matching
    // RFC-822 e-mail addresses against long text, so the text is handed
    // over in pieces that are as small as possible.  This is a
    // work-around, not a solution.
    $lines = explode("\n", $source);
    $count = count($lines);
    for ($i = 0; $i < $count; $i++) {
        $temp = $lines[$i];
        $html = "";
        // A leading space would collapse in HTML; keep the indentation
        if (substr($temp, 0, 1) === " ") {
            $html .= "&nbsp;";
            $temp = substr($temp, 1);
        }
        // Peel off one URL at a time from the front of the line.  If
        // preg_match() fails, the loop ends and the rest of the line is
        // emitted as plain text below.
        while (preg_match($splitPattern, $temp, $m)) {
            // De-quote
            foreach ($_A2HTML_QUOTES as $start => $end) {
                // In a quotation: give the closing mark back to the text
                if (   substr($m[1], -strlen($start)) === $start
                    && substr($m[2], -strlen($end)) === $end) {
                    $m[2] = substr($m[2], 0, -strlen($end));
                    $m[3] = $end . $m[3];
                    break;
                }
            }
            // Remove at most one trailing punctuation mark
            foreach ($_A2HTML_DOTS as $dot) {
                // End in this dot
                if (substr($m[2], -strlen($dot)) === $dot) {
                    $m[2] = substr($m[2], 0, -strlen($dot));
                    $m[3] = $dot . $m[3];
                    break;
                }
            }
            // Still matched after the above removal
            if (preg_match($wholePattern, $m[2])) {
                $html .= _a2html_plain_line($m[1]) . _a2html_url2link($m[2]);
            // No longer a URL - treat it as ordinary text
            } else {
                $html .= _a2html_plain_line($m[1] . $m[2]);
            }
            $temp = $m[3];
        }
        // Add the remains
        $html .= _a2html_plain_line($temp);
        $lines[$i] = $html;
    }
    $result = implode("\n", $lines);

    // Add line breaks, keeping the original newline after each <br />
    $result = preg_replace("/(\r?\n)/", "<br />\\1", $result);

    return $result;
}

// _a2html_plain_line: Convert plain line content to HTML
//   $line: a piece of text that contains no URL to be linked
//   Returns the HTML for that piece of text.
function _a2html_plain_line($line)
{
    // Escape the HTML characters and mark the abbreviation
    $line = h_abbr($line);
    // Tag non-breaking spaces: every second space in a run becomes
    // &nbsp;, so the run keeps its width while the line can still wrap.
    $line = str_replace("  ", " &nbsp;", $line);
    return $line;
}

// _a2html_url2link: Convert an URL to a link
//   $url: the URL or e-mail address, as matched by the URL patterns
//   Returns an <a> element whose text is the URL itself.
//   Assumes h() escapes the text for use in both element content and a
//   double-quoted attribute value - TODO confirm in htmlchar.inc.php.
function _a2html_url2link($url)
{
    $escaped = h($url);
    // There is a scheme
    if (preg_match("/^" . URLREGEX_SCHEME . ":/", $url)) {
        return "<a href=\"" . $escaped . "\">" . $escaped . "</a>";
    }
    // Schemeless - assumed to be an e-mail address
    return "<a href=\"mailto:" . $escaped . "\">" . $escaped . "</a>";
}