Files
selima-perl/lib/php/monica/a2html.inc.php
2026-03-10 21:31:43 +08:00

143 lines
4.6 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// File name: a2html.inc.php
// Description: PHP subroutine to convert a textarea input into HTML content
// Date: 2002-05-23
// Author: imacat <imacat@pristine.com.tw>
// Copyright: Copyright (C) 2002-2007 Pristine Communications
// This file is in UTF-8 萬國碼
// Set the include path
if (!defined("INCPATH_SET")) {
require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
require_once "monica/htmlchar.inc.php";
require_once "monica/markabbr.inc.php";
require_once "monica/urlregex.inc.php";
// The combined URL pattern: a single non-capturing group that matches any
// one of the supported URL forms.  The alternatives are tried in the order
// listed here.
define("_A2HTML_URLREGEX_COMBINED", "(?:" . implode("|", array(
    URLREGEX_EMAIL,
    URLREGEX_FTP,
    URLREGEX_FILE,
    URLREGEX_HTTP,
    URLREGEX_HTTPS,
    URLREGEX_GOPHER,
    URLREGEX_MAILTO,
    URLREGEX_NEWS,
    URLREGEX_NNTP,
    URLREGEX_TELNET,
    URLREGEX_WAIS,
    URLREGEX_PROSPERO,
)) . ")");
// The substitution ID configuration
// _A2HTML_SUBST_ID_LEN is the number of decimal digits in a substitution ID;
// _A2HTML_SUBST_ID_MIN and _A2HTML_SUBST_ID_MAX are the smallest and the
// largest numbers with exactly that many digits (10000 and 99999 for 5).
// NOTE(review): none of these constants is referenced in the visible part
// of this file -- confirm whether other files still use them.
define("_A2HTML_SUBST_ID_LEN", 5);
define("_A2HTML_SUBST_ID_MIN", pow(10, _A2HTML_SUBST_ID_LEN - 1));
define("_A2HTML_SUBST_ID_MAX", pow(10, _A2HTML_SUBST_ID_LEN) - 1);
// The punctuation information
// $_A2HTML_QUOTES maps an opening quotation mark or bracket to its closing
// counterpart.  a2html() uses it to detach a closing mark from the end of a
// matched URL when the text right before the URL ends with the opening mark.
// NOTE(review): several pairs below read as empty strings in this copy of
// the file.  Presumably they originally held non-ASCII (e.g. full-width)
// punctuation that was lost in a transcoding step -- verify against the
// repository.  As written, the repeated "" keys collapse into one entry.
$_A2HTML_QUOTES = array(
"\"" => "\"",
"'" => "'",
"`" => "'",
"(" => ")",
"[" => "]",
"{" => "}",
"<" => ">",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
"" => "",
);
// $_A2HTML_DOTS lists the sentence punctuation that a2html() strips from the
// end of a matched URL.  The entries are tried in order and only the first
// hit is removed, so "..." has to come before ".".
// NOTE(review): the empty strings on the second row are presumably lost
// non-ASCII punctuation as well -- verify.
$_A2HTML_DOTS = array(
"...", ".", ",", ":", ";", "!", "?",
"", "", "", "", "", "", "", "",
);
// a2html: Convert a textarea input into HTML content
//   Every line of the input is scanned for URLs and e-mail addresses.
//   Each one found is turned into a link with _a2html_url2link(), and the
//   text around it is converted with _a2html_plain_line().  A "<br />" is
//   then put in front of every line break.
//   Parameters:
//     $source: The plain text to convert
//   Return: The resulting HTML fragment
function a2html($source)
{
    // The punctuation information
    global $_A2HTML_QUOTES, $_A2HTML_DOTS;

    // The patterns are the same for every line, so build them only once
    $find_url = "/^(.*?)(" . _A2HTML_URLREGEX_COMBINED . ")(.*)$/s";
    $whole_url = "/^" . _A2HTML_URLREGEX_COMBINED . "$/";

    // Find and convert all the URLs
    // Process line by line.  PHP PCRE has been seen to crash at random
    // when matching RFC-822 e-mail addresses, for reasons that are not
    // clear.  Keeping each piece of text as small as possible, without
    // breaking its integrity, makes that less likely.  This is a
    // workaround, not a solution.
    $lines = explode("\n", $source);
    $count = count($lines);
    for ($i = 0; $i < $count; $i++) {
        $temp = $lines[$i];
        $html = "";
        // A leading space is emitted as a non-breaking space
        if (substr($temp, 0, 1) === " ") {
            $html .= "&nbsp;";
            $temp = substr($temp, 1);
        }
        // $m[1] is the text before the URL, $m[2] is the URL itself
        // and $m[3] is the text after the URL
        while (preg_match($find_url, $temp, $m)) {
            // De-quote
            foreach ($_A2HTML_QUOTES as $start => $end) {
                $start = (string) $start;
                // A blank entry can never be a real quotation mark, and
                // a zero length would make substr() return the whole
                // string instead of its tail
                if ($start === "" || $end === "") {
                    continue;
                }
                // In a quotation: the closing mark is not part of the URL
                if (       substr($m[1], -strlen($start)) === $start
                        && substr($m[2], -strlen($end)) === $end) {
                    $m[2] = substr($m[2], 0, -strlen($end));
                    $m[3] = $end . $m[3];
                    break;
                }
            }
            // Remove trailing dots
            foreach ($_A2HTML_DOTS as $dot) {
                // Blank entries are skipped for the same reason as above
                if ($dot === "") {
                    continue;
                }
                // End in this dot
                if (substr($m[2], -strlen($dot)) === $dot) {
                    $m[2] = substr($m[2], 0, -strlen($dot));
                    $m[3] = $dot . $m[3];
                    break;
                }
            }
            // Still matched after the above removal
            if (preg_match($whole_url, $m[2])) {
                $html .= _a2html_plain_line($m[1]) . _a2html_url2link($m[2]);
            // No longer an URL - keep it as plain text
            } else {
                $html .= _a2html_plain_line($m[1] . $m[2]);
            }
            // Continue with the text after the URL
            $temp = $m[3];
        }
        // Add the remains
        $lines[$i] = $html . _a2html_plain_line($temp);
    }
    $result = implode("\n", $lines);

    // Add line breaks
    $result = preg_replace("/(\r?\n)/", "<br />\\1", $result);
    return $result;
}
// _a2html_plain_line: Convert plain line content to HTML
//   Parameters:
//     $line: A piece of plain text without URLs
//   Return: The text as processed by h_abbr(), with runs of spaces
//           kept visible
function _a2html_plain_line($line)
{
    // Escape the HTML characters and mark the abbreviation
    $line = h_abbr($line);
    // Tag non-breaking spaces.  Only pairs of spaces are replaced, since
    // browsers collapse runs of spaces but show a single one as is.
    // Replacing every single space would double the gap between all words
    // and alter any space-containing markup returned by h_abbr().
    $line = str_replace("  ", " &nbsp;", $line);
    return $line;
}
// _a2html_url2link: Convert an URL to a link
//   The link text is the URL itself, wrapped in <samp>.  An URL that does
//   not start with a scheme is assumed to be an e-mail address, and gets
//   "mailto:" prepended to its href.
function _a2html_url2link($url)
{
    // Decide the href prefix first: nothing when there is a scheme,
    // "mailto:" for a schemeless URL
    $prefix = preg_match("/^" . URLREGEX_SCHEME . ":/", $url) ? "" : "mailto:";
    // The URL is escaped once for the attribute and once for the text
    return sprintf("<a href=\"%s%s\"><samp>%s</samp></a>",
        $prefix, h($url), h($url));
}
?>