Initial commit.
This commit is contained in:
331
lib/php/monica/page2rel.inc.php
Normal file
331
lib/php/monica/page2rel.inc.php
Normal file
@@ -0,0 +1,331 @@
|
||||
<?php
|
||||
// File name: page2rel.inc.php
|
||||
// Description: PHP subroutine to convert URLs in a page to relative
|
||||
// Date: 2003-09-09
|
||||
// Author: imacat <imacat@pristine.com.tw>
|
||||
// Copyright: Copyright (C) 2003-2007 Pristine Communications
|
||||
|
||||
// Set the include path
|
||||
if (!defined("INCPATH_SET")) {
|
||||
require_once dirname(__FILE__) . "/incpath.inc.php";
|
||||
}
|
||||
// Referenced subroutines
|
||||
require_once "monica/htmlchar.inc.php";
|
||||
require_once "monica/rel2abs.inc.php";
|
||||
|
||||
// Settings
|
||||
// Direction flags passed as the $is_rel argument of _page2rel_cnvturl():
// false requests conversion to absolute URLs, true to relative URLs.
define("_PAGE2REL_2ABS", false);
define("_PAGE2REL_2REL", true);
|
||||
|
||||
// page2rel: Convert URLs in a page to relative
|
||||
/**
 * Convert the URLs in an HTML page to relative URLs.
 *
 * Thin front end of _page2rel_cnvturl() that selects the "to relative"
 * direction; no $rdiff value is used in this direction (null is passed).
 *
 * @param string $html The HTML page source.
 * @param string $base The base URL handed on to the converter.
 * @return string The page with its URLs converted.
 */
function page2rel($html, $base)
{
    $converted = _page2rel_cnvturl($html, $base, _PAGE2REL_2REL, null);
    return $converted;
}
|
||||
|
||||
// page2abs: Convert URLs in a page to absolute
|
||||
/**
 * Convert the URLs in an HTML page to absolute URLs.
 *
 * Thin front end of _page2rel_cnvturl() that selects the "to absolute"
 * direction.
 *
 * @param string $html  The HTML page source.
 * @param string $base  The base URL handed on to the converter.
 * @param mixed  $rdiff Passed through unchanged to rel2abs(); defaults to
 *                      REL2ABS_RDIFF_KEEP (defined outside this file).
 * @return string The page with its URLs converted.
 */
function page2abs($html, $base, $rdiff = REL2ABS_RDIFF_KEEP)
{
    $converted = _page2rel_cnvturl($html, $base, _PAGE2REL_2ABS, $rdiff);
    return $converted;
}
|
||||
|
||||
// _page2rel_cnvturl: Convert URLs in a page to relative/absolute
|
||||
/**
 * Convert the URLs in an HTML page to relative or absolute URLs.
 *
 * The page is scanned tag by tag.  Comments and irrelevant elements are
 * copied through untouched; for the URL-carrying elements the relevant
 * attributes are rewritten with _page2rel_cnvtatt().  The content that
 * follows a <script> or <style> start tag is handed to a language-specific
 * skipper so that markup-looking text inside it is not mistaken for HTML.
 *
 * @param string $source The HTML page source.
 * @param string $base   The base URL.
 * @param bool   $is_rel _PAGE2REL_2REL to convert to relative URLs,
 *                       _PAGE2REL_2ABS to convert to absolute URLs.
 * @param mixed  $rdiff  Passed through to rel2abs() (absolute mode only).
 * @return string The converted page.
 */
function _page2rel_cnvturl($source, $base, $is_rel, $rdiff)
{
    $result = "";
    // Each round peels off: leading text, one comment/tag/PI, and the rest
    while (preg_match("/^(.*?)(<!--.*?-->|<[a-z]+\d?(?:\s+[a-z\-]+=(?:\"[^\"]*\"|'[^']*'|[^\"'\s<>]+))*(?:\s+\/)?>|<\?xml-stylesheet(?:\s+[a-z\-]+=(?:\"[^\"]*\"|'[^']*'|[^\"'\s<>]+))*\s*\?>)(.*)$/is", $source, $m)) {
        $result .= $m[1];
        $ele = $m[2];
        $source = $m[3];
        // Skip if not a relevant element
        if (   !preg_match("/^<((a|applet|area|base|embed|form|frame|img|input|link|object|param|script|style)\s+)(.+?)>$/is", $ele, $m)
            && !preg_match("/^<((\?xml-stylesheet)\s+)(.+?)\?>$/s", $ele, $m)) {
            $result .= $ele;
            continue;
        }
        $elepref = $m[1];   // Element name plus the white space after it
        $elename = $m[2];
        $elebody = $m[3];   // The attribute list
        switch (strtolower($elename)) {
        case "a":
        case "area":
        case "base":
        case "link":
            $elebody = _page2rel_cnvtatt($elebody, "href", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "img":
        case "input":
        case "embed":
        case "frame":
            $elebody = _page2rel_cnvtatt($elebody, "src", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "form":
            $elebody = _page2rel_cnvtatt($elebody, "action", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "applet":
            $elebody = _page2rel_cnvtatt($elebody, "codebase", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "object":
            $elebody = _page2rel_cnvtatt($elebody, "classid", $base, $is_rel, $rdiff);
            $elebody = _page2rel_cnvtatt($elebody, "codebase", $base, $is_rel, $rdiff);
            $elebody = _page2rel_cnvtatt($elebody, "data", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "script":
            $elebody = _page2rel_cnvtatt($elebody, "src", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            // Test for a missing "type" BEFORE lower-casing it: strtolower()
            // turns null into "", which used to disable the fallback below.
            $type = _page2rel_attval($elebody, "type");
            if (is_null($type)) {
                $type = "";
                // Compatibility with the old "language" attribute
                $scptlang = _page2rel_attval($elebody, "language");
                $scptlang = is_null($scptlang)? "": strtolower($scptlang);
                switch ($scptlang) {
                case "javascript":
                    $type = "text/javascript";
                    break;
                case "vbscript":
                case "vbs":
                    $type = "text/vbscript";
                    break;
                case "perlscript":
                    $type = "text/perlscript";
                    break;
                }
            } else {
                $type = strtolower($type);
            }
            switch ($type) {
            case "text/vbscript":
            case "text/vbs":
                $m = _page2rel_foreign_vbs($source);
                break;
            case "text/perlscript":
                // NOTE(review): _page2rel_foreign_pls() is not defined in
                // this file.  Use it when it is available, otherwise fall
                // back to the JavaScript skipper (the same treatment unknown
                // script types get) instead of dying with a fatal error.
                $m = function_exists("_page2rel_foreign_pls")?
                    _page2rel_foreign_pls($source):
                    _page2rel_foreign_js($source);
                break;
            case "text/javascript":
            default:
                // Unknown or missing types are treated as JavaScript
                $m = _page2rel_foreign_js($source);
                break;
            }
            // $m[0] is the skipped script text, $m[1] is what follows it
            $result .= $m[0];
            $source = $m[1];
            break;

        case "style":
            $result .= $ele;
            $type = _page2rel_attval($elebody, "type");
            $type = is_null($type)? "": strtolower($type);
            switch ($type) {
            case "text/javascript":
                $m = _page2rel_foreign_jsss($source);
                break;
            case "text/css":
            default:
                // Unknown or missing types are treated as CSS
                $m = _page2rel_foreign_css($source, $base, $is_rel, $rdiff);
                break;
            }
            // $m[0] is the processed style text, $m[1] is what follows it
            $result .= $m[0];
            $source = $m[1];
            break;

        case "param":
            // Only these parameter names are treated as carrying a URL
            $name = _page2rel_attval($elebody, "name");
            $name = is_null($name)? "": strtolower($name);
            if (in_array($name, array("src", "movie", "filename"), true)) {
                $elebody = _page2rel_cnvtatt($elebody, "value", $base, $is_rel, $rdiff);
            }
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "?xml-stylesheet":
            $elebody = _page2rel_cnvtatt($elebody, "href", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . "?>";
            break;
        }
    }
    // Append the remains
    $result .= $source;
    return $result;
}
|
||||
|
||||
// _page2rel_cnvtatt: Convert URLs in HTML attributes to relative/absolute
|
||||
/**
 * Convert the URL held by one attribute in an element's attribute list.
 *
 * The attribute list is split into white-space separated pieces (quoted
 * values may contain white space).  Every piece of the form att="value",
 * att='value' or att=value whose name matches $att (case-insensitively) has
 * its value converted with abs2rel() or rel2abs(); all other pieces, and
 * the white space between pieces, are copied through unchanged.
 *
 * @param string $source The attribute list of the element.
 * @param string $att    The name of the attribute that holds a URL.
 * @param string $base   The base URL.
 * @param bool   $is_rel True to convert to relative, false to absolute.
 * @param mixed  $rdiff  Passed through to rel2abs() (absolute mode only).
 * @return string The attribute list with the URL converted.
 */
function _page2rel_cnvtatt($source, $att, $base, $is_rel, $rdiff)
{
    $result = "";
    // The attribute name is matched literally, not as a sub-pattern
    $attpat = preg_quote($att, "/");
    // The tail is captured greedily: a lazy "(.*?)$" would stop before a
    // final newline and silently drop it from the output.
    while ($source != "" && preg_match("/^([^\"'\s]*(?:(?:\"[^\"]*\"|'[^']*')[^\"'\s]*)*)(\s*)(.*)$/s", $source, $m)) {
        $piece = $m[1];
        $sep = $m[2];
        // Nothing was consumed: the remains start with an unbalanced quote.
        // Copy them through as-is rather than looping forever.
        if ($piece === "" && $sep === "") {
            $result .= $source;
            break;
        }
        $source = $m[3];
        // Skip if not in the attribute="value" format
        if (!(   preg_match("/^($attpat)=(\")([^\"]*)\"$/i", $piece, $m)
              || preg_match("/^($attpat)=(')([^']*)'$/i", $piece, $m)
              || preg_match("/^($attpat)=()([^\"']+)$/i", $piece, $m))) {
            $result .= $piece . $sep;
            continue;
        }
        $thisatt = $m[1];
        $quote = $m[2];
        $url = $m[3];
        // $url is a variable to be replaced dynamically, as in error pages
        if ($url != "\$url") {
            $url = $is_rel? abs2rel($url, $base):
                rel2abs($url, $base, REL2ABS_SKIP_FRAGMENT, REL2ABS_NO_HOST, $rdiff);
        }
        $result .= $thisatt . "=" . $quote . $url . $quote . $sep;
    }
    return $result;
}
|
||||
|
||||
// _page2rel_attval: Obtain the value of an attribute
|
||||
/**
 * Obtain the value of an attribute from an element's attribute list.
 *
 * The attribute name is matched case-insensitively.  The value is passed
 * through dh() (defined outside this file; presumably it decodes HTML
 * character references -- confirm) before it is returned.  When the
 * attribute occurs more than once, the last occurrence wins.
 *
 * @param string $elebody The attribute list of the element.
 * @param string $att     The name of the attribute to look up.
 * @return string|null The attribute value, or null if it is not present.
 */
function _page2rel_attval($elebody, $att)
{
    // The attribute name is matched literally, not as a sub-pattern
    $attpat = preg_quote(strtolower($att), "/");
    $val = null;
    while ($elebody != "" && preg_match("/^([^\"'\s]*(?:(?:\"[^\"]*\"|'[^']*')[^\"'\s]*)*)(\s*)(.*)$/s", $elebody, $m)) {
        $piece = $m[1];
        // Nothing was consumed: the remains start with an unbalanced quote.
        // Stop here rather than looping forever.
        if ($piece === "" && $m[2] === "") {
            break;
        }
        $elebody = $m[3];
        // Skip if not in the attribute="value" format
        if (!(   preg_match("/^($attpat)=(\")([^\"]*)\"$/i", $piece, $m)
              || preg_match("/^($attpat)=(')([^']*)'$/i", $piece, $m)
              || preg_match("/^($attpat)=()([^\"']+)$/i", $piece, $m))) {
            continue;
        }
        // The anchored, case-insensitive match above already guarantees
        // that this piece is the requested attribute.
        $val = dh($m[3]);
    }
    return $val;
}
|
||||
|
||||
// _page2rel_foreign_js: Skip the next javascript block
|
||||
/**
 * Skip the JavaScript block at the start of the given text.
 *
 * String literals and comments are stepped over as whole tokens, so a
 * "</script>" that appears inside one of them does not end the block.
 *
 * @param string $html The text right after a <script> start tag.
 * @return array A two-element array: the script text, and the remaining
 *               text starting at the "</script>" end tag (an empty string
 *               if the block is never closed).
 */
function _page2rel_foreign_js($html)
{
    // Leading code, then one string/comment/end-tag token, then the rest
    $pattern = "/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|\/\/[^\n]*|<\/script>)(.*)$/is";
    $script = "";
    $rest = $html;
    $closed = false;
    while (!$closed && preg_match($pattern, $rest, $parts)) {
        list(, $before, $token, $after) = $parts;
        if (strtolower($token) === "</script>") {
            // The end tag stays with the remains
            $script .= $before;
            $rest = $token . $after;
            $closed = true;
        } else {
            // A string or a comment: part of the script
            $script .= $before . $token;
            $rest = $after;
        }
    }
    // An unterminated block takes everything that is left
    if (!$closed) {
        $script .= $rest;
        $rest = "";
    }
    return array($script, $rest);
}
|
||||
|
||||
// _page2rel_foreign_vbs: Skip the next vbscript block
|
||||
/**
 * Skip the VBScript block at the start of the given text.
 *
 * String literals (with "" as the embedded quote) and Rem / apostrophe
 * comments are stepped over as whole tokens, so a "</script>" that appears
 * inside one of them does not end the block.
 *
 * @param string $html The text right after a <script> start tag.
 * @return array A two-element array: the script text, and the remaining
 *               text starting at the "</script>" end tag (an empty string
 *               if the block is never closed).
 */
function _page2rel_foreign_vbs($html)
{
    // Leading code, then one string/comment/end-tag token, then the rest
    $pattern = "/^(.*?)(\"(?:[^\"]|\"\")*\"|[:\n]\s*Rem\b[^\n]*|'[^\n]*|<\/script>)(.*)$/is";
    $script = "";
    $rest = $html;
    $closed = false;
    while (!$closed && preg_match($pattern, $rest, $parts)) {
        list(, $before, $token, $after) = $parts;
        if (strtolower($token) === "</script>") {
            // The end tag stays with the remains
            $script .= $before;
            $rest = $token . $after;
            $closed = true;
        } else {
            // A string or a comment: part of the script
            $script .= $before . $token;
            $rest = $after;
        }
    }
    // An unterminated block takes everything that is left
    if (!$closed) {
        $script .= $rest;
        $rest = "";
    }
    return array($script, $rest);
}
|
||||
|
||||
// _page2rel_foreign_css: Skip the next cascading stylesheet block
|
||||
/**
 * Extract the CSS block at the start of the given text and convert the
 * URLs in it.
 *
 * String literals and comments are stepped over as whole tokens, so a
 * "</style>" that appears inside one of them does not end the block.  The
 * extracted block is then run through _page2rel_foreign_cssurls().
 *
 * @param string $html   The text right after a <style> start tag.
 * @param string $base   The base URL.
 * @param bool   $is_rel True to convert to relative, false to absolute.
 * @param mixed  $rdiff  Passed through to the URL converter.
 * @return array A two-element array: the converted style text, and the
 *               remaining text starting at the "</style>" end tag (an empty
 *               string if the block is never closed).
 */
function _page2rel_foreign_css($html, $base, $is_rel, $rdiff)
{
    // Leading text, then one string/comment/end-tag token, then the rest
    $pattern = "/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|<\/style>)(.*)$/is";
    $css = "";
    $rest = $html;
    $closed = false;
    while (!$closed && preg_match($pattern, $rest, $parts)) {
        list(, $before, $token, $after) = $parts;
        if (strtolower($token) === "</style>") {
            // The end tag stays with the remains
            $css .= $before;
            $rest = $token . $after;
            $closed = true;
        } else {
            // A string or a comment: part of the style sheet
            $css .= $before . $token;
            $rest = $after;
        }
    }
    // An unterminated block takes everything that is left
    if (!$closed) {
        $css .= $rest;
        $rest = "";
    }
    // Convert URLs in the CSS block
    return array(_page2rel_foreign_cssurls($css, $base, $is_rel, $rdiff), $rest);
}
|
||||
|
||||
// _page2rel_foreign_cssurls: Convert URLs in a CSS block
|
||||
/**
 * Convert the URLs in a CSS block.
 *
 * Only url(...) tokens are rewritten.  Plain string literals and comments
 * are recognized so that they can be copied through untouched, which keeps
 * a "url(" that appears inside one of them from being converted.
 *
 * @param string $source The CSS text.
 * @param string $base   The base URL.
 * @param bool   $is_rel True to convert to relative, false to absolute.
 * @param mixed  $rdiff  Passed through to rel2abs() (absolute mode only).
 * @return string The CSS text with its URLs converted.
 */
function _page2rel_foreign_cssurls($source, $base, $is_rel, $rdiff)
{
    $result = "";
    // The unquoted url(...) form must stop at the first ")": without ")" in
    // the excluded set, two unquoted URLs in one block were swallowed as a
    // single bogus URL reaching up to the last ")".
    while (preg_match("/^(.*?)(\burl\([^\"'\)]*\)|\burl\(\"(?:[^\\\\\"]|\\\\.)*\"\)|\burl\('(?:[^\\\\']|\\\\.)*'\)|\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/)(.*)$/is", $source, $m)) {
        $result .= $m[1];
        $piece = $m[2];
        $source = $m[3];
        // An URL: $m becomes (whole, "url(", quote, URL, ")")
        if (   preg_match("/^(url\()(\")((?:[^\\\\\"]|\\\\.)*)\"(\))$/i", $piece, $m)
            || preg_match("/^(url\()(')((?:[^\\\\']|\\\\.)*)'(\))$/i", $piece, $m)
            || preg_match("/^(url\()()([^\"'\)]*)(\))$/i", $piece, $m)) {
            $m[3] = $is_rel? abs2rel($m[3], $base):
                rel2abs($m[3], $base, REL2ABS_SKIP_FRAGMENT, REL2ABS_NO_HOST, $rdiff);
            // Put the token back together with its original quoting
            $piece = $m[1] . $m[2] . $m[3] . $m[2] . $m[4];
        }
        // Strings and comments fall through here unchanged
        $result .= $piece;
    }
    // Append the remains
    $result .= $source;
    return $result;
}
|
||||
|
||||
// _page2rel_foreign_jsss: Skip the next javascript stylesheet block
|
||||
/**
 * Skip the JavaScript style sheet block at the start of the given text.
 *
 * String literals and comments are stepped over as whole tokens, so a
 * "</style>" that appears inside one of them does not end the block.
 *
 * @param string $html The text right after a <style> start tag.
 * @return array A two-element array: the style sheet text, and the
 *               remaining text starting at the "</style>" end tag (an empty
 *               string if the block is never closed).
 */
function _page2rel_foreign_jsss($html)
{
    $block = "";
    $ended = false;
    // The terminator is "</style>": this block lives inside a <style>
    // element.  Scanning for "</script>" while testing for "</style>" meant
    // the end was never found and the rest of the page was swallowed.
    while (preg_match("/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|\/\/[^\n]*|<\/style>)(.*)$/is", $html, $m)) {
        // The end of the block
        if (strtolower($m[2]) == "</style>") {
            $block .= $m[1];
            // The end tag stays with the remains
            $html = $m[2] . $m[3];
            $ended = true;
            break;
        }
        // Add this block
        $block .= $m[1] . $m[2];
        $html = $m[3];
    }
    // Not ended at last
    if (!$ended) {
        $block .= $html;
        $html = "";
    }
    return array($block, $html);
}
|
||||
|
||||
?>
|
||||
Reference in New Issue
Block a user