332 lines
11 KiB
PHP
332 lines
11 KiB
PHP
<?php
|
|
// File name: page2rel.inc.php
|
|
// Description: PHP subroutine to convert URLs in a page to relative
|
|
// Date: 2003-09-09
|
|
// Author: imacat <imacat@pristine.com.tw>
|
|
// Copyright: Copyright (C) 2003-2007 Pristine Communications
|
|
|
|
// Set the include path
|
|
if (!defined("INCPATH_SET")) {
|
|
require_once dirname(__FILE__) . "/incpath.inc.php";
|
|
}
|
|
// Referenced subroutines
|
|
require_once "monica/htmlchar.inc.php";
|
|
require_once "monica/rel2abs.inc.php";
|
|
|
|
// Settings
//   Direction flags handed to the internal converters as their $is_rel
//   argument: true converts URLs to relative, false to absolute.
define('_PAGE2REL_2REL', true);
define('_PAGE2REL_2ABS', false);
|
|
|
|
// page2rel: Convert URLs in a page to relative
//   $html  The page content to be processed
//   $base  The base URL handed on to the converter
//   Returns the page content produced by _page2rel_cnvturl() in the
//   "to relative" direction.  No $rdiff value is used in this direction.
function page2rel($html, $base)
{
    $converted = _page2rel_cnvturl($html, $base, _PAGE2REL_2REL, null);
    return $converted;
}
|
|
|
|
// page2abs: Convert URLs in a page to absolute
//   $html   The page content to be processed
//   $base   The base URL handed on to the converter
//   $rdiff  Passed through unchanged to _page2rel_cnvturl(); defaults to
//           REL2ABS_RDIFF_KEEP
//   Returns the page content produced by _page2rel_cnvturl() in the
//   "to absolute" direction.
function page2abs($html, $base, $rdiff = REL2ABS_RDIFF_KEEP)
{
    $converted = _page2rel_cnvturl($html, $base, _PAGE2REL_2ABS, $rdiff);
    return $converted;
}
|
|
|
|
// _page2rel_cnvturl: Convert URLs in a page to relative/absolute
//   $source  The page content
//   $base    The base URL, passed on to the attribute/CSS converters
//   $is_rel  true to convert to relative, false to convert to absolute
//   $rdiff   Passed on to the attribute/CSS converters
//   Returns the page content with the URL attributes of the known elements
//   converted.  HTML comments and all other elements are copied verbatim.
//   The content of <script> and <style> blocks is skipped as foreign code
//   (CSS url() tokens in <style> blocks are converted).
function _page2rel_cnvturl($source, $base, $is_rel, $rdiff)
{
    // Elements that only need a fixed list of URL attributes converted
    $urlatts = array(
        "a"      => array("href"),
        "area"   => array("href"),
        "base"   => array("href"),
        "link"   => array("href"),
        "img"    => array("src"),
        "input"  => array("src"),
        "embed"  => array("src"),
        "frame"  => array("src"),
        "form"   => array("action"),
        "applet" => array("codebase"),
        "object" => array("classid", "codebase", "data"),
    );

    $result = "";
    // Peel off one comment/element at a time: leading text, the element,
    // and the remainder that is still to be scanned
    while (preg_match("/^(.*?)(<!--.*?-->|<[a-z]+\d?(?:\s+[a-z\-]+=(?:\"[^\"]*\"|'[^']*'|[^\"'\s<>]+))*(?:\s+\/)?>|<\?xml-stylesheet(?:\s+[a-z\-]+=(?:\"[^\"]*\"|'[^']*'|[^\"'\s<>]+))*\s*\?>)(.*)$/is", $source, $m)) {
        $result .= $m[1];
        $ele = $m[2];
        $source = $m[3];
        // Skip if not a relevant element.  The second pattern accepts
        // <script> and <style> without any attribute, so that their
        // content is still treated as foreign code rather than as HTML.
        if (   !preg_match("/^<((a|applet|area|base|embed|form|frame|img|input|link|object|param|script|style)\s+)(.+?)>$/is", $ele, $m)
            && !preg_match("/^<((script|style))()>$/i", $ele, $m)
            && !preg_match("/^<((\?xml-stylesheet)\s+)(.+?)\?>$/s", $ele, $m)) {
            $result .= $ele;
            continue;
        }
        $elepref = $m[1];
        $elename = strtolower($m[2]);
        $elebody = $m[3];

        // Elements with plain URL attributes
        if (isset($urlatts[$elename])) {
            foreach ($urlatts[$elename] as $att) {
                $elebody = _page2rel_cnvtatt($elebody, $att, $base, $is_rel, $rdiff);
            }
            $result .= "<" . $elepref . $elebody . ">";
            continue;
        }

        switch ($elename) {
        case "script":
            $elebody = _page2rel_cnvtatt($elebody, "src", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            // Keep the raw value: null means "no type attribute".  It must
            // be tested before lower-casing, or the fallback never runs.
            $type = _page2rel_attval($elebody, "type");
            // Compatibility with the old "language" attribute
            if (is_null($type)) {
                $scptlang = _page2rel_attval($elebody, "language");
                $scptlang = is_null($scptlang)? "": strtolower($scptlang);
                switch ($scptlang) {
                case "javascript":
                    $type = "text/javascript";
                    break;
                case "vbscript":
                case "vbs":
                    $type = "text/vbscript";
                    break;
                case "perlscript":
                    $type = "text/perlscript";
                    break;
                }
            }
            $type = is_null($type)? "": strtolower($type);
            switch ($type) {
            case "text/vbscript":
            case "text/vbs":
                $m = _page2rel_foreign_vbs($source);
                break;
            case "text/perlscript":
                // The PerlScript skipper is not defined in this file.
                // Use it when some other file provides it; otherwise
                // fall back to a plain scan for the closing tag.
                if (function_exists("_page2rel_foreign_pls")) {
                    $m = _page2rel_foreign_pls($source);
                } elseif (preg_match("/^(.*?)(<\/script>.*)$/is", $source, $m2)) {
                    $m = array($m2[1], $m2[2]);
                } else {
                    $m = array($source, "");
                }
                break;
            case "text/javascript":
            default:
                $m = _page2rel_foreign_js($source);
                break;
            }
            // $m is (skipped block, remainder starting at the closing tag)
            $result .= $m[0];
            $source = $m[1];
            break;

        case "style":
            $result .= $ele;
            $type = _page2rel_attval($elebody, "type");
            $type = is_null($type)? "": strtolower($type);
            switch ($type) {
            case "text/javascript":
                $m = _page2rel_foreign_jsss($source);
                break;
            case "text/css":
            default:
                $m = _page2rel_foreign_css($source, $base, $is_rel, $rdiff);
                break;
            }
            $result .= $m[0];
            $source = $m[1];
            break;

        case "param":
            // Only the value of these named parameters is converted
            $name = _page2rel_attval($elebody, "name");
            $name = is_null($name)? "": strtolower($name);
            if (in_array($name, array("src", "movie", "filename"))) {
                $elebody = _page2rel_cnvtatt($elebody, "value", $base, $is_rel, $rdiff);
            }
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "?xml-stylesheet":
            $elebody = _page2rel_cnvtatt($elebody, "href", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . "?>";
            break;
        }
    }
    // Append the remains
    $result .= $source;
    return $result;
}
|
|
|
|
// _page2rel_cnvtatt: Convert URLs in HTML attributes to relative/absolute
//   $source  The attribute part of an element (the text after the name)
//   $att     The name of the attribute to convert; it is inserted into
//            the regular expressions as-is and matched case-insensitively
//   $base    The base URL passed to abs2rel()/rel2abs()
//   $is_rel  true to call abs2rel(), false to call rel2abs()
//   $rdiff   Passed as the last argument of rel2abs()
//   Returns $source with the value of every $att="..." / $att='...' /
//   $att=... piece replaced by the converted URL.  Everything else,
//   including the white space between the pieces, is copied verbatim.
function _page2rel_cnvtatt($source, $att, $base, $is_rel, $rdiff)
{
    $result = "";
    // Split off one white-space-delimited piece at a time.  The "D"
    // modifier keeps "$" from matching before a final newline, which would
    // silently drop that newline from the remainder.
    while ($source != "" && preg_match("/^([^\"'\s]*(?:(?:\"[^\"]*\"|'[^']*')[^\"'\s]*)*)(\s*)(.*?)$/sD", $source, $m)) {
        $piece = $m[1];
        $sep = $m[2];
        // Nothing was consumed: the remainder starts with an unbalanced
        // quote.  Copy it verbatim instead of looping forever.
        if ($piece === "" && $sep === "") {
            $result .= $source;
            break;
        }
        $source = $m[3];
        // Skip if not in the attribute="value" format
        if (   !(preg_match("/^($att)=(\")([^\"]*)\"$/i", $piece, $m)
              || preg_match("/^($att)=(')([^']*)'$/i", $piece, $m)
              || preg_match("/^($att)=()([^\"']+)$/i", $piece, $m))) {
            $result .= $piece . $sep;
            continue;
        }
        $thisatt = $m[1];
        $quote = $m[2];
        $url = $m[3];
        // $url is a placeholder that is replaced dynamically, as in error
        // pages, so it is left untouched
        if ($url !== "\$url") {
            $url = $is_rel? abs2rel($url, $base):
                rel2abs($url, $base, REL2ABS_SKIP_FRAGMENT, REL2ABS_NO_HOST, $rdiff);
        }
        // Rebuild the piece with its original name spelling and quoting
        $result .= $thisatt . "=" . $quote . $url . $quote . $sep;
    }
    return $result;
}
|
|
|
|
// _page2rel_attval: Obtain the value of an attribute
//   $elebody  The attribute part of an element (the text after the name)
//   $att      The name of the attribute, compared case-insensitively
//   Returns the value of the attribute after it is passed through dh(),
//   or null if the attribute is not present.  When the attribute appears
//   more than once the last occurrence wins.
function _page2rel_attval($elebody, $att)
{
    $att = strtolower($att);
    $val = null;
    // Split off one white-space-delimited piece at a time
    while ($elebody != "" && preg_match("/^([^\"'\s]*(?:(?:\"[^\"]*\"|'[^']*')[^\"'\s]*)*)(\s*)(.*?)$/s", $elebody, $m)) {
        $piece = $m[1];
        // Nothing was consumed: the remainder starts with an unbalanced
        // quote.  Stop here instead of looping forever.
        if ($piece === "" && $m[2] === "") {
            break;
        }
        $elebody = $m[3];
        // Skip if not in the attribute="value" format
        if (   !(preg_match("/^($att)=(\")([^\"]*)\"$/i", $piece, $m)
              || preg_match("/^($att)=(')([^']*)'$/i", $piece, $m)
              || preg_match("/^($att)=()([^\"']+)$/i", $piece, $m))) {
            continue;
        }
        $thisatt = $m[1];
        $thisval = dh($m[3]);
        if (strtolower($thisatt) == $att) {
            $val = $thisval;
        }
    }
    return $val;
}
|
|
|
|
// _page2rel_foreign_js: Skip the next javascript block
//   $html  The content that follows the opening <script ...> tag
//   Returns array(block, remainder).  The block is everything up to the
//   first </script> that is not inside a quoted string, a /* */ comment or
//   a // comment; the remainder starts at that </script>.  When there is
//   no such </script> the whole input is the block and the remainder is
//   the empty string.
function _page2rel_foreign_js($html)
{
    // Leading text, then one token (string, comment or end tag), then the rest
    $pattern = "/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|\/\/[^\n]*|<\/script>)(.*)$/is";
    $skipped = "";
    while (true) {
        if (!preg_match($pattern, $html, $parts)) {
            // No further token: the block was never closed
            return array($skipped . $html, "");
        }
        $before = $parts[1];
        $token = $parts[2];
        $after = $parts[3];
        if (strtolower($token) == "</script>") {
            // The end of the block; leave the end tag in the remainder
            return array($skipped . $before, $token . $after);
        }
        // A string or a comment: it belongs to the block
        $skipped .= $before . $token;
        $html = $after;
    }
}
|
|
|
|
// _page2rel_foreign_vbs: Skip the next vbscript block
//   $html  The content that follows the opening <script ...> tag
//   Returns array(block, remainder).  The block is everything up to the
//   first </script> that is not inside a quoted string (with "" as the
//   embedded quote), a Rem comment or a ' comment; the remainder starts at
//   that </script>.  When there is no such </script> the whole input is
//   the block and the remainder is the empty string.
function _page2rel_foreign_vbs($html)
{
    // Leading text, then one token (string, comment or end tag), then the rest
    $pattern = "/^(.*?)(\"(?:[^\"]|\"\")*\"|[:\n]\s*Rem\b[^\n]*|'[^\n]*|<\/script>)(.*)$/is";
    $skipped = "";
    $rest = $html;
    $closed = false;
    while (!$closed && preg_match($pattern, $rest, $parts)) {
        $skipped .= $parts[1];
        if (strtolower($parts[2]) == "</script>") {
            // The end of the block; leave the end tag in the remainder
            $rest = $parts[2] . $parts[3];
            $closed = true;
        } else {
            // A string or a comment: it belongs to the block
            $skipped .= $parts[2];
            $rest = $parts[3];
        }
    }
    // Not ended at last: everything that is left belongs to the block
    if (!$closed) {
        $skipped .= $rest;
        $rest = "";
    }
    return array($skipped, $rest);
}
|
|
|
|
// _page2rel_foreign_css: Skip the next cascading stylesheet block
//   $html    The content that follows the opening <style ...> tag
//   $base    Passed on to _page2rel_foreign_cssurls()
//   $is_rel  Passed on to _page2rel_foreign_cssurls()
//   $rdiff   Passed on to _page2rel_foreign_cssurls()
//   Returns array(block, remainder).  The block is everything up to the
//   first </style> that is not inside a quoted string or a /* */ comment,
//   after it went through _page2rel_foreign_cssurls(); the remainder
//   starts at that </style>.  When there is no such </style> the whole
//   input is the block and the remainder is the empty string.
function _page2rel_foreign_css($html, $base, $is_rel, $rdiff)
{
    // Leading text, then one token (string, comment or end tag), then the rest
    $pattern = "/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|<\/style>)(.*)$/is";
    $css = "";
    $rest = $html;
    $closed = false;
    while (!$closed && preg_match($pattern, $rest, $parts)) {
        $css .= $parts[1];
        if (strtolower($parts[2]) == "</style>") {
            // The end of the block; leave the end tag in the remainder
            $rest = $parts[2] . $parts[3];
            $closed = true;
        } else {
            // A string or a comment: it belongs to the block
            $css .= $parts[2];
            $rest = $parts[3];
        }
    }
    // Not ended at last: everything that is left belongs to the block
    if (!$closed) {
        $css .= $rest;
        $rest = "";
    }
    // Convert URLs in the CSS block
    $css = _page2rel_foreign_cssurls($css, $base, $is_rel, $rdiff);
    return array($css, $rest);
}
|
|
|
|
// _page2rel_foreign_cssurls: Convert URLs in a CSS block
//   $source  The CSS content
//   $base    The base URL passed to abs2rel()/rel2abs()
//   $is_rel  true to call abs2rel(), false to call rel2abs()
//   $rdiff   Passed as the last argument of rel2abs()
//   Returns the CSS content with the URL inside every url(...), url("...")
//   and url('...') token converted.  Quoted strings and /* */ comments
//   are copied verbatim, so a url() inside them is not touched.
function _page2rel_foreign_cssurls($source, $base, $is_rel, $rdiff)
{
    $result = "";
    // Leading text, then one token (url(), string or comment), then the rest.
    // The unquoted url() form must stop at the first ")"; otherwise two
    // unquoted url() tokens with no quote between them would be swallowed
    // as a single bogus URL.
    while (preg_match("/^(.*?)(\burl\([^\"')]*\)|\burl\(\"(?:[^\\\\\"]|\\\\.)*\"\)|\burl\('(?:[^\\\\']|\\\\.)*'\)|\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/)(.*)$/is", $source, $m)) {
        $result .= $m[1];
        $piece = $m[2];
        $source = $m[3];
        // An URL: $m becomes (whole, "url(", quote, URL, ")")
        if (   preg_match("/^(url\()(\")((?:[^\\\\\"]|\\\\.)*)\"(\))$/i", $piece, $m)
            || preg_match("/^(url\()(')((?:[^\\\\']|\\\\.)*)'(\))$/i", $piece, $m)
            || preg_match("/^(url\()()([^\"')]*)(\))$/i", $piece, $m)) {
            $m[3] = $is_rel? abs2rel($m[3], $base):
                rel2abs($m[3], $base, REL2ABS_SKIP_FRAGMENT, REL2ABS_NO_HOST, $rdiff);
            // Rebuild the token with its original quoting
            $piece = $m[1] . $m[2] . $m[3] . $m[2] . $m[4];
        }
        $result .= $piece;
    }
    // Append the remains
    $result .= $source;
    return $result;
}
|
|
|
|
// _page2rel_foreign_jsss: Skip the next javascript stylesheet block
//   $html  The content that follows the opening <style ...> tag
//   Returns array(block, remainder).  The block is everything up to the
//   first </style> that is not inside a quoted string, a /* */ comment or
//   a // comment; the remainder starts at that </style>.  When there is
//   no such </style> the whole input is the block and the remainder is
//   the empty string.
function _page2rel_foreign_jsss($html)
{
    $block = "";
    $ended = false;
    // The block lives in a <style> element, so the terminator to look for
    // is </style>.  Looking for </script> here while comparing with
    // </style> below meant the block could never end.
    while (preg_match("/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|\/\/[^\n]*|<\/style>)(.*)$/is", $html, $m)) {
        // The end of the block; leave the end tag in the remainder
        if (strtolower($m[2]) == "</style>") {
            $block .= $m[1];
            $html = $m[2] . $m[3];
            $ended = true;
            break;
        }
        // Add this block
        $block .= $m[1] . $m[2];
        $html = $m[3];
    }
    // Not ended at last: everything that is left belongs to the block
    if (!$ended) {
        $block .= $html;
        $html = "";
    }
    return array($block, $html);
}
|
|
|
|
?>
|