// Copyright: Copyright (C) 2003-2007 Pristine Communications

// Set the include path
if (!defined("INCPATH_SET")) {
    require_once dirname(__FILE__) . "/incpath.inc.php";
}
// Referenced subroutines
// (abs2rel(), rel2abs(), dh() and the REL2ABS_* constants used below are
// not defined in this file; presumably they come from these includes.)
require_once "monica/htmlchar.inc.php";
require_once "monica/rel2abs.inc.php";

// Settings: the conversion direction passed to _page2rel_cnvturl()
define("_PAGE2REL_2REL", true);
define("_PAGE2REL_2ABS", false);

// page2rel: Convert URLs in a page to relative
//   $html: the page source
//   $base: the base URL handed to abs2rel()
//   Returns the page source with the URLs converted.
function page2rel($html, $base)
{
    return _page2rel_cnvturl($html, $base, _PAGE2REL_2REL, null);
}

// page2abs: Convert URLs in a page to absolute
//   $html:  the page source
//   $base:  the base URL handed to rel2abs()
//   $rdiff: passed through to rel2abs() as its last argument
//   Returns the page source with the URLs converted.
function page2abs($html, $base, $rdiff = REL2ABS_RDIFF_KEEP)
{
    return _page2rel_cnvturl($html, $base, _PAGE2REL_2ABS, $rdiff);
}

// _page2rel_cnvturl: Convert URLs in a page to relative/absolute
//   $source: the page source
//   $base:   the base URL
//   $is_rel: _PAGE2REL_2REL to convert to relative,
//            _PAGE2REL_2ABS to convert to absolute
//   $rdiff:  passed through to rel2abs() (unused when converting to relative)
//   The page is consumed one token at a time.  A token is an HTML comment,
//   an HTML start tag with well-formed attributes, or an
//   <?xml-stylesheet ...?\> processing instruction.  Everything else is
//   copied to the result unchanged.
function _page2rel_cnvturl($source, $base, $is_rel, $rdiff)
{
    $result = "";
    // Every alternative of the second group must be non-empty.  An empty
    // alternative matches without consuming anything and the loop would
    // never terminate.  HTML comments are matched as a whole so that
    // mark-up inside them is passed through untouched.
    while (preg_match("/^(.*?)(<!--.*?-->|<[a-z]+\d?(?:\s+[a-z\-]+=(?:\"[^\"]*\"|'[^']*'|[^\"'\s<>]+))*(?:\s+\/)?>|<\?xml-stylesheet(?:\s+[a-z\-]+=(?:\"[^\"]*\"|'[^']*'|[^\"'\s<>]+))*\s*\?>)(.*)$/is", $source, $m)) {
        $result .= $m[1];
        $ele = $m[2];
        $source = $m[3];

        // Skip if not a relevant element (this includes comments)
        if (   !preg_match("/^<((a|applet|area|base|embed|form|frame|img|input|link|object|param|script|style)\s+)(.+?)>$/is", $ele, $m)
            && !preg_match("/^<((\?xml-stylesheet)\s+)(.+?)\?>$/s", $ele, $m)) {
            $result .= $ele;
            continue;
        }
        $elepref = $m[1];   // The element name and the white space after it
        $elename = $m[2];
        $elebody = $m[3];   // The attributes

        switch (strtolower($elename)) {
        // Elements with their URL in "href"
        case "a":
        case "area":
        case "base":
        case "link":
            $elebody = _page2rel_cnvtatt($elebody, "href", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        // Elements with their URL in "src"
        case "img":
        case "input":
        case "embed":
        case "frame":
            $elebody = _page2rel_cnvtatt($elebody, "src", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "form":
            $elebody = _page2rel_cnvtatt($elebody, "action", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "applet":
            $elebody = _page2rel_cnvtatt($elebody, "codebase", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "object":
            $elebody = _page2rel_cnvtatt($elebody, "classid", $base, $is_rel, $rdiff);
            $elebody = _page2rel_cnvtatt($elebody, "codebase", $base, $is_rel, $rdiff);
            $elebody = _page2rel_cnvtatt($elebody, "data", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "script":
            $elebody = _page2rel_cnvtatt($elebody, "src", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . ">";
            // Check for null before lower-casing: strtolower(null) yields
            // "", which would hide a missing "type" attribute.
            $type = _page2rel_attval($elebody, "type");
            if (!is_null($type)) {
                $type = strtolower($type);
            // Compatibility with the old "language" attribute
            } else {
                $type = "";
                $scptlang = _page2rel_attval($elebody, "language");
                $scptlang = is_null($scptlang)? "": strtolower($scptlang);
                switch ($scptlang) {
                case "javascript":
                    $type = "text/javascript";
                    break;
                case "vbscript":
                case "vbs":
                    $type = "text/vbscript";
                    break;
                case "perlscript":
                    $type = "text/perlscript";
                    break;
                }
            }
            // Skip the script body, so that mark-up inside it is not converted
            switch ($type) {
            case "text/vbscript":
            case "text/vbs":
                $parts = _page2rel_foreign_vbs($source);
                break;
            case "text/perlscript":
                // NOTE(review): _page2rel_foreign_pls() is not defined in
                // the visible part of this file.  Use it when some other
                // file provides it; otherwise fall back to the JavaScript
                // skipper instead of dying with an undefined function.
                if (function_exists("_page2rel_foreign_pls")) {
                    $parts = _page2rel_foreign_pls($source);
                } else {
                    $parts = _page2rel_foreign_js($source);
                }
                break;
            case "text/javascript":
            default:
                $parts = _page2rel_foreign_js($source);
                break;
            }
            $result .= $parts[0];
            $source = $parts[1];
            break;

        case "style":
            // The <style> tag itself carries no URL
            $result .= $ele;
            $type = _page2rel_attval($elebody, "type");
            $type = is_null($type)? "": strtolower($type);
            switch ($type) {
            case "text/javascript":
                $parts = _page2rel_foreign_jsss($source);
                break;
            case "text/css":
            default:
                $parts = _page2rel_foreign_css($source, $base, $is_rel, $rdiff);
                break;
            }
            $result .= $parts[0];
            $source = $parts[1];
            break;

        case "param":
            // Only parameters with these names have their value converted
            $name = _page2rel_attval($elebody, "name");
            $name = is_null($name)? "": strtolower($name);
            if (in_array($name, array("src", "movie", "filename"))) {
                $elebody = _page2rel_cnvtatt($elebody, "value", $base, $is_rel, $rdiff);
            }
            $result .= "<" . $elepref . $elebody . ">";
            break;

        case "?xml-stylesheet":
            $elebody = _page2rel_cnvtatt($elebody, "href", $base, $is_rel, $rdiff);
            $result .= "<" . $elepref . $elebody . "?>";
            break;
        }
    }
    // Append the remains
    $result .= $source;
    return $result;
}

// _page2rel_cnvtatt: Convert URLs in HTML attributes to relative/absolute
//   $source: the attribute list of an element (the text after the element
//            name and before the closing ">")
//   $att:    the name of the attribute to convert, matched case-insensitively.
//            It is interpolated into a regular expression unquoted, so it
//            must not contain regular expression meta-characters.
//   $base, $is_rel, $rdiff: see _page2rel_cnvturl()
//   Returns the attribute list with that attribute converted.  All the
//   other pieces and the white space between them are kept as they are.
function _page2rel_cnvtatt($source, $att, $base, $is_rel, $rdiff)
{
    $result = "";
    // Split off one white-space-delimited piece at a time.  Quoted values
    // may contain white space.  The D modifier keeps "$" from matching
    // before a trailing newline, which would silently drop that newline.
    while ($source != "" && preg_match("/^([^\"'\s]*(?:(?:\"[^\"]*\"|'[^']*')[^\"'\s]*)*)(\s*)(.*?)$/sD", $source, $m)) {
        // Nothing consumed (an unbalanced quote): keep the remains as they
        // are, instead of looping forever on the same input.
        if ($m[1] == "" && $m[2] == "") {
            $result .= $source;
            break;
        }
        $piece = $m[1];
        $sep = $m[2];
        $source = $m[3];
        // Skip if not in the attribute="value" format
        if (   !(preg_match("/^($att)=(\")([^\"]*)\"$/i", $piece, $m)
                || preg_match("/^($att)=(')([^']*)'$/i", $piece, $m)
                || preg_match("/^($att)=()([^\"']+)$/i", $piece, $m))) {
            $result .= $piece . $sep;
            continue;
        }
        $thisatt = $m[1];
        $quote = $m[2];
        $url = $m[3];
        // $url is a variable to be replaced dynamically, as in error pages
        if ($url != "\$url") {
            $url = $is_rel? abs2rel($url, $base):
                rel2abs($url, $base, REL2ABS_SKIP_FRAGMENT,
                    REL2ABS_NO_HOST, $rdiff);
        }
        $result .= $thisatt . "=" . $quote . $url . $quote . $sep;
    }
    return $result;
}

// _page2rel_attval: Obtain the value of an attribute
//   $elebody: the attribute list of an element
//   $att:     the name of the attribute, matched case-insensitively
//   Returns the value passed through dh() (presumably an HTML character
//   decoder from htmlchar.inc.php), or null if there is no such attribute.
//   If the attribute occurs more than once, the last occurrence wins.
function _page2rel_attval($elebody, $att)
{
    $att = strtolower($att);
    $val = null;
    while ($elebody != "" && preg_match("/^([^\"'\s]*(?:(?:\"[^\"]*\"|'[^']*')[^\"'\s]*)*)(\s*)(.*?)$/s", $elebody, $m)) {
        // Nothing consumed (an unbalanced quote): stop, instead of looping
        // forever on the same input.
        if ($m[1] == "" && $m[2] == "") {
            break;
        }
        $piece = $m[1];
        $elebody = $m[3];
        // Skip if not in the attribute="value" format
        if (   !(preg_match("/^($att)=(\")([^\"]*)\"$/i", $piece, $m)
                || preg_match("/^($att)=(')([^']*)'$/i", $piece, $m)
                || preg_match("/^($att)=()([^\"']+)$/i", $piece, $m))) {
            continue;
        }
        $thisatt = $m[1];
        $thisval = dh($m[3]);
        if (strtolower($thisatt) == $att) {
            $val = $thisval;
        }
    }
    return $val;
}

// _page2rel_foreign_js: Skip the next javascript block
//   $html: the page source right after a <script> start tag
//   Returns array($block, $rest).  $block is the script body, and $rest
//   starts at the closing </script> tag.  String literals and comments are
//   skipped as a whole, so a "</script>" inside them does not end the
//   block.  If there is no closing tag, everything is the block and $rest
//   is empty.
function _page2rel_foreign_js($html)
{
    $block = "";
    $ended = false;
    while (preg_match("/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|\/\/[^\n]*|<\/script>)(.*)$/is", $html, $m)) {
        // The end of the block
        if (strtolower($m[2]) == "</script>") {
            $block .= $m[1];
            $html = $m[2] . $m[3];
            $ended = true;
            break;
        }
        // Add this block
        $block .= $m[1] . $m[2];
        $html = $m[3];
    }
    // Not ended at last
    if (!$ended) {
        $block .= $html;
        $html = "";
    }
    return array($block, $html);
}

// _page2rel_foreign_vbs: Skip the next vbscript block
//   $html: the page source right after a <script> start tag
//   Returns array($block, $rest), as _page2rel_foreign_js() does.  String
//   literals (with "" as the escaped quote), Rem comments and ' comments
//   are skipped as a whole.
function _page2rel_foreign_vbs($html)
{
    $block = "";
    $ended = false;
    while (preg_match("/^(.*?)(\"(?:[^\"]|\"\")*\"|[:\n]\s*Rem\b[^\n]*|'[^\n]*|<\/script>)(.*)$/is", $html, $m)) {
        // The end of the block
        if (strtolower($m[2]) == "</script>") {
            $block .= $m[1];
            $html = $m[2] . $m[3];
            $ended = true;
            break;
        }
        // Add this block
        $block .= $m[1] . $m[2];
        $html = $m[3];
    }
    // Not ended at last
    if (!$ended) {
        $block .= $html;
        $html = "";
    }
    return array($block, $html);
}

// _page2rel_foreign_css: Skip the next cascading stylesheet block
//   $html: the page source right after a <style> start tag
//   $base, $is_rel, $rdiff: see _page2rel_cnvturl()
//   Returns array($block, $rest).  $block is the stylesheet with its
//   url(...) references converted, and $rest starts at the closing </style>
//   tag.  If there is no closing tag, everything is the block and $rest is
//   empty.
function _page2rel_foreign_css($html, $base, $is_rel, $rdiff)
{
    $block = "";
    $ended = false;
    while (preg_match("/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|<\/style>)(.*)$/is", $html, $m)) {
        // The end of the block
        if (strtolower($m[2]) == "</style>") {
            $block .= $m[1];
            $html = $m[2] . $m[3];
            $ended = true;
            break;
        }
        // Add this block
        $block .= $m[1] . $m[2];
        $html = $m[3];
    }
    // Not ended at last
    if (!$ended) {
        $block .= $html;
        $html = "";
    }
    // Convert URLs in the CSS block
    $block = _page2rel_foreign_cssurls($block, $base, $is_rel, $rdiff);
    return array($block, $html);
}

// _page2rel_foreign_cssurls: Convert URLs in a CSS block
//   $source: the stylesheet text
//   $base, $is_rel, $rdiff: see _page2rel_cnvturl()
//   Returns the stylesheet with each url(...) converted, whether quoted
//   or not.  Plain string literals and comments are copied unchanged, so
//   a "url(" inside them is not touched.
function _page2rel_foreign_cssurls($source, $base, $is_rel, $rdiff)
{
    $result = "";
    // An unquoted URL must stop at the first ")".  Otherwise it would run
    // up to a later parenthesis in the same stylesheet.
    while (preg_match("/^(.*?)(\burl\([^\"')]*\)|\burl\(\"(?:[^\\\\\"]|\\\\.)*\"\)|\burl\('(?:[^\\\\']|\\\\.)*'\)|\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/)(.*)$/is", $source, $m)) {
        $result .= $m[1];
        $piece = $m[2];
        $source = $m[3];
        // An URL
        if (   preg_match("/^(url\()(\")((?:[^\\\\\"]|\\\\.)*)\"(\))$/i", $piece, $m)
            || preg_match("/^(url\()(')((?:[^\\\\']|\\\\.)*)'(\))$/i", $piece, $m)
            || preg_match("/^(url\()()([^\"')]*)(\))$/i", $piece, $m)) {
            $m[3] = $is_rel? abs2rel($m[3], $base):
                rel2abs($m[3], $base, REL2ABS_SKIP_FRAGMENT,
                    REL2ABS_NO_HOST, $rdiff);
            // Put it back with its original quotes
            $piece = $m[1] . $m[2] . $m[3] . $m[2] . $m[4];
        }
        $result .= $piece;
    }
    // Append the remains
    $result .= $source;
    return $result;
}

// _page2rel_foreign_jsss: Skip the next javascript stylesheet block
//   $html: the page source right after a <style type="text/javascript">
//          start tag
//   Returns array($block, $rest).  $block is the stylesheet body, and
//   $rest starts at the closing </style> tag.  The block sits inside a
//   <style> element, so it ends at </style>, not at </script>.
function _page2rel_foreign_jsss($html)
{
    $block = "";
    $ended = false;
    while (preg_match("/^(.*?)(\"(?:[^\\\\\"]|\\\\.)*\"|'(?:[^\\\\']|\\\\.)*'|\/\*.*?\*\/|\/\/[^\n]*|<\/style>)(.*)$/is", $html, $m)) {
        // The end of the block
        if (strtolower($m[2]) == "</style>") {
            $block .= $m[1];
            $html = $m[2] . $m[3];
            $ended = true;
            break;
        }
        // Add this block
        $block .= $m[1] . $m[2];
        $html = $m[3];
    }
    // Not ended at last
    if (!$ended) {
        $block .= $html;
        $html = "";
    }
    return array($block, $html);
}

?>