138 lines
3.7 KiB
Perl
138 lines
3.7 KiB
Perl
# Selima Website Content Management System
|
|
# Page2Rel.pm: The converter to turn absolute URLs in HTML to relative URLs.
|
|
|
|
# Copyright (c) 2004-2018 imacat.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Author: imacat <imacat@mail.imacat.idv.tw>
|
|
# First written: 2004-10-24
|
|
|
|
package Selima::Page2Rel;
|
|
use 5.008;
|
|
use strict;
|
|
use warnings;
|
|
use base qw(Exporter);
|
|
use vars qw(@EXPORT @EXPORT_OK);
|
|
BEGIN {
|
|
@EXPORT = qw(page2rel page2abs);
|
|
@EXPORT_OK = @EXPORT;
|
|
# Prototype declaration
|
|
sub page2rel($$);
|
|
sub page2abs($$);
|
|
}
|
|
|
|
use Encode qw(is_utf8);
|
|
|
|
use Selima::Unicode;
|
|
|
|
# page2rel: Convert URLs in a page to relative
|
|
sub page2rel($$) {
|
|
local ($_, %_);
|
|
my ($page, $base);
|
|
($page, $base) = @_;
|
|
$_ = new Selima::Page2Rel::Converter($base, 1);
|
|
return $_->convert($page);
|
|
}
|
|
|
|
# page2abs: Convert URLs in a page to absolute
|
|
sub page2abs($$) {
|
|
local ($_, %_);
|
|
my ($page, $base);
|
|
($page, $base) = @_;
|
|
$_ = new Selima::Page2Rel::Converter($base, 0);
|
|
return $_->convert($page);
|
|
}
|
|
|
|
|
|
# Selima::Page2Rel::Converter: convert URLs in a page to relative
|
|
package Selima::Page2Rel::Converter;
|
|
use 5.008;
|
|
use strict;
|
|
use warnings;
|
|
|
|
use HTML::Tagset qw();
|
|
# HTML::Tagset does not export anything
|
|
use vars qw(%LINKELE);
|
|
$LINKELE{$_} = { map { $_ => 1 } @{$HTML::Tagset::linkElements{$_}} }
|
|
foreach keys %HTML::Tagset::linkElements;
|
|
|
|
use Selima::AbsURI;
|
|
use Selima::DataVars qw(:absuri);
|
|
use Selima::RelURI;
|
|
use Selima::ShortCut;
|
|
|
|
# new: Initialize the converter
|
|
sub new : method {
|
|
local ($_, %_);
|
|
my ($class, $base, $is_rel, $self);
|
|
($class, $base, $is_rel) = @_;
|
|
$self = bless {}, $class;
|
|
$self->{"base"} = $base;
|
|
$self->{"is_rel"} = $is_rel;
|
|
return $self;
|
|
}
|
|
|
|
# convert: Convert URLs in a page to relative/absolute
|
|
sub convert : method {
|
|
local ($_, %_);
|
|
my $self;
|
|
($self, $_) = @_;
|
|
s/(<!--.*?-->|<[a-z]+\d?(?:\s+[a-z\-]+(?:=(?:"[^"]*"|'[^']*'|[^"'\s<>]+))?)*(?:\s+\/|\s*)?>)/$self->cnvtele($1);/gesi; # ' gettext
|
|
return $_;
|
|
}
|
|
|
|
# cnvtele: Convert URLs in an element to relative/absolute
|
|
sub cnvtele : method {
|
|
local ($_, %_);
|
|
my $self;
|
|
($self, $_) = @_;
|
|
# Skip the comments
|
|
return $_ unless /^<([a-z]+\d?)/;
|
|
# A link element
|
|
if (exists $LINKELE{$1}) {
|
|
$self->{"attrs"} = $LINKELE{$1};
|
|
s/(?<=\s)([a-z\-]+(?:=(?:"[^"]*"|'[^']*'|[^"'\s<>]+))?)/$self->cnvtattr($1);/gei; # ' gettext
|
|
}
|
|
# External language handlers
|
|
# To be done
|
|
return $_;
|
|
}
|
|
|
|
# cnvtattr: Convert URLs in an attribute to relative/absolute
|
|
sub cnvtattr : method {
|
|
local ($_, %_);
|
|
my ($self, $name, $quote);
|
|
($self, $_) = @_;
|
|
# Skip attributes without a value
|
|
return $_ unless /^([a-z\-]+)=("[^"]*"|'[^']*'|[^"'\s<>]+)$/; # ' gettext
|
|
($name, $_) = ($1, $2);
|
|
# Skip non-link attributes
|
|
return "$name=$_" unless exists ${$self->{"attrs"}}{$name};
|
|
|
|
# Strip the quotation
|
|
s/^(["']?)(.+)\1/$quote = $1; $2;/e; # " gettext
|
|
# Decode the HTML characters
|
|
$_ = dh($_);
|
|
# Convert it
|
|
$_ = $self->{"is_rel"}? reluri($_, $self->{"base"}):
|
|
absuri($_, $self->{"base"}, ABSURI_SKIP_FRAGMENT);
|
|
# Encode the HTML characters
|
|
$_ = h($_);
|
|
# Add the quotation
|
|
$_ = "$quote$_$quote" if $quote ne "";
|
|
return "$name=$_";
|
|
}
|
|
|
|
return 1;
|