Files
selima-perl/lib/perl5/Selima/Page2Rel.pm
2026-03-10 21:31:43 +08:00

138 lines
3.7 KiB
Perl

# Selima Website Content Management System
# Page2Rel.pm: The converter to turn absolute URLs in HTML to relative URLs.
# Copyright (c) 2004-2018 imacat.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Author: imacat <imacat@mail.imacat.idv.tw>
# First written: 2004-10-24
package Selima::Page2Rel;
use 5.008;
use strict;
use warnings;
use base qw(Exporter);
use vars qw(@EXPORT @EXPORT_OK);
BEGIN {
@EXPORT = qw(page2rel page2abs);
@EXPORT_OK = @EXPORT;
# Prototype declaration
sub page2rel($$);
sub page2abs($$);
}
use Encode qw(is_utf8);
use Selima::Unicode;
# page2rel: Convert URLs in a page to relative
sub page2rel($$) {
local ($_, %_);
my ($page, $base);
($page, $base) = @_;
$_ = new Selima::Page2Rel::Converter($base, 1);
return $_->convert($page);
}
# page2abs: Convert URLs in a page to absolute
sub page2abs($$) {
local ($_, %_);
my ($page, $base);
($page, $base) = @_;
$_ = new Selima::Page2Rel::Converter($base, 0);
return $_->convert($page);
}
# Selima::Page2Rel::Converter: convert URLs in a page to relative
package Selima::Page2Rel::Converter;
use 5.008;
use strict;
use warnings;
use HTML::Tagset qw();
# HTML::Tagset does not export anything
use vars qw(%LINKELE);
$LINKELE{$_} = { map { $_ => 1 } @{$HTML::Tagset::linkElements{$_}} }
foreach keys %HTML::Tagset::linkElements;
use Selima::AbsURI;
use Selima::DataVars qw(:absuri);
use Selima::RelURI;
use Selima::ShortCut;
# new: Initialize the converter
sub new : method {
local ($_, %_);
my ($class, $base, $is_rel, $self);
($class, $base, $is_rel) = @_;
$self = bless {}, $class;
$self->{"base"} = $base;
$self->{"is_rel"} = $is_rel;
return $self;
}
# convert: Convert URLs in a page to relative/absolute
sub convert : method {
local ($_, %_);
my $self;
($self, $_) = @_;
s/(<!--.*?-->|<[a-z]+\d?(?:\s+[a-z\-]+(?:=(?:"[^"]*"|'[^']*'|[^"'\s<>]+))?)*(?:\s+\/|\s*)?>)/$self->cnvtele($1);/gesi; # ' gettext
return $_;
}
# cnvtele: Convert URLs in an element to relative/absolute
sub cnvtele : method {
local ($_, %_);
my $self;
($self, $_) = @_;
# Skip the comments
return $_ unless /^<([a-z]+\d?)/;
# A link element
if (exists $LINKELE{$1}) {
$self->{"attrs"} = $LINKELE{$1};
s/(?<=\s)([a-z\-]+(?:=(?:"[^"]*"|'[^']*'|[^"'\s<>]+))?)/$self->cnvtattr($1);/gei; # ' gettext
}
# External language handlers
# To be done
return $_;
}
# cnvtattr: Convert URLs in an attribute to relative/absolute
sub cnvtattr : method {
local ($_, %_);
my ($self, $name, $quote);
($self, $_) = @_;
# Skip attributes without a value
return $_ unless /^([a-z\-]+)=("[^"]*"|'[^']*'|[^"'\s<>]+)$/; # ' gettext
($name, $_) = ($1, $2);
# Skip non-link attributes
return "$name=$_" unless exists ${$self->{"attrs"}}{$name};
# Strip the quotation
s/^(["']?)(.+)\1/$quote = $1; $2;/e; # " gettext
# Decode the HTML characters
$_ = dh($_);
# Convert it
$_ = $self->{"is_rel"}? reluri($_, $self->{"base"}):
absuri($_, $self->{"base"}, ABSURI_SKIP_FRAGMENT);
# Encode the HTML characters
$_ = h($_);
# Add the quotation
$_ = "$quote$_$quote" if $quote ne "";
return "$name=$_";
}
return 1;