# Selima Website Content Management System # Page2Rel.pm: The converter to turn absolute URLs in HTML to relative URLs. # Copyright (c) 2004-2018 imacat. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Author: imacat # First written: 2004-10-24 package Selima::Page2Rel; use 5.008; use strict; use warnings; use base qw(Exporter); use vars qw(@EXPORT @EXPORT_OK); BEGIN { @EXPORT = qw(page2rel page2abs); @EXPORT_OK = @EXPORT; # Prototype declaration sub page2rel($$); sub page2abs($$); } use Encode qw(is_utf8); use Selima::Unicode; # page2rel: Convert URLs in a page to relative sub page2rel($$) { local ($_, %_); my ($page, $base); ($page, $base) = @_; $_ = new Selima::Page2Rel::Converter($base, 1); return $_->convert($page); } # page2abs: Convert URLs in a page to absolute sub page2abs($$) { local ($_, %_); my ($page, $base); ($page, $base) = @_; $_ = new Selima::Page2Rel::Converter($base, 0); return $_->convert($page); } # Selima::Page2Rel::Converter: convert URLs in a page to relative package Selima::Page2Rel::Converter; use 5.008; use strict; use warnings; use HTML::Tagset qw(); # HTML::Tagset does not export anything use vars qw(%LINKELE); $LINKELE{$_} = { map { $_ => 1 } @{$HTML::Tagset::linkElements{$_}} } foreach keys %HTML::Tagset::linkElements; use Selima::AbsURI; use Selima::DataVars qw(:absuri); use Selima::RelURI; use Selima::ShortCut; # new: Initialize the converter sub new : method { local ($_, %_); my ($class, $base, $is_rel, $self); ($class, $base, $is_rel) = @_; $self = bless {}, $class; $self->{"base"} = $base; $self->{"is_rel"} = $is_rel; return $self; } # convert: Convert URLs in a page to relative/absolute sub convert : method { local ($_, %_); my $self; ($self, $_) = @_; s/(|<[a-z]+\d?(?:\s+[a-z\-]+(?:=(?:"[^"]*"|'[^']*'|[^"'\s<>]+))?)*(?:\s+\/|\s*)?>)/$self->cnvtele($1);/gesi; # ' gettext return $_; } # cnvtele: Convert URLs in an element to relative/absolute sub cnvtele : method { local ($_, %_); my $self; ($self, $_) = @_; # Skip the comments return $_ unless /^<([a-z]+\d?)/; # A link element if (exists $LINKELE{$1}) { $self->{"attrs"} = $LINKELE{$1}; s/(?<=\s)([a-z\-]+(?:=(?:"[^"]*"|'[^']*'|[^"'\s<>]+))?)/$self->cnvtattr($1);/gei; # ' gettext } # External language handlers # To be done return $_; } # cnvtattr: Convert URLs in an attribute to relative/absolute sub cnvtattr : method { local ($_, %_); my ($self, $name, $quote); ($self, $_) = @_; # Skip attributes without a value return $_ unless /^([a-z\-]+)=("[^"]*"|'[^']*'|[^"'\s<>]+)$/; # ' gettext ($name, $_) = ($1, $2); # Skip non-link attributes return "$name=$_" unless exists ${$self->{"attrs"}}{$name}; # Strip the quotation s/^(["']?)(.+)\1/$quote = $1; $2;/e; # " gettext # Decode the HTML characters $_ = dh($_); # Convert it $_ = $self->{"is_rel"}? reluri($_, $self->{"base"}): absuri($_, $self->{"base"}, ABSURI_SKIP_FRAGMENT); # Encode the HTML characters $_ = h($_); # Add the quotation $_ = "$quote$_$quote" if $quote ne ""; return "$name=$_"; } return 1;