# Selima Website Content Management System # ReqURI.pm: The request URI finder. # Copyright (c) 2004-2018 imacat. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Author: imacat # First written: 2004-09-23 package Selima::ReqURI; use 5.008; use strict; use warnings; use base qw(Exporter); use vars qw(@EXPORT @EXPORT_OK); BEGIN { @EXPORT = qw(init_request_uri); @EXPORT_OK = @EXPORT; # Prototype declaration sub init_request_uri(); sub path_info_is_broken(); } use CGI qw(); use File::Spec::Functions qw(catdir splitdir); use URI::Escape qw(uri_escape); use Selima::DataVars qw($SESSION :env :requri :siteconf); use Selima::HTTPS; use Selima::Server; use Selima::Session; # init_request_uri: Obtain the REQUEST_URI sub init_request_uri() { local ($_, %_); my ($r, $file, $script, $script_name, $query, $port); # Calculated before return if defined $REQUEST_URI; # mod_perl has no CGI environment variables when run at PerlInitHandler $r = $IS_MP2? Apache2::RequestUtil->request: Apache->request if $IS_MODPERL; # Build the document root # Selima/.pm should always be put at /magicat/lib/perl5/Selima/.pm $DOC_ROOT = join "/", splitdir $INC{"Selima/$PACKAGE.pm"}; $DOC_ROOT =~ s/\/(?:magicat|admin)\/lib\/perl5\/Selima\/$PACKAGE.pm$//; $DOC_ROOT = catdir splitdir $DOC_ROOT; # Find the root difference # Apache stores the file name in SCRIPT_FILENAME undef $file; if (!$IS_CGI) { $file = $0; } elsif ($IS_MODPERL) { $file = $r->filename; } elsif (is_apache()) { $file = $ENV{"SCRIPT_FILENAME"}; # Microsoft-IIS stores the file name in PATH_TRANSLATED } elsif (is_iis()) { $file = $ENV{"PATH_TRANSLATED"}; # I have not seen any other implementation yet. } undef $script; if (defined $file) { $file = join "/", splitdir $file; $_ = join "/", splitdir $DOC_ROOT; $script = $1 if $file =~ /^$_(\/.+)$/; } undef $ROOT_DIFF; if (defined $script) { if ($IS_MODPERL) { $script_name = $r->uri; $_ = $r->path_info; $script_name =~ s/$_$//; } else { $script_name = $ENV{"SCRIPT_NAME"}; } $script_name =~ s/\/cgi-(perl|raw)\//\/cgi-bin\//; $ROOT_DIFF = $1 if $script_name =~ /^(.+)$script$/; } # Assume no root difference if that is not available $ROOT_DIFF = "" if !defined $ROOT_DIFF; # Build the REQUEST_PATH first if ($IS_MODPERL) { $REQUEST_URI = $r->the_request; $_ = $r->protocol; $REQUEST_URI =~ s/\s+$_//; $_ = $r->method; $REQUEST_URI =~ s/^$_\s+//; # Remove the schema and host part with absoluteURI $REQUEST_URI =~ s/^[^:\/ ]+:\/\/[^\/ ]+//; $REQUEST_PATH = $REQUEST_URI; $REQUEST_PATH =~ s/\?.*$//; # Apache REQUEST_URI exists. Use it for simplicity and accuracy. } elsif (exists $ENV{"REQUEST_URI"}) { $REQUEST_URI = $ENV{"REQUEST_URI"}; # Remove the schema and host part with absoluteURI $REQUEST_URI =~ s/^[^:\/ ]+:\/\/[^\/ ]+//; $REQUEST_PATH = $REQUEST_URI; $REQUEST_PATH =~ s/\?.*$//; # Construct the REQUEST_PATH from scratches # Avoid it whenever possible, since its result is not always right, # especially for directory indices, like index.php. } else { $REQUEST_PATH = $ENV{"SCRIPT_NAME"}; $REQUEST_PATH .= $ENV{"PATH_INFO"} if exists $ENV{"PATH_INFO"} && !path_info_is_broken; } $REQUEST_PATH =~ s/^$ROOT_DIFF//; # Set the REQUEST_FILE from REQUEST_PATH $REQUEST_FILE = $REQUEST_PATH; $REQUEST_FILE =~ s/^.*\///; # Set to "." for directories, since we do not know what # exact file name it should be $REQUEST_FILE = "." if $REQUEST_FILE eq ""; # Strip the unwanted arguments from the query string $query = ""; if ($IS_MODPERL) { $query = $r->args if defined $r->args; } else { $query = $ENV{"QUERY_STRING"} if exists $ENV{"QUERY_STRING"} && $ENV{"QUERY_STRING"} ne ""; } # Construct the REQUEST_URI # REQUEST_URI is raw. All arguments are kept. $REQUEST_URI = $REQUEST_PATH . ($query ne ""? "?$query": "") if !defined $REQUEST_URI; if ($query ne "") { ($_, $query) = (new CGI($query), ""); # Remove the unwanted arguments # *NOTE*: session may not be started yet $_->delete("lang", $Selima::Session::NAME, "charset"); if (defined $_->param) { @_ = qw(); foreach my $name ($_->param) { foreach my $val ($_->param($name)) { push @_, uri_escape($name) . "=" . uri_escape($val) } } $query = "?" . join "&", @_; } } # Construct the REQUEST_FILEQS $REQUEST_FILEQS = $REQUEST_FILE . $query; # Construct the REQUEST_FULLURI, with the scheme and the host name $REQUEST_SCHEME = !$IS_CGI? "file": is_https()? "https": "http"; if (!$IS_CGI) { $REQUEST_HOST = "localhost"; } elsif ($IS_MODPERL) { $REQUEST_HOST = defined $r->hostname? $r->hostname: $r->server->server_hostname; } else { if (exists $ENV{"HTTP_HOST"}) { $REQUEST_HOST = $ENV{"HTTP_HOST"}; $REQUEST_HOST =~ s/:\d+$//; } else { $REQUEST_HOST = $ENV{"SERVER_NAME"}; } } # Deal with the URI bug that remembers the port when calculating authority $_ = $IS_MODPERL? $r->get_server_port: $ENV{"SERVER_PORT"}; $port = ($_ == (is_https()? 443: 80)? "": ":$_"); $REQUEST_HOSTPORT = $REQUEST_SCHEME . "://" . $REQUEST_HOST . $port . $ROOT_DIFF; $REQUEST_HOSTPATH = new URI($REQUEST_HOSTPORT . $REQUEST_PATH); $REQUEST_FULLURI = new URI($REQUEST_SCHEME . "://" . $REQUEST_HOST . $port . $REQUEST_URI); return; } # path_info_is_broken: If the server has a broken PATH_INFO sub path_info_is_broken() { local ($_, %_); # Microsoft-IIS is broken return 1 if is_iis(); return 0; } return 1;