HOME


Mini Shell 1.0
DIR: /var/www/yme/wp-content/plugins/simply-static/src/
Upload File :
Current File : /var/www/yme/wp-content/plugins/simply-static/src/class-ss-url-extractor.php
<?php

namespace Simply_Static;

use Exception;
use voku\helper\HtmlDomParser;
use function WPML\FP\apply;

// Exit if accessed directly
if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

/**
 * Simply Static URL extractor class
 *
 * Note that in addition to extracting URLs this class also makes modifications
 * to the Simply_Static\Url_Response that is passed into it: URLs in the body of
 * the response are updated to be absolute URLs.
 */
class Url_Extractor {

	/**
	 * Map of HTML tag name => names of the attributes on that tag that may
	 * hold a URL.
	 *
	 * extract_and_replace_urls_in_html() runs this map through the
	 * 'ss_match_tags' filter, looks up every listed tag in the parsed document
	 * and hands each tag plus its attribute list to
	 * extract_urls_and_update_tag().
	 *
	 * The following pages were incredibly helpful:
	 * - http://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
	 * - http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_web_page
	 * - http://php.net/manual/en/book.dom.php
	 *
	 * @var array
	 */

	protected static $match_tags = array(
		'a'       => array( 'href', 'urn', 'style' ),
		'base'    => array( 'href' ),
		'img'     => array(
			'src',
			'usemap',
			'longdesc',
			'dynsrc',
			'lowsrc',
			'srcset',
			'data-src',
			'data-srcset',
			'data-bg'
		),
		'use'     => array( 'href' ),
		'picture' => array( 'src', 'srcset', 'data-src', 'data-srcset', 'data-bg' ),
		'amp-img' => array( 'src', 'srcset' ),

		'applet' => array( 'code', 'codebase', 'archive', 'object' ),
		'area'   => array( 'href' ),
		'body'   => array( 'background', 'credits', 'instructions', 'logo' ),
		'input'  => array( 'src', 'usemap', 'dynsrc', 'lowsrc', 'formaction' ),

		'blockquote' => array( 'cite' ),
		'del'        => array( 'cite' ),
		'frame'      => array( 'longdesc', 'src' ),
		'head'       => array( 'profile' ),
		'ins'        => array( 'cite' ),
		'object'     => array( 'archive', 'classid', 'codebase', 'data', 'usemap' ),
		'q'          => array( 'cite' ),
		'script'     => array( 'src' ),

		'audio'        => array( 'src', 'srcset' ),
		'figure'       => array( 'src', 'srcset' ),
		'command'      => array( 'icon' ),
		'embed'        => array( 'src', 'code', 'pluginspage' ),
		'event-source' => array( 'src' ),
		'html'         => array( 'manifest', 'background', 'xmlns' ),
		'source'       => array( 'src', 'srcset' ),
		'video'        => array( 'src', 'poster', 'srcset' ),
		'image'        => array( 'href', 'xlink:href', 'src', 'style', 'srcset' ),

		'bgsound' => array( 'src' ),
		'div'     => array( 'href', 'src', 'style', 'data-bg', 'data-thumbnail' ),
		'span'    => array( 'href', 'src', 'style', 'data-bg' ),
		'section' => array( 'style', 'data-bg' ),
		'footer'  => array( 'style' ),
		'header'  => array( 'style' ),
		'ilayer'  => array( 'src' ),
		'table'   => array( 'background' ),
		'td'      => array( 'background' ),
		'th'      => array( 'background' ),
		'layer'   => array( 'src' ),
		'xml'     => array( 'src' ),

		'button'   => array( 'formaction', 'style' ),
		'datalist' => array( 'data' ),
		'select'   => array( 'data' ),

		'access'   => array( 'path' ),
		'card'     => array( 'onenterforward', 'onenterbackward', 'ontimer' ),
		'go'       => array( 'href' ),
		'option'   => array( 'onpick' ),
		'template' => array( 'onenterforward', 'onenterbackward', 'ontimer' ),
		'wml'      => array( 'xmlns' ),

		'meta' => array( 'content' ),
		'link' => array( 'href' ),
		'atom' => array( 'href' )
	);

	// /** @const */
	// protected static $match_metas = array(
	//	 'content-base',
	//	 'content-location',
	//	 'referer',
	//	 'location',
	//	 'refresh',
	// );

	/**
	 * The static page to extract URLs from
	 * @var \Simply_Static\Page
	 */
	protected $static_page;

	/**
	 * An instance of the options structure containing all options for this plugin
	 * @var \Simply_Static\Options
	 */
	protected $options = null;

	/**
	 * URLs collected from the static page (filled by add_to_extracted_urls())
	 * @var array
	 */
	public $extracted_urls = array();

	/**
	 * Set up an extractor bound to a single static page.
	 *
	 * @param \Simply_Static\Page $static_page Page whose archived file is read and rewritten
	 */
	public function __construct( $static_page ) {
		$this->options     = Options::instance();
		$this->static_page = $static_page;
	}

	/**
	 * Read the archived file that belongs to the static page.
	 *
	 * The file is located by appending the page's file_path to the archive
	 * directory from the plugin options.
	 *
	 * @return string|false File content, or whatever file_get_contents() reports on failure
	 */
	public function get_body() {
		$archive_file = $this->options->get_archive_dir() . $this->static_page->file_path;

		// Setting the stream context to prevent an issue where non-latin
		// characters get converted to html codes like #1234; inappropriately
		// http://stackoverflow.com/questions/5600371/file-get-contents-converts-utf-8-to-iso-8859-1
		$stream_context = stream_context_create(
			array(
				'http' => array(
					'header' => "Accept-Charset: UTF-8"
				)
			)
		);

		return file_get_contents( $archive_file, false, $stream_context );
	}

	/**
	 * Write content back to the static page's archived file (e.g. after URLs were updated)
	 *
	 * The content is passed through the 'simply_static_content_before_save'
	 * filter before it is written.
	 *
	 * @param string $content Content to store
	 *
	 * @return int|false Result of file_put_contents()
	 */
	public function save_body( $content ) {
		$filtered_content = apply_filters( 'simply_static_content_before_save', $content, $this );
		$archive_file     = $this->options->get_archive_dir() . $this->static_page->file_path;

		return file_put_contents( $archive_file, $filtered_content );
	}

	/**
	 * Get the static page this extractor was constructed with.
	 *
	 * Returns the value passed to the constructor unchanged.
	 *
	 * @return \Simply_Static\Page|string
	 */
	public function get_static_page() {
		return $this->static_page;
	}

	/**
	 * Extracts URLs from the static_page and updates them based on the dest. type
	 *
	 * Depending on the page type (html, css, xml, json) the matching
	 * extract_and_replace_urls_in_* method rewrites the file. Afterwards
	 * encoded URLs are replaced and, if the 'force_replace_url' option is on
	 * and neither 'use_forms' nor 'use_comments' is, a final forced
	 * search/replace of the origin URL is run.
	 *
	 * Note that no validation is performed on whether the URLs would actually
	 * return a 200/OK response.
	 *
	 * @return array Unique list of the URLs that were extracted
	 */
	public function extract_and_update_urls() {
		$page = $this->static_page;

		if ( $page->is_type( 'html' ) ) {
			$updated_html = $this->extract_and_replace_urls_in_html();
			$this->save_body( $updated_html );
		}

		if ( $page->is_type( 'css' ) ) {
			$updated_css = $this->extract_and_replace_urls_in_css( $this->get_body() );
			$this->save_body( $updated_css );
		}

		if ( $page->is_type( 'xml' ) ) {
			$updated_xml = $this->extract_and_replace_urls_in_xml();
			$this->save_body( $updated_xml );
		}

		if ( $page->is_type( 'json' ) ) {
			$updated_json = $this->extract_and_replace_urls_in_json();
			$this->save_body( $updated_json );
		}

		$is_rewritable_type = $page->is_type( 'html' )
			|| $page->is_type( 'css' )
			|| $page->is_type( 'xml' )
			|| $page->is_type( 'json' );

		if ( $is_rewritable_type ) {
			// Replace encoded URLs.
			$this->replace_encoded_urls();

			// If activated forced string/replace for URLs.
			if ( $this->options->get( 'force_replace_url' ) && ! ( $this->options->get( 'use_forms' ) || $this->options->get( 'use_comments' ) ) ) {
				$this->force_replace_urls();
			}
		}

		return array_unique( $this->extracted_urls );
	}

	/**
	 * Replaces encoded forms of the origin URL with the destination URL
	 *
	 * Two encodings are handled in the page's archived file:
	 * - slash-escaped URLs (http:\/\/example.org), as produced by
	 *   wp_json_encode and used e.g. by WP's `concatemoji`
	 * - percent-encoded URLs (http%3A%2F%2Fexample.org), as found in query
	 *   params
	 *
	 * If the file cannot be read nothing is written, so a failed read never
	 * truncates the file. If the regex replacement fails, the result of the
	 * first replacement is kept.
	 *
	 * @return void
	 */
	public function replace_encoded_urls() {
		$response_body = $this->get_body();

		// get_body() hands back file_get_contents()' result; a non-string
		// means the read failed and writing it back would empty the file.
		if ( ! is_string( $response_body ) ) {
			return;
		}

		$destination_url = $this->options->get_destination_url();

		// replace wp_json_encode'd urls, as used by WP's `concatemoji`
		$response_body = str_replace( addcslashes( Util::origin_url(), '/' ), addcslashes( $destination_url, '/' ), $response_body );

		// replace encoded URLs, as found in query params
		$encoded_host_pattern = '/(https?%3A)?%2F%2F' . preg_quote( urlencode( Util::origin_host() ), '/' ) . '/i';
		$replaced_body        = preg_replace( $encoded_host_pattern, urlencode( $destination_url ), $response_body );

		// preg_replace() returns null on a PCRE error; keep what we have then.
		if ( null !== $replaced_body ) {
			$response_body = $replaced_body;
		}

		$this->save_body( $response_body );
	}

	/**
	 * Replaces origin URL with destination URL in response body
	 *
	 * This is a function of last resort for URL replacement. Ideally it was
	 * already done in one of the extract_and_replace_urls_in_x functions.
	 *
	 * This catches instances of WordPress URLs and replaces them with the
	 * destination_url. This generally works fine for absolute and relative URL
	 * generation. It'll produce sub-optimal results for offline URLs, in that
	 * it's only replacing the host and not adjusting the path according to the
	 * current page. The point of this is more to remove any traces of the
	 * WordPress URL than anything else.
	 *
	 * The origin host is escaped with preg_quote() so that the dots in it only
	 * match literal dots. If the file cannot be read nothing is written.
	 *
	 * @return void
	 */
	public function force_replace_urls() {
		/*
		TODO:
		Can we get it to work with offline URLs via preg_replace_callback
		+ convert_url? To do that we'd need to grab the entire URL. Ideally
		that would also work with escaped URLs / inside of JavaScript. And
		even more ideally, we'd only have a single preg_replace.
		 */

		$response_body = $this->get_body();

		// A non-string means the read failed; writing it back would empty the file.
		if ( ! is_string( $response_body ) ) {
			return;
		}

		$destination_url = $this->options->get_destination_url();

		// replace any instance of the origin url, whether it starts with https://, http://, or //.
		$origin_pattern = '/(https?:)?\/\/' . preg_quote( Util::origin_host(), '/' ) . '/i';
		$replaced_body  = preg_replace( $origin_pattern, $destination_url, $response_body );

		// preg_replace() returns null on a PCRE error; keep the unmodified body then.
		if ( null !== $replaced_body ) {
			$response_body = $replaced_body;
		}

		// replace wp_json_encode'd urls, as used by WP's `concatemoji`.
		// e.g. {"concatemoji":"http:\/\/www.example.org\/wp-includes\/js\/wp-emoji-release.min.js?ver=4.6.1"}.
		$response_body = str_replace( addcslashes( Util::origin_url(), '/' ), addcslashes( $destination_url, '/' ), $response_body );

		$response_body = apply_filters( 'simply_static_force_replaced_urls_body', $response_body, $this->static_page );

		$this->save_body( $response_body );
	}

	/**
	 * Extract URLs and convert them for each watched attribute of a tag
	 *
	 * The tag is passed by reference, so it's updated directly and nothing is
	 * returned from this function.
	 *
	 * A 'style' attribute is always treated as CSS and rewritten through
	 * extract_and_replace_urls_in_css(). It is therefore skipped in the
	 * attribute loop, which would otherwise treat the whole CSS declaration as
	 * if it were a single URL.
	 *
	 * @param simple_html_dom_node $tag SHDP dom node
	 * @param string $tag_name name of the tag
	 * @param array $attributes names of the attributes to inspect on the tag
	 *
	 * @return void
	 */
	private function extract_urls_and_update_tag( &$tag, $tag_name, $attributes ) {
		if ( isset( $tag->style ) ) {
			$tag->style = $this->extract_and_replace_urls_in_css( $tag->style );
		}

		foreach ( $attributes as $attribute_name ) {
			// Inline CSS was already handled above and is not a URL itself.
			if ( 'style' === $attribute_name || ! isset( $tag->$attribute_name ) ) {
				continue;
			}

			$extracted_urls  = array();
			$attribute_value = $tag->$attribute_name;

			if ( 'meta' === $tag_name ) {
				// we need to verify that the meta tag is a URL.
				if ( filter_var( $attribute_value, FILTER_VALIDATE_URL ) ) {
					$extracted_urls[] = $attribute_value;
				}
			} elseif ( $attribute_name === 'srcset' || $attribute_name === 'data-srcset' ) {
				// srcset is a fair bit different from most html
				$extracted_urls = $this->extract_urls_from_srcset( $attribute_value );
			} else {
				$extracted_urls[] = $attribute_value;
			}

			$strict_url_validation = apply_filters( 'simply_static_strict_url_validation', false );

			foreach ( $extracted_urls as $extracted_url ) {
				if ( $strict_url_validation && ! filter_var( $extracted_url, FILTER_VALIDATE_URL ) ) {
					continue;
				}

				if ( $extracted_url === '' ) {
					continue;
				}

				$updated_extracted_url = $this->add_to_extracted_urls( $extracted_url );

				// A null result means the URL could not be converted; leave it as it is.
				if ( ! is_null( $updated_extracted_url ) ) {
					$attribute_value = str_replace( $extracted_url, $updated_extracted_url, $attribute_value );
				}
			}

			$tag->$attribute_name = $attribute_value;
		}
	}

	/**
	 * Walk the DOM of the page and rewrite every URL found in it
	 *
	 * Steps, in order:
	 * 1. every tag/attribute pair from the (filterable) $match_tags map,
	 * 2. the content of <style> tags (unless 'ss_parse_inline_style' is false),
	 * 3. the content of <script> tags (unless 'ss_parse_inline_script' is false),
	 * 4. the 'ss_after_extract_and_replace_urls_in_html' action and the
	 *    'ss_dom_before_save' filter.
	 *
	 * @return string The updated HTML, or the original HTML if it could not be parsed
	 */
	private function extract_and_replace_urls_in_html() {
		$html_string = $this->get_body();
		$match_tags  = apply_filters( 'ss_match_tags', self::$match_tags );
		$dom         = HtmlDomParser::str_get_html( $html_string );

		// return the original html string if dom is blank or boolean (unparseable)
		if ( $dom == '' || is_bool( $dom ) ) {
			return $html_string;
		}

		// handle tags with attributes
		foreach ( $match_tags as $tag_name => $attributes ) {
			foreach ( $dom->find( $tag_name ) as $tag ) {
				$this->extract_urls_and_update_tag( $tag, $tag_name, $attributes );
			}
		}

		// handle 'style' tag differently, since we need to parse the content.
		if ( apply_filters( 'ss_parse_inline_style', true ) ) {
			foreach ( $dom->find( 'style' ) as $style_tag ) {
				try {
					$rewritten_css            = $this->extract_and_replace_urls_in_css( $style_tag->innerhtmlKeep );
					$style_tag->innerhtmlKeep = $rewritten_css;
				} catch ( Exception $e ) {
					// No valid content: move on to the next tag.
				}
			}
		}

		// handle 'script' tag differently, since we need to parse the content.
		if ( apply_filters( 'ss_parse_inline_script', true ) ) {
			foreach ( $dom->find( 'script' ) as $script_tag ) {
				try {
					$rewritten_script          = $this->extract_and_replace_urls_in_script( $script_tag->innerhtmlKeep );
					$script_tag->innerhtmlKeep = $rewritten_script;
					$this->extract_and_replace_urls_in_script_inner_text( $script_tag );
				} catch ( Exception $e ) {
					// No valid content: move on to the next tag.
				}
			}
		}

		do_action( 'ss_after_extract_and_replace_urls_in_html', $dom, $this );

		// Further manipulate Dom?
		$dom = apply_filters( 'ss_dom_before_save', $dom, $this->static_page->url );

		return $dom->save();
	}

	/**
	 * Extract URLs from the srcset attribute
	 *
	 * The value is split on commas and the optional width/density descriptor
	 * ("300w", "1.5x") is removed from each candidate. A descriptor is only
	 * recognised when it is separated from the URL by whitespace, so a URL
	 * that merely ends in something like "300w" or "2x" is left intact.
	 *
	 * @param string $srcset Value of the srcset attribute
	 *
	 * @return array  Array of extracted URLs (may contain empty strings for empty candidates)
	 */
	private function extract_urls_from_srcset( $srcset ) {
		$extracted_urls = array();

		foreach ( explode( ',', $srcset ) as $url_and_descriptor ) {
			// remove the (optional) descriptor
			// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-srcset
			$extracted_urls[] = trim( preg_replace( '/\s+[\d\.]+[xw]\s*$/', '', $url_and_descriptor ) );
		}

		return $extracted_urls;
	}

	/**
	 * Use regex to extract and convert URLs in CSS
	 *
	 * HTML entities in the text are decoded first. Two kinds of references
	 * are then rewritten through css_matches():
	 * - anything inside url(), e.g. background-image: url(image.png); or
	 *   @import url("fineprint.css") print;
	 * - quoted @import targets without url(), e.g.
	 *   @import "common.css" screen, projection;
	 *
	 * @param string $text The CSS to extract URLs from
	 *
	 * @return string The CSS with all URLs converted
	 */
	private function extract_and_replace_urls_in_css( $text ) {
		$callback = array( $this, 'css_matches' );
		$css      = html_entity_decode( $text );

		// url()
		$css = preg_replace_callback( "/url\(\s*[\"']?([^)\"']+)/", $callback, $css );

		// @import w/o url()
		$css = preg_replace_callback( "/@import\s+[\"']([^\"']+)/", $callback, $css );

		return $css;
	}

	/**
	 * Replace origin-host URLs in the content of a script tag with the destination URL
	 *
	 * HTML entities are decoded first; for JSON content quotes are left
	 * encoded (ENT_NOQUOTES). The decoded text is passed through the
	 * 'simply_static_decoded_urls_in_script' filter and then every
	 * http://, https:// or // reference to the origin host is replaced. The
	 * host is escaped with preg_quote() so its dots only match literal dots.
	 *
	 * @param string $text Content of the script tag
	 *
	 * @return string|null The updated content (null if preg_replace() fails)
	 */
	private function extract_and_replace_urls_in_script( $text ) {
		if ( $this->is_json( $text ) ) {
			$decoded_text = html_entity_decode( $text, ENT_NOQUOTES );
		} else {
			$decoded_text = html_entity_decode( $text );
		}

		$decoded_text = apply_filters( 'simply_static_decoded_urls_in_script', $decoded_text, $this->static_page, $this );

		$origin_pattern = '/(https?:)?\/\/' . preg_quote( Util::origin_host(), '/' ) . '/i';

		return preg_replace( $origin_pattern, $this->options->get_destination_url(), $decoded_text );
	}

	/**
	 * Replace origin-host URLs in a script tag according to the destination URL type
	 *
	 * - 'absolute': the origin scheme/host is replaced with the destination URL.
	 * - 'relative': the origin scheme/host (and an optional trailing slash) is
	 *   replaced with the 'relative_path' option.
	 * - anything else (offline): the origin scheme/host (and an optional
	 *   trailing slash) is replaced with '/'.
	 *
	 * The origin host is escaped with preg_quote() so its dots only match
	 * literal dots. The tag's innerhtmlKeep is updated in place.
	 *
	 * @param object $tag DOM node of the script tag
	 *
	 * @return object The same $tag that was passed in
	 */
	private function extract_and_replace_urls_in_script_inner_text( $tag ) {

		$origin = '(https?:)?\/\/' . preg_quote( Util::origin_host(), '/' );

		switch ( $this->options->get( 'destination_url_type' ) ) {
			case 'absolute':
				$regex      = '/' . $origin . '/i';
				$convert_to = $this->options->get_destination_url();
				break;
			case 'relative':
				// Adding \/? before end of regex pattern to convert url.com/ & url.com to relative path, ex. /path/.
				$regex      = '/' . $origin . '\/?/i';
				$convert_to = $this->options->get( 'relative_path' );
				break;
			default:
				// Offline mode.
				// Adding \/? before end of regex pattern to convert url.com/ & url.com to relative path, ex. /path/.
				$regex      = '/' . $origin . '\/?/i';
				$convert_to = '/';
		}

		// Keep quotes encoded for JSON so decoding cannot break the JSON syntax.
		if ( $this->is_json( $tag->innerhtmlKeep ) ) {
			$decoded_text = html_entity_decode( $tag->innerhtmlKeep, ENT_NOQUOTES );
		} else {
			$decoded_text = html_entity_decode( $tag->innerhtmlKeep );
		}

		$decoded_text = apply_filters( 'simply_static_decoded_text_in_script', $decoded_text, $this->static_page, $convert_to, $tag, $this );

		$tag->innerhtmlKeep = preg_replace( $regex, $convert_to, $decoded_text );

		return $tag;
	}

	/**
	 * Check whether a given string is a valid JSON representation.
	 *
	 * Based on: WP CLI, https://github.com/wp-cli/wp-cli/blob/f3e4b0785aa3d3132ee73be30aedca8838a8fa06/php/utils.php#L1600-L1612
	 *
	 * Leading whitespace is ignored when looking at the first character, so
	 * JSON that starts on a new line (as is common for the content of inline
	 * script tags) is still recognised.
	 *
	 * @param string $argument String to evaluate.
	 * @param bool $ignore_scalars Optional. Whether to ignore scalar values,
	 *                               i.e. only accept objects and arrays.
	 *                               Defaults to true.
	 *
	 * @return bool Whether the provided string is a valid JSON representation.
	 */
	protected function is_json( $argument, $ignore_scalars = true ) {
		if ( ! is_string( $argument ) ) {
			return false;
		}

		$candidate = ltrim( $argument );

		if ( '' === $candidate ) {
			return false;
		}

		if ( $ignore_scalars && ! in_array( $candidate[0], [ '{', '[' ], true ) ) {
			return false;
		}

		// Only the error state matters; the decoded value is discarded.
		json_decode( $argument, true );

		return json_last_error() === JSON_ERROR_NONE;
	}

	/**
	 * callback function for preg_replace in extract_and_replace_urls_in_css
	 *
	 * Takes the match, extracts the URL, adds it to the list of URLs, converts
	 * the URL to a destination URL.
	 *
	 * If the URL cannot be converted (add_to_extracted_urls() yields null or
	 * false) the matched text is returned untouched instead of having the URL
	 * stripped out of it.
	 *
	 * @param array $matches Array of preg_replace matches; [0] is the full
	 *                       match, [1] the URL inside it
	 *
	 * @return string An updated string for the text that was originally matched
	 */
	public function css_matches( $matches ) {
		$full_match    = $matches[0];
		$extracted_url = isset( $matches[1] ) ? $matches[1] : '';

		if ( '' === $extracted_url ) {
			return $full_match;
		}

		$updated_extracted_url = $this->add_to_extracted_urls( $extracted_url );

		// Replacing with null/false would silently delete the URL from the CSS.
		if ( is_null( $updated_extracted_url ) || false === $updated_extracted_url ) {
			return $full_match;
		}

		return str_ireplace( $extracted_url, $updated_extracted_url, $full_match );
	}

	/**
	 * Use regex to extract and convert URLs in XML docs (e.g. /feed/)
	 *
	 * Every http:// or https:// URL in the file is handed to xml_matches().
	 *
	 * @return string The XML with all of the URLs converted
	 */
	private function extract_and_replace_urls_in_xml() {
		// match anything starting with http/s plus all following characters
		// except: [space] " ' <
		return preg_replace_callback(
			"/https?:\/\/[^\s\"'<]+/",
			array( $this, 'xml_matches' ),
			$this->get_body()
		);
	}

	/**
	 * Use regex to extract and convert URLs in JSON files
	 *
	 * Every http:// or https:// URL in the file is handed to json_matches().
	 *
	 * @return string The JSON with all of the URLs converted
	 */
	private function extract_and_replace_urls_in_json() {
		// match anything starting with http/s plus all following characters
		// except: [space] " ' <
		return preg_replace_callback(
			"/https?:\/\/[^\s\"'<]+/",
			array( $this, 'json_matches' ),
			$this->get_body()
		);
	}

	/**
	 * Callback function for preg_replace in extract_and_replace_urls_in_xml
	 *
	 * Takes the match, adds it to the list of URLs, converts the URL to a
	 * destination URL.
	 *
	 * When the URL cannot be converted (add_to_extracted_urls() yields null or
	 * false) the original match is returned, so the URL is never removed from
	 * the document.
	 *
	 * @param array $matches Array of regex matches found in the XML doc
	 *
	 * @return string         The extracted, converted URL
	 */
	private function xml_matches( $matches ) {
		$extracted_url = $matches[0];

		if ( '' === $extracted_url ) {
			return $extracted_url;
		}

		$updated_extracted_url = $this->add_to_extracted_urls( $extracted_url );

		if ( is_null( $updated_extracted_url ) || false === $updated_extracted_url ) {
			return $extracted_url;
		}

		return $updated_extracted_url;
	}

	/**
	 * Callback function for preg_replace in extract_and_replace_urls_in_json
	 *
	 * Takes the match, adds it to the list of URLs, converts the URL to a
	 * destination URL.
	 *
	 * When the URL cannot be converted (add_to_extracted_urls() yields null or
	 * false) the original match is returned, so the URL is never removed from
	 * the file.
	 *
	 * @param array $matches Array of regex matches found in the JSON file
	 *
	 * @return string         The extracted, converted URL
	 */
	private function json_matches( $matches ) {
		$extracted_url = $matches[0];

		if ( '' === $extracted_url ) {
			return $extracted_url;
		}

		$updated_extracted_url = $this->add_to_extracted_urls( $extracted_url );

		if ( is_null( $updated_extracted_url ) || false === $updated_extracted_url ) {
			return $extracted_url;
		}

		return $updated_extracted_url;
	}

	/**
	 * Add a URL to the extracted URLs array and convert to absolute/relative/offline
	 *
	 * The URL is first made absolute relative to the static page's URL. If the
	 * result is a local URL, it is added (without params and fragment, and
	 * after the 'simply_static_extracted_url' filter) to the extracted URLs
	 * queue and then converted via convert_url(). Non-local or empty results
	 * are returned as they came back from the absolute-URL conversion.
	 *
	 * @param string $extracted_url URL as found in the document
	 *
	 * @return string The URL, converted to an absolute/relative/offline URL
	 */
	public function add_to_extracted_urls( $extracted_url ) {
		$absolute_url = Util::relative_to_absolute_url( $extracted_url, $this->static_page->url );

		// Nothing to queue or convert for empty or non-local URLs.
		if ( ! $absolute_url || ! Util::is_local_url( $absolute_url ) ) {
			return $absolute_url;
		}

		// add to extracted urls queue
		$queued_url             = Util::remove_params_and_fragment( $absolute_url );
		$this->extracted_urls[] = apply_filters( 'simply_static_extracted_url', $queued_url, $absolute_url, $this->static_page );

		return $this->convert_url( $absolute_url );
	}

	/**
	 * Convert URL to absolute URL at desired host or to a relative or offline URL
	 *
	 * The URL is filtered through 'simply_static_pre_converted_url', converted
	 * according to the 'destination_url_type' option, stripped of the
	 * 'simply_static_page' query arg and finally filtered through
	 * 'simply_static_converted_url'.
	 *
	 * @param string $url Absolute URL to convert
	 *
	 * @return string      Converted URL
	 */
	private function convert_url( $url ) {

		$url = apply_filters( 'simply_static_pre_converted_url', $url, $this->static_page, $this );

		switch ( $this->options->get( 'destination_url_type' ) ) {
			case 'absolute':
				$url = $this->convert_absolute_url( $url );
				break;
			case 'relative':
				$url = $this->convert_relative_url( $url );
				break;
			case 'offline':
				$url = $this->convert_offline_url( $url );
				break;
		}

		$url = remove_query_arg( 'simply_static_page', $url );

		return apply_filters( 'simply_static_converted_url', $url, $this->static_page, $this );
	}

	/**
	 * Convert a WordPress URL to a URL at the destination scheme/host
	 *
	 * The protocol is stripped from the URL and every occurrence of the origin
	 * host in the remainder is replaced with the destination URL.
	 *
	 * @param string $url Absolute URL to convert
	 *
	 * @return string      URL at destination scheme/host
	 */
	private function convert_absolute_url( $url ) {
		$without_protocol = Util::strip_protocol_from_url( $url );

		return str_replace(
			Util::origin_host(),
			$this->options->get_destination_url(),
			$without_protocol
		);
	}

	/**
	 * Convert a WordPress URL to a relative path
	 *
	 * The result is the 'relative_path' option followed by the path of the
	 * local URL.
	 *
	 * @param string $url Absolute URL to convert
	 *
	 * @return string      Relative path for the URL
	 */
	private function convert_relative_url( $url ) {
		$local_path = Util::get_path_from_local_url( $url );

		return $this->options->get( 'relative_path' ) . $local_path;
	}

	/**
	 * Convert a WordPress URL to a path for offline usage
	 *
	 * Builds a path that leads from the current page to the given URL. When
	 * the URL has no file extension, index.html is appended to the path,
	 * before any params or fragment.
	 *
	 * Example:
	 *   static_page->url: http://static-site.dev/2013/01/11/page-a/
	 *               $url: http://static-site.dev/2013/01/10/page-b/
	 *               path: ./../../10/page-b/index.html
	 *
	 * @param string $url Absolute URL to convert
	 *
	 * @return string      Converted path
	 */
	private function convert_offline_url( $url ) {
		// remove the scheme/host from both urls
		$current_page_path = Util::get_path_from_local_url( $this->static_page->url );
		$target_path       = Util::get_path_from_local_url( $url );

		// create a path from one page to the other
		$offline_path = Util::create_offline_path( $target_path, $current_page_path );

		$url_info = Util::url_path_info( $url );
		if ( $url_info['extension'] !== '' ) {
			// The URL points at a file; the path is complete.
			return $offline_path;
		}

		// If there's no extension, we need to add a /index.html,
		// and do so before any params or fragments.
		$path_only        = Util::remove_params_and_fragment( $offline_path );
		$params_and_after = substr( $offline_path, strlen( $path_only ) );

		return trailingslashit( $path_only ) . 'index.html' . $params_and_after;
	}
}