[markdown] Update SmartyPants Lib to 1.8.1.
Signed-off-by: Thomas Hochstein <thh@inter.net>
This commit is contained in:
parent
bc07cb91e2
commit
fd183bb999
|
@ -3,31 +3,16 @@
|
|||
# SmartyPants - Smart typography for web sites
|
||||
#
|
||||
# PHP SmartyPants
|
||||
# Copyright (c) 2004-2013 Michel Fortin
|
||||
# <http://michelf.ca/>
|
||||
# Copyright (c) 2004-2016 Michel Fortin
|
||||
# <https://michelf.ca/>
|
||||
#
|
||||
# Original SmartyPants
|
||||
# Copyright (c) 2003-2004 John Gruber
|
||||
# <http://daringfireball.net/>
|
||||
# <https://daringfireball.net/>
|
||||
#
|
||||
namespace Michelf;
|
||||
|
||||
|
||||
### Pre-Configured SmartyPants Modes ###
|
||||
|
||||
# SmartyPants does nothing at all
|
||||
const SMARTYPANTS_ATTR_DO_NOTHING = 0;
|
||||
# "--" for em-dashes; no en-dash support
|
||||
const SMARTYPANTS_ATTR_EM_DASH = 1;
|
||||
# "---" for em-dashes; "--" for en-dashes
|
||||
const SMARTYPANTS_ATTR_LONG_EM_DASH_SHORT_EN = 2;
|
||||
# "--" for em-dashes; "---" for en-dashes
|
||||
const SMARTYPANTS_ATTR_SHORT_EM_DASH_LONG_EN = 3;
|
||||
|
||||
# Default is SMARTYPANTS_ATTR_EM_DASH
|
||||
const SMARTYPANTS_ATTR_DEFAULT = SMARTYPANTS_ATTR_EM_DASH;
|
||||
|
||||
|
||||
#
|
||||
# SmartyPants Parser Class
|
||||
#
|
||||
|
@ -36,12 +21,29 @@ class SmartyPants {
|
|||
|
||||
### Version ###
|
||||
|
||||
const SMARTYPANTSLIB_VERSION = "1.6.0-beta1";
|
||||
const SMARTYPANTSLIB_VERSION = "1.8.1";
|
||||
|
||||
|
||||
### Presets
|
||||
|
||||
# SmartyPants does nothing at all
|
||||
const ATTR_DO_NOTHING = 0;
|
||||
# "--" for em-dashes; no en-dash support
|
||||
const ATTR_EM_DASH = 1;
|
||||
# "---" for em-dashes; "--" for en-dashes
|
||||
const ATTR_LONG_EM_DASH_SHORT_EN = 2;
|
||||
# "--" for em-dashes; "---" for en-dashes
|
||||
const ATTR_SHORT_EM_DASH_LONG_EN = 3;
|
||||
# Stupefy mode: convert SmartyPants HTML entities back to their ASCII counterparts
|
||||
const ATTR_STUPEFY = -1;
|
||||
|
||||
# The default preset: ATTR_EM_DASH
|
||||
const ATTR_DEFAULT = SmartyPants::ATTR_EM_DASH;
|
||||
|
||||
|
||||
### Standard Function Interface ###
|
||||
|
||||
public static function defaultTransform($text, $attr = SMARTYPANTS_ATTR_DEFAULT) {
|
||||
public static function defaultTransform($text, $attr = SmartyPants::ATTR_DEFAULT) {
|
||||
#
|
||||
# Initialize the parser and return the result of its transform method.
|
||||
# This will work fine for derived classes too.
|
||||
|
@ -68,18 +70,35 @@ class SmartyPants {
|
|||
public $tags_to_skip = 'pre|code|kbd|script|style|math';
|
||||
|
||||
# Options to specify which transformations to make:
|
||||
protected $do_nothing = 0; # disable all transforms
|
||||
protected $do_quotes = 0;
|
||||
protected $do_backticks = 0; # 1 => double only, 2 => double & single
|
||||
protected $do_dashes = 0; # 1, 2, or 3 for the three modes described above
|
||||
protected $do_ellipses = 0;
|
||||
protected $do_stupefy = 0;
|
||||
protected $convert_quot = 0; # should we translate " entities into normal quotes?
|
||||
public $do_nothing = 0; # disable all transforms
|
||||
public $do_quotes = 0;
|
||||
public $do_backticks = 0; # 1 => double only, 2 => double & single
|
||||
public $do_dashes = 0; # 1, 2, or 3 for the three modes described above
|
||||
public $do_ellipses = 0;
|
||||
public $do_stupefy = 0;
|
||||
public $convert_quot = 0; # should we translate " entities into normal quotes?
|
||||
|
||||
# Smart quote characters:
|
||||
# Opening and closing smart double-quotes.
|
||||
public $smart_doublequote_open = '“';
|
||||
public $smart_doublequote_close = '”';
|
||||
public $smart_singlequote_open = '‘';
|
||||
public $smart_singlequote_close = '’'; # Also apostrophe.
|
||||
|
||||
# ``Backtick quotes''
|
||||
public $backtick_doublequote_open = '“'; // replacement for ``
|
||||
public $backtick_doublequote_close = '”'; // replacement for ''
|
||||
public $backtick_singlequote_open = '‘'; // replacement for `
|
||||
public $backtick_singlequote_close = '’'; // replacement for ' (also apostrophe)
|
||||
|
||||
# Other punctuation
|
||||
public $em_dash = '—';
|
||||
public $en_dash = '–';
|
||||
public $ellipsis = '…';
|
||||
|
||||
### Parser Implementation ###
|
||||
|
||||
public function __construct($attr = SMARTYPANTS_ATTR_DEFAULT) {
|
||||
public function __construct($attr = SmartyPants::ATTR_DEFAULT) {
|
||||
#
|
||||
# Initialize a parser with certain attributes.
|
||||
#
|
||||
|
@ -183,6 +202,30 @@ class SmartyPants {
|
|||
}
|
||||
|
||||
|
||||
public function decodeEntitiesInConfiguration() {
	#
	# Utility function that converts HTML entities in the output
	# configuration variables (smart quotes, backtick quotes, dashes and
	# ellipsis) to UTF-8 characters. Each listed property is overwritten
	# in place; nothing is returned.
	#
	# The flags and charset are passed explicitly: the defaults of
	# html_entity_decode() depend on the PHP version and on the
	# default_charset ini setting, and a non-UTF-8 target charset may
	# leave entities such as curly quotes undecoded.
	#
	$output_config_vars = array(
		'smart_doublequote_open',
		'smart_doublequote_close',
		'smart_singlequote_open',
		'smart_singlequote_close',
		'backtick_doublequote_open',
		'backtick_doublequote_close',
		'backtick_singlequote_open',
		'backtick_singlequote_close',
		'em_dash',
		'en_dash',
		'ellipsis',
	);
	foreach ($output_config_vars as $var) {
		$this->$var = html_entity_decode($this->$var, ENT_QUOTES, 'UTF-8');
	}
}
|
||||
|
||||
|
||||
protected function educate($t, $prev_token_last_char) {
|
||||
$t = $this->processEscapes($t);
|
||||
|
||||
|
@ -208,19 +251,19 @@ class SmartyPants {
|
|||
if ($t == "'") {
|
||||
# Special case: single-character ' token
|
||||
if (preg_match('/\S/', $prev_token_last_char)) {
|
||||
$t = "’";
|
||||
$t = $this->smart_singlequote_close;
|
||||
}
|
||||
else {
|
||||
$t = "‘";
|
||||
$t = $this->smart_singlequote_open;
|
||||
}
|
||||
}
|
||||
else if ($t == '"') {
|
||||
# Special case: single-character " token
|
||||
if (preg_match('/\S/', $prev_token_last_char)) {
|
||||
$t = "”";
|
||||
$t = $this->smart_doublequote_close;
|
||||
}
|
||||
else {
|
||||
$t = "“";
|
||||
$t = $this->smart_doublequote_open;
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -235,436 +278,6 @@ class SmartyPants {
|
|||
}
|
||||
|
||||
|
||||
protected function educateQuotes($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
#
|
||||
# Returns: The string, with "educated" curly quote HTML entities.
|
||||
#
|
||||
# Example input: "Isn't this fun?"
|
||||
# Example output: “Isn’t this fun?”
|
||||
#
|
||||
# Make our own "punctuation" character class, because the POSIX-style
|
||||
# [:PUNCT:] is only available in Perl 5.6 or later:
|
||||
$punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
|
||||
|
||||
# Special case if the very first character is a quote
|
||||
# followed by punctuation at a non-word-break. Close the quotes by brute force:
|
||||
$_ = preg_replace(
|
||||
array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
|
||||
array('’', '”'), $_);
|
||||
|
||||
|
||||
# Special case for double sets of quotes, e.g.:
|
||||
# <p>He said, "'Quoted' words in a larger quote."</p>
|
||||
$_ = preg_replace(
|
||||
array("/\"'(?=\w)/", "/'\"(?=\w)/"),
|
||||
array('“‘', '‘“'), $_);
|
||||
|
||||
# Special case for decade abbreviations (the '80s):
|
||||
$_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
|
||||
|
||||
$close_class = '[^\ \t\r\n\[\{\(\-]';
|
||||
$dec_dashes = '&\#8211;|&\#8212;';
|
||||
|
||||
# Get most opening single quotes:
|
||||
$_ = preg_replace("{
|
||||
(
|
||||
\\s | # a whitespace char, or
|
||||
| # a non-breaking space entity, or
|
||||
-- | # dashes, or
|
||||
&[mn]dash; | # named dash entities
|
||||
$dec_dashes | # or decimal entities
|
||||
&\\#x201[34]; # or hex
|
||||
)
|
||||
' # the quote
|
||||
(?=\\w) # followed by a word character
|
||||
}x", '\1‘', $_);
|
||||
# Single closing quotes:
|
||||
$_ = preg_replace("{
|
||||
($close_class)?
|
||||
'
|
||||
(?(1)| # If $1 captured, then do nothing;
|
||||
(?=\\s | s\\b) # otherwise, positive lookahead for a whitespace
|
||||
) # char or an 's' at a word ending position. This
|
||||
# is a special case to handle something like:
|
||||
# \"<i>Custer</i>'s Last Stand.\"
|
||||
}xi", '\1’', $_);
|
||||
|
||||
# Any remaining single quotes should be opening ones:
|
||||
$_ = str_replace("'", '‘', $_);
|
||||
|
||||
|
||||
# Get most opening double quotes:
|
||||
$_ = preg_replace("{
|
||||
(
|
||||
\\s | # a whitespace char, or
|
||||
| # a non-breaking space entity, or
|
||||
-- | # dashes, or
|
||||
&[mn]dash; | # named dash entities
|
||||
$dec_dashes | # or decimal entities
|
||||
&\\#x201[34]; # or hex
|
||||
)
|
||||
\" # the quote
|
||||
(?=\\w) # followed by a word character
|
||||
}x", '\1“', $_);
|
||||
|
||||
# Double closing quotes:
|
||||
$_ = preg_replace("{
|
||||
($close_class)?
|
||||
\"
|
||||
(?(1)|(?=\\s)) # If $1 captured, then do nothing;
|
||||
# if not, then make sure the next char is whitespace.
|
||||
}x", '\1”', $_);
|
||||
|
||||
# Any remaining quotes should be opening ones.
|
||||
$_ = str_replace('"', '“', $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateBackticks($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with ``backticks'' -style double quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
# Example input: ``Isn't this fun?''
|
||||
# Example output: “Isn't this fun?”
|
||||
#
|
||||
|
||||
$_ = str_replace(array("``", "''",),
|
||||
array('“', '”'), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateSingleBackticks($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with `backticks' -style single quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
# Example input: `Isn't this fun?'
|
||||
# Example output: ‘Isn’t this fun?’
|
||||
#
|
||||
|
||||
$_ = str_replace(array("`", "'",),
|
||||
array('‘', '’'), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateDashes($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
#
|
||||
# Returns: The string, with each instance of "--" translated to
|
||||
# an em-dash HTML entity.
|
||||
#
|
||||
|
||||
$_ = str_replace('--', '—', $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateDashesOldSchool($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
#
|
||||
# Returns: The string, with each instance of "--" translated to
|
||||
# an en-dash HTML entity, and each "---" translated to
|
||||
# an em-dash HTML entity.
|
||||
#
|
||||
|
||||
# em en
|
||||
$_ = str_replace(array("---", "--",),
|
||||
array('—', '–'), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateDashesOldSchoolInverted($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
#
|
||||
# Returns: The string, with each instance of "--" translated to
|
||||
# an em-dash HTML entity, and each "---" translated to
|
||||
# an en-dash HTML entity. Two reasons why: First, unlike the
|
||||
# en- and em-dash syntax supported by
|
||||
# EducateDashesOldSchool(), it's compatible with existing
|
||||
# entries written before SmartyPants 1.1, back when "--" was
|
||||
# only used for em-dashes. Second, em-dashes are more
|
||||
# common than en-dashes, and so it sort of makes sense that
|
||||
# the shortcut should be shorter to type. (Thanks to Aaron
|
||||
# Swartz for the idea.)
|
||||
#
|
||||
|
||||
# en em
|
||||
$_ = str_replace(array("---", "--",),
|
||||
array('–', '—'), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateEllipses($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with each instance of "..." translated to
|
||||
# an ellipsis HTML entity. Also converts the case where
|
||||
# there are spaces between the dots.
|
||||
#
|
||||
# Example input: Huh...?
|
||||
# Example output: Huh…?
|
||||
#
|
||||
|
||||
$_ = str_replace(array("...", ". . .",), '…', $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function stupefyEntities($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with each SmartyPants HTML entity translated to
|
||||
# its ASCII counterpart.
|
||||
#
|
||||
# Example input: “Hello — world.”
|
||||
# Example output: "Hello -- world."
|
||||
#
|
||||
|
||||
# en-dash em-dash
|
||||
$_ = str_replace(array('–', '—'),
|
||||
array('-', '--'), $_);
|
||||
|
||||
# single quote open close
|
||||
$_ = str_replace(array('‘', '’'), "'", $_);
|
||||
|
||||
# double quote open close
|
||||
$_ = str_replace(array('“', '”'), '"', $_);
|
||||
|
||||
$_ = str_replace('…', '...', $_); # ellipsis
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function processEscapes($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, after processing the following backslash
|
||||
# escape sequences. This is useful if you want to force a "dumb"
|
||||
# quote or other character to appear.
|
||||
#
|
||||
# Escape Value
|
||||
# ------ -----
|
||||
# \\ \
|
||||
# \" "
|
||||
# \' '
|
||||
# \. .
|
||||
# \- -
|
||||
# \` `
|
||||
#
|
||||
$_ = str_replace(
|
||||
array('\\\\', '\"', "\'", '\.', '\-', '\`'),
|
||||
array('\', '"', ''', '.', '-', '`'), $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function tokenizeHTML($str) {
|
||||
#
|
||||
# Parameter: String containing HTML markup.
|
||||
# Returns: An array of the tokens comprising the input
|
||||
# string. Each token is either a tag (possibly with nested
|
||||
# tags contained therein, such as <a href="<MTFoo>">), or a
|
||||
# run of text between tags. Each element of the array is a
|
||||
# two-element array; the first is either 'tag' or 'text';
|
||||
# the second is the actual value.
|
||||
#
|
||||
#
|
||||
# Regular expression derived from the _tokenize() subroutine in
|
||||
# Brad Choate's MTRegex plugin.
|
||||
# <http://www.bradchoate.com/past/mtregex.php>
|
||||
#
|
||||
$index = 0;
|
||||
$tokens = array();
|
||||
|
||||
$match = '(?s:<!--.*?-->)|'. # comment
|
||||
'(?s:<\?.*?\?>)|'. # processing instruction
|
||||
# regular tags
|
||||
'(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
|
||||
|
||||
$parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
|
||||
foreach ($parts as $part) {
|
||||
if (++$index % 2 && $part != '')
|
||||
$tokens[] = array('text', $part);
|
||||
else
|
||||
$tokens[] = array('tag', $part);
|
||||
}
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#
|
||||
# SmartyPants Typographer Parser Class
|
||||
#
|
||||
class _SmartyPantsTypographer_TmpImpl extends \Michelf\SmartyPants {
|
||||
|
||||
### Configuration Variables ###
|
||||
|
||||
# Options to specify which transformations to make:
|
||||
public $do_comma_quotes = 0;
|
||||
public $do_guillemets = 0;
|
||||
public $do_space_emdash = 0;
|
||||
public $do_space_endash = 0;
|
||||
public $do_space_colon = 0;
|
||||
public $do_space_semicolon = 0;
|
||||
public $do_space_marks = 0;
|
||||
public $do_space_frenchquote = 0;
|
||||
public $do_space_thousand = 0;
|
||||
public $do_space_unit = 0;
|
||||
|
||||
# Smart quote characters:
|
||||
# Opening and closing smart double-quotes.
|
||||
public $smart_doublequote_open = '“';
|
||||
public $smart_doublequote_close = '”';
|
||||
public $smart_singlequote_open = '‘';
|
||||
public $smart_singlequote_close = '’'; # Also apostrophe.
|
||||
|
||||
# Space characters for different places:
|
||||
# Space around em-dashes. "He_—_or she_—_should change that."
|
||||
public $space_emdash = " ";
|
||||
# Space around en-dashes. "He_–_or she_–_should change that."
|
||||
public $space_endash = " ";
|
||||
# Space before a colon. "He said_: here it is."
|
||||
public $space_colon = " ";
|
||||
# Space before a semicolon. "That's what I said_; that's what he said."
|
||||
public $space_semicolon = " ";
|
||||
# Space before a question mark and an exclamation mark: "¡_Holà_! What_?"
|
||||
public $space_marks = " ";
|
||||
# Space inside french quotes. "Voici la «_chose_» qui m'a attaqué."
|
||||
public $space_frenchquote = " ";
|
||||
# Space as thousand separator. "On compte 10_000 maisons sur cette liste."
|
||||
public $space_thousand = " ";
|
||||
# Space before a unit abbreviation. "This 12_kg of matter costs 10_$."
|
||||
public $space_unit = " ";
|
||||
|
||||
# Expression of a space (breakable or not):
|
||||
public $space = '(?: | | |�*160;|�*[aA]0;)';
|
||||
|
||||
|
||||
### Parser Implementation ###
|
||||
|
||||
public function __construct($attr = SMARTYPANTS_ATTR_DEFAULT) {
|
||||
#
|
||||
# Initialize a SmartyPantsTypographer_Parser with certain attributes.
|
||||
#
|
||||
# Parser attributes:
|
||||
# 0 : do nothing
|
||||
# 1 : set all, except dash spacing
|
||||
# 2 : set all, except dash spacing, using old school en- and em- dash shortcuts
|
||||
# 3 : set all, except dash spacing, using inverted old school en and em- dash shortcuts
|
||||
#
|
||||
# Punctuation:
|
||||
# q -> quotes
|
||||
# b -> backtick quotes (``double'' only)
|
||||
# B -> backtick quotes (``double'' and `single')
|
||||
# c -> comma quotes (,,double`` only)
|
||||
# g -> guillemets (<<double>> only)
|
||||
# d -> dashes
|
||||
# D -> old school dashes
|
||||
# i -> inverted old school dashes
|
||||
# e -> ellipses
|
||||
# w -> convert " entities to " for Dreamweaver users
|
||||
#
|
||||
# Spacing:
|
||||
# : -> colon spacing +-
|
||||
# ; -> semicolon spacing +-
|
||||
# m -> question and exclamation marks spacing +-
|
||||
# h -> em-dash spacing +-
|
||||
# H -> en-dash spacing +-
|
||||
# f -> french quote spacing +-
|
||||
# t -> thousand separator spacing -
|
||||
# u -> unit spacing +-
|
||||
# (you can add a plus sign after some of these options denoted by + to
|
||||
# add the space when it is not already present, or you can add a minus
|
||||
# sign to completely remove any space present)
|
||||
#
|
||||
# Initialize inherited SmartyPants parser.
|
||||
parent::__construct($attr);
|
||||
|
||||
if ($attr == "1" || $attr == "2" || $attr == "3") {
|
||||
# Do everything, turn all options on.
|
||||
$this->do_comma_quotes = 1;
|
||||
$this->do_guillemets = 1;
|
||||
$this->do_space_emdash = 1;
|
||||
$this->do_space_endash = 1;
|
||||
$this->do_space_colon = 1;
|
||||
$this->do_space_semicolon = 1;
|
||||
$this->do_space_marks = 1;
|
||||
$this->do_space_frenchquote = 1;
|
||||
$this->do_space_thousand = 1;
|
||||
$this->do_space_unit = 1;
|
||||
}
|
||||
else if ($attr == "-1") {
|
||||
# Special "stupefy" mode.
|
||||
$this->do_stupefy = 1;
|
||||
}
|
||||
else {
|
||||
$chars = preg_split('//', $attr);
|
||||
foreach ($chars as $c){
|
||||
if ($c == "c") { $current =& $this->do_comma_quotes; }
|
||||
else if ($c == "g") { $current =& $this->do_guillemets; }
|
||||
else if ($c == ":") { $current =& $this->do_space_colon; }
|
||||
else if ($c == ";") { $current =& $this->do_space_semicolon; }
|
||||
else if ($c == "m") { $current =& $this->do_space_marks; }
|
||||
else if ($c == "h") { $current =& $this->do_space_emdash; }
|
||||
else if ($c == "H") { $current =& $this->do_space_endash; }
|
||||
else if ($c == "f") { $current =& $this->do_space_frenchquote; }
|
||||
else if ($c == "t") { $current =& $this->do_space_thousand; }
|
||||
else if ($c == "u") { $current =& $this->do_space_unit; }
|
||||
else if ($c == "+") {
|
||||
$current = 2;
|
||||
unset($current);
|
||||
}
|
||||
else if ($c == "-") {
|
||||
$current = -1;
|
||||
unset($current);
|
||||
}
|
||||
else {
|
||||
# Unknown attribute option, ignore.
|
||||
}
|
||||
$current = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function educate($t, $prev_token_last_char) {
|
||||
$t = parent::educate($t, $prev_token_last_char);
|
||||
|
||||
if ($this->do_comma_quotes) $t = $this->educateCommaQuotes($t);
|
||||
if ($this->do_guillemets) $t = $this->educateGuillemets($t);
|
||||
|
||||
if ($this->do_space_emdash) $t = $this->spaceEmDash($t);
|
||||
if ($this->do_space_endash) $t = $this->spaceEnDash($t);
|
||||
if ($this->do_space_colon) $t = $this->spaceColon($t);
|
||||
if ($this->do_space_semicolon) $t = $this->spaceSemicolon($t);
|
||||
if ($this->do_space_marks) $t = $this->spaceMarks($t);
|
||||
if ($this->do_space_frenchquote) $t = $this->spaceFrenchQuotes($t);
|
||||
if ($this->do_space_thousand) $t = $this->spaceThousandSeparator($t);
|
||||
if ($this->do_space_unit) $t = $this->spaceUnit($t);
|
||||
|
||||
return $t;
|
||||
}
|
||||
|
||||
|
||||
protected function educateQuotes($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
|
@ -758,253 +371,129 @@ class _SmartyPantsTypographer_TmpImpl extends \Michelf\SmartyPants {
|
|||
}
|
||||
|
||||
|
||||
protected function educateCommaQuotes($_) {
|
||||
protected function educateBackticks($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with ,,comma,, -style double quotes
|
||||
# Returns: The string, with ``backticks'' -style double quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
# Example input: ,,Isn't this fun?,,
|
||||
# Example output: „Isn't this fun?„
|
||||
# Example input: ``Isn't this fun?''
|
||||
# Example output: “Isn't this fun?”
|
||||
#
|
||||
# Note: this is meant to be used alongside backtick quotes; there is
|
||||
# no language that uses lower quotation marks alone, as in the example.
|
||||
#
|
||||
$_ = str_replace(",,", '„', $_);
|
||||
|
||||
$_ = str_replace(array("``", "''",),
|
||||
array($this->backtick_doublequote_open,
|
||||
$this->backtick_doublequote_close), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateGuillemets($_) {
|
||||
protected function educateSingleBackticks($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with << guillemets >> -style quotes
|
||||
# translated into HTML guillemets entities.
|
||||
# Returns: The string, with `backticks' -style single quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
# Example input: << Isn't this fun? >>
|
||||
# Example output: « Isn't this fun? »
|
||||
# Example input: `Isn't this fun?'
|
||||
# Example output: ‘Isn’t this fun?’
|
||||
#
|
||||
$_ = preg_replace("/(?:<|<){2}/", '«', $_);
|
||||
$_ = preg_replace("/(?:>|>){2}/", '»', $_);
|
||||
|
||||
$_ = str_replace(array("`", "'",),
|
||||
array($this->backtick_singlequote_open,
|
||||
$this->backtick_singlequote_close), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceFrenchQuotes($_) {
|
||||
protected function educateDashes($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# inside french-style quotes, only french quotes.
|
||||
# Parameter: String.
|
||||
#
|
||||
# Example input: Quotes in « French », »German« and »Finnish» style.
|
||||
# Example output: Quotes in «_French_», »German« and »Finnish» style.
|
||||
# Returns: The string, with each instance of "--" translated to
|
||||
# an em-dash HTML entity.
|
||||
#
|
||||
$opt = ( $this->do_space_frenchquote == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_frenchquote != -1 ? $this->space_frenchquote : '' );
|
||||
|
||||
# Characters allowed immediately outside quotes.
|
||||
$outside_char = $this->space . '|\s|[.,:;!?\[\](){}|@*~=+-]|¡|¿';
|
||||
|
||||
$_ = preg_replace(
|
||||
"/(^|$outside_char)(«|«|›|‹)$this->space$opt/",
|
||||
"\\1\\2$chr", $_);
|
||||
$_ = preg_replace(
|
||||
"/$this->space$opt(»|»|‹|›)($outside_char|$)/",
|
||||
"$chr\\1\\2", $_);
|
||||
|
||||
$_ = str_replace('--', $this->em_dash, $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceColon($_) {
|
||||
protected function educateDashesOldSchool($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# before colons.
|
||||
# Parameter: String.
|
||||
#
|
||||
# Example input: Ingredients : fun.
|
||||
# Example output: Ingredients_: fun.
|
||||
# Returns: The string, with each instance of "--" translated to
|
||||
# an en-dash HTML entity, and each "---" translated to
|
||||
# an em-dash HTML entity.
|
||||
#
|
||||
$opt = ( $this->do_space_colon == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_colon != -1 ? $this->space_colon : '' );
|
||||
|
||||
$_ = preg_replace("/$this->space$opt(:)(\\s|$)/m",
|
||||
"$chr\\1\\2", $_);
|
||||
|
||||
# em en
|
||||
$_ = str_replace(array("---", "--",),
|
||||
array($this->em_dash, $this->en_dash), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceSemicolon($_) {
|
||||
protected function educateDashesOldSchoolInverted($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# before semicolons.
|
||||
# Parameter: String.
|
||||
#
|
||||
# Example input: There he goes ; there she goes.
|
||||
# Example output: There he goes_; there she goes.
|
||||
# Returns: The string, with each instance of "--" translated to
|
||||
# an em-dash HTML entity, and each "---" translated to
|
||||
# an en-dash HTML entity. Two reasons why: First, unlike the
|
||||
# en- and em-dash syntax supported by
|
||||
# EducateDashesOldSchool(), it's compatible with existing
|
||||
# entries written before SmartyPants 1.1, back when "--" was
|
||||
# only used for em-dashes. Second, em-dashes are more
|
||||
# common than en-dashes, and so it sort of makes sense that
|
||||
# the shortcut should be shorter to type. (Thanks to Aaron
|
||||
# Swartz for the idea.)
|
||||
#
|
||||
$opt = ( $this->do_space_semicolon == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_semicolon != -1 ? $this->space_semicolon : '' );
|
||||
|
||||
$_ = preg_replace("/$this->space(;)(?=\\s|$)/m",
|
||||
" \\1", $_);
|
||||
$_ = preg_replace("/((?:^|\\s)(?>[^&;\\s]+|&#?[a-zA-Z0-9]+;)*)".
|
||||
" $opt(;)(?=\\s|$)/m",
|
||||
"\\1$chr\\2", $_);
|
||||
|
||||
# en em
|
||||
$_ = str_replace(array("---", "--",),
|
||||
array($this->en_dash, $this->em_dash), $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceMarks($_) {
|
||||
protected function educateEllipses($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around question and exclamation marks.
|
||||
# Parameter: String.
|
||||
# Returns: The string, with each instance of "..." translated to
|
||||
# an ellipsis HTML entity. Also converts the case where
|
||||
# there are spaces between the dots.
|
||||
#
|
||||
# Example input: ¡ Holà ! What ?
|
||||
# Example output: ¡_Holà_! What_?
|
||||
# Example input: Huh...?
|
||||
# Example output: Huh…?
|
||||
#
|
||||
$opt = ( $this->do_space_marks == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_marks != -1 ? $this->space_marks : '' );
|
||||
|
||||
// Regular marks.
|
||||
$_ = preg_replace("/$this->space$opt([?!]+)/", "$chr\\1", $_);
|
||||
|
||||
// Inverted marks.
|
||||
$imarks = "(?:¡|¡|¡|&#x[Aa]1;|¿|¿|¿|&#x[Bb][Ff];)";
|
||||
$_ = preg_replace("/($imarks+)$this->space$opt/", "\\1$chr", $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceEmDash($_) {
|
||||
#
|
||||
# Parameters: String, two replacement characters separated by a hyphen (`-`),
|
||||
# and forcing flag.
|
||||
#
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around dashes.
|
||||
#
|
||||
# Example input: Then — without any plan — the fun happened.
|
||||
# Example output: Then_—_without any plan_—_the fun happened.
|
||||
#
|
||||
$opt = ( $this->do_space_emdash == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_emdash != -1 ? $this->space_emdash : '' );
|
||||
$_ = preg_replace("/$this->space$opt(—|—)$this->space$opt/",
|
||||
"$chr\\1$chr", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceEnDash($_) {
|
||||
#
|
||||
# Parameters: String, two replacement characters separated by a hyphen (`-`),
|
||||
# and forcing flag.
|
||||
#
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around dashes.
|
||||
#
|
||||
# Example input: Then — without any plan — the fun happened.
|
||||
# Example output: Then_—_without any plan_—_the fun happened.
|
||||
#
|
||||
$opt = ( $this->do_space_endash == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_endash != -1 ? $this->space_endash : '' );
|
||||
$_ = preg_replace("/$this->space$opt(–|–)$this->space$opt/",
|
||||
"$chr\\1$chr", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceThousandSeparator($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# inside numbers (thousand separator in french).
|
||||
#
|
||||
# Example input: Il y a 10 000 insectes amusants dans ton jardin.
|
||||
# Example output: Il y a 10_000 insectes amusants dans ton jardin.
|
||||
#
|
||||
$chr = ( $this->do_space_thousand != -1 ? $this->space_thousand : '' );
|
||||
$_ = preg_replace('/([0-9]) ([0-9])/', "\\1$chr\\2", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected $units = '
|
||||
### Metric units (with prefixes)
|
||||
(?:
|
||||
p |
|
||||
µ | µ | &\#0*181; | &\#[xX]0*[Bb]5; |
|
||||
[mcdhkMGT]
|
||||
)?
|
||||
(?:
|
||||
[mgstAKNJWCVFSTHBL]|mol|cd|rad|Hz|Pa|Wb|lm|lx|Bq|Gy|Sv|kat|
|
||||
Ω | Ohm | Ω | &\#0*937; | &\#[xX]0*3[Aa]9;
|
||||
)|
|
||||
### Computers units (KB, Kb, TB, Kbps)
|
||||
[kKMGT]?(?:[oBb]|[oBb]ps|flops)|
|
||||
### Money
|
||||
¢ | ¢ | &\#0*162; | &\#[xX]0*[Aa]2; |
|
||||
M?(?:
|
||||
£ | £ | &\#0*163; | &\#[xX]0*[Aa]3; |
|
||||
¥ | ¥ | &\#0*165; | &\#[xX]0*[Aa]5; |
|
||||
€ | € | &\#0*8364; | &\#[xX]0*20[Aa][Cc]; |
|
||||
$
|
||||
)|
|
||||
### Other units
|
||||
(?: ° | ° | &\#0*176; | &\#[xX]0*[Bb]0; ) [CF]? |
|
||||
%|pt|pi|M?px|em|en|gal|lb|[NSEOW]|[NS][EOW]|ha|mbar
|
||||
'; //x
|
||||
|
||||
protected function spaceUnit($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# before unit symbols.
|
||||
#
|
||||
# Example input: Get 3 mol of fun for 3 $.
|
||||
# Example output: Get 3_mol of fun for 3_$.
|
||||
#
|
||||
$opt = ( $this->do_space_unit == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_unit != -1 ? $this->space_unit : '' );
|
||||
|
||||
$_ = preg_replace('/
|
||||
(?:([0-9])[ ]'.$opt.') # Number followed by space.
|
||||
('.$this->units.') # Unit.
|
||||
(?![a-zA-Z0-9]) # Negative lookahead for other unit characters.
|
||||
/x',
|
||||
"\\1$chr\\2", $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceAbbr($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around abbreviations.
|
||||
#
|
||||
# Example input: Fun i.e. something pleasant.
|
||||
# Example output: Fun i.e._something pleasant.
|
||||
#
|
||||
$opt = ( $this->do_space_abbr == 2 ? '?' : '' );
|
||||
|
||||
$_ = preg_replace("/(^|\s)($this->abbr_after) $opt/m",
|
||||
"\\1\\2$this->space_abbr", $_);
|
||||
$_ = preg_replace("/( )$opt($this->abbr_sp_before)(?![a-zA-Z'])/m",
|
||||
"\\1$this->space_abbr\\2", $_);
|
||||
$_ = str_replace(array("...", ". . .",), $this->ellipsis, $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function stupefyEntities($_) {
|
||||
#
|
||||
# Adding angle quotes and lower quotes to SmartyPants's stupefy mode.
|
||||
# Parameter: String.
|
||||
# Returns: The string, with each SmartyPants HTML entity translated to
|
||||
# its ASCII counterpart.
|
||||
#
|
||||
# Example input: “Hello — world.”
|
||||
# Example output: "Hello -- world."
|
||||
#
|
||||
$_ = parent::stupefyEntities($_);
|
||||
|
||||
$_ = str_replace(array('„', '«', '»'), '"', $_);
|
||||
# en-dash em-dash
|
||||
$_ = str_replace(array('–', '—'),
|
||||
array('-', '--'), $_);
|
||||
|
||||
# single quote open close
|
||||
$_ = str_replace(array('‘', '’'), "'", $_);
|
||||
|
||||
# double quote open close
|
||||
$_ = str_replace(array('“', '”'), '"', $_);
|
||||
|
||||
$_ = str_replace('…', '...', $_); # ellipsis
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
@ -1012,20 +501,60 @@ class _SmartyPantsTypographer_TmpImpl extends \Michelf\SmartyPants {
|
|||
|
||||
protected function processEscapes($_) {
|
||||
#
|
||||
# Adding a few more escapes to SmartyPants's escapes:
|
||||
# Parameter: String.
|
||||
# Returns: The string, after processing the following backslash
|
||||
# escape sequences. This is useful if you want to force a "dumb"
|
||||
# quote or other character to appear.
|
||||
#
|
||||
# Escape Value
|
||||
# ------ -----
|
||||
# \, ,
|
||||
# \< <
|
||||
# \> >
|
||||
# \\ \
|
||||
# \" "
|
||||
# \' '
|
||||
# \. .
|
||||
# \- -
|
||||
# \` `
|
||||
#
|
||||
$_ = parent::processEscapes($_);
|
||||
|
||||
$_ = str_replace(
|
||||
array('\,', '\<', '\>', '\<', '\>'),
|
||||
array(',', '<', '>', '<', '>'), $_);
|
||||
array('\\\\', '\"', "\'", '\.', '\-', '\`'),
|
||||
array('\', '"', ''', '.', '-', '`'), $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function tokenizeHTML($str) {
|
||||
#
|
||||
# Parameter: String containing HTML markup.
|
||||
# Returns: An array of the tokens comprising the input
|
||||
# string. Each token is either a tag (possibly with nested,
|
||||
# tags contained therein, such as <a href="<MTFoo>">, or a
|
||||
# run of text between tags. Each element of the array is a
|
||||
# two-element array; the first is either 'tag' or 'text';
|
||||
# the second is the actual value.
|
||||
#
|
||||
#
|
||||
# Regular expression derived from the _tokenize() subroutine in
|
||||
# Brad Choate's MTRegex plugin.
|
||||
# <http://www.bradchoate.com/past/mtregex.php>
|
||||
#
|
||||
$index = 0;
|
||||
$tokens = array();
|
||||
|
||||
$match = '(?s:<!--.*?-->)|'. # comment
|
||||
'(?s:<\?.*?\?>)|'. # processing instruction
|
||||
# regular tags
|
||||
'(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
|
||||
|
||||
$parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
|
||||
foreach ($parts as $part) {
|
||||
if (++$index % 2 && $part != '')
|
||||
$tokens[] = array('text', $part);
|
||||
else
|
||||
$tokens[] = array('tag', $part);
|
||||
}
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -3,38 +3,484 @@
|
|||
# SmartyPants Typographer - Smart typography for web sites
|
||||
#
|
||||
# PHP SmartyPants & Typographer
|
||||
# Copyright (c) 2004-2013 Michel Fortin
|
||||
# <http://michelf.ca/>
|
||||
# Copyright (c) 2004-2016 Michel Fortin
|
||||
# <https://michelf.ca/>
|
||||
#
|
||||
# Original SmartyPants
|
||||
# Copyright (c) 2003-2004 John Gruber
|
||||
# <http://daringfireball.net/>
|
||||
# <https://daringfireball.net/>
|
||||
#
|
||||
namespace Michelf;
|
||||
|
||||
|
||||
# Just force Michelf/SmartyPants.php to load. This is needed to load
|
||||
# the temporary implementation class. See below for details.
|
||||
\Michelf\SmartyPants::SMARTYPANTSLIB_VERSION;
|
||||
|
||||
|
||||
#
|
||||
# SmartyPants Typographer Parser Class
|
||||
#
|
||||
# Note: Currently the implementation resides in the temporary class
|
||||
# \Michelf\_SmartyPantsTypographer_TmpImpl (in the same file as
|
||||
# \Michelf\SmartyPants). This makes it easier to propagate the changes between
|
||||
# the three different packaging styles of PHP SmartyPants. Once this issue is
|
||||
# resolved, the _SmartyPantsTypographer_TmpImpl class will disappear and this
|
||||
# one will contain the code.
|
||||
#
|
||||
use \Michelf\SmartyPants;
|
||||
class SmartyPantsTypographer extends \Michelf\SmartyPants {
|
||||
|
||||
### Configuration Variables ###
|
||||
|
||||
# Options to specify which transformations to make:
|
||||
public $do_comma_quotes = 0;
|
||||
public $do_guillemets = 0;
|
||||
public $do_geresh_gershayim = 0;
|
||||
public $do_space_emdash = 0;
|
||||
public $do_space_endash = 0;
|
||||
public $do_space_colon = 0;
|
||||
public $do_space_semicolon = 0;
|
||||
public $do_space_marks = 0;
|
||||
public $do_space_frenchquote = 0;
|
||||
public $do_space_thousand = 0;
|
||||
public $do_space_unit = 0;
|
||||
|
||||
# Quote characters for replacing ASCII approximations
|
||||
public $doublequote_low = "„"; // replacement for ,,
|
||||
public $guillemet_leftpointing = "«"; // replacement for <<
|
||||
public $guillemet_rightpointing = "»"; // replacement for >>
|
||||
public $geresh = "׳";
|
||||
public $gershayim = "״";
|
||||
|
||||
# Space characters for different places:
|
||||
# Space around em-dashes. "He_—_or she_—_should change that."
|
||||
public $space_emdash = " ";
|
||||
# Space around en-dashes. "He_–_or she_–_should change that."
|
||||
public $space_endash = " ";
|
||||
# Space before a colon. "He said_: here it is."
|
||||
public $space_colon = " ";
|
||||
# Space before a semicolon. "That's what I said_; that's what he said."
|
||||
public $space_semicolon = " ";
|
||||
# Space before a question mark and an exclamation mark: "¡_Holà_! What_?"
|
||||
public $space_marks = " ";
|
||||
# Space inside french quotes. "Voici la «_chose_» qui m'a attaqué."
|
||||
public $space_frenchquote = " ";
|
||||
# Space as thousand separator. "On compte 10_000 maisons sur cette liste."
|
||||
public $space_thousand = " ";
|
||||
# Space before a unit abbreviation. "This 12_kg of matter costs 10_$."
|
||||
public $space_unit = " ";
|
||||
|
||||
|
||||
# Expression of a space (breakable or not):
|
||||
public $space = '(?: | | |�*160;|�*[aA]0;)';
|
||||
|
||||
class SmartyPantsTypographer extends \Michelf\_SmartyPantsTypographer_TmpImpl {
|
||||
|
||||
### Parser Implementation ###
|
||||
|
||||
# Temporarily, the implementation is in the _SmartyPantsTypographer_TmpImpl
|
||||
# class. See note above.
|
||||
public function __construct($attr = SmartyPants::ATTR_DEFAULT) {
|
||||
#
|
||||
# Initialize a SmartyPantsTypographer_Parser with certain attributes.
|
||||
#
|
||||
# Parser attributes:
|
||||
# 0 : do nothing
|
||||
# 1 : set all, except dash spacing
|
||||
# 2 : set all, except dash spacing, using old school en- and em- dash shortcuts
|
||||
# 3 : set all, except dash spacing, using inverted old school en and em- dash shortcuts
|
||||
#
|
||||
# Punctuation:
|
||||
# q -> quotes
|
||||
# b -> backtick quotes (``double'' only)
|
||||
# B -> backtick quotes (``double'' and `single')
|
||||
# c -> comma quotes (,,double`` only)
|
||||
# g -> guillemets (<<double>> only)
|
||||
# d -> dashes
|
||||
# D -> old school dashes
|
||||
# i -> inverted old school dashes
|
||||
# e -> ellipses
|
||||
# w -> convert &quot; entities to " for Dreamweaver users
|
||||
#
|
||||
# Spacing:
|
||||
# : -> colon spacing +-
|
||||
# ; -> semicolon spacing +-
|
||||
# m -> question and exclamation marks spacing +-
|
||||
# h -> em-dash spacing +-
|
||||
# H -> en-dash spacing +-
|
||||
# f -> french quote spacing +-
|
||||
# t -> thousand separator spacing -
|
||||
# u -> unit spacing +-
|
||||
# (you can add a plus sign after some of these options denoted by + to
|
||||
# add the space when it is not already present, or you can add a minus
|
||||
# sign to completely remove any space present)
|
||||
#
|
||||
# Initialize inherited SmartyPants parser.
|
||||
parent::__construct($attr);
|
||||
|
||||
if ($attr == "1" || $attr == "2" || $attr == "3") {
|
||||
# Do everything, turn all options on.
|
||||
$this->do_comma_quotes = 1;
|
||||
$this->do_guillemets = 1;
|
||||
$this->do_geresh_gershayim = 1;
|
||||
$this->do_space_emdash = 1;
|
||||
$this->do_space_endash = 1;
|
||||
$this->do_space_colon = 1;
|
||||
$this->do_space_semicolon = 1;
|
||||
$this->do_space_marks = 1;
|
||||
$this->do_space_frenchquote = 1;
|
||||
$this->do_space_thousand = 1;
|
||||
$this->do_space_unit = 1;
|
||||
}
|
||||
else if ($attr == "-1") {
|
||||
# Special "stupefy" mode.
|
||||
$this->do_stupefy = 1;
|
||||
}
|
||||
else {
|
||||
$chars = preg_split('//', $attr);
|
||||
foreach ($chars as $c){
|
||||
if ($c == "c") { $current =& $this->do_comma_quotes; }
|
||||
else if ($c == "g") { $current =& $this->do_guillemets; }
|
||||
else if ($c == "G") { $current =& $this->do_geresh_gershayim; }
|
||||
else if ($c == ":") { $current =& $this->do_space_colon; }
|
||||
else if ($c == ";") { $current =& $this->do_space_semicolon; }
|
||||
else if ($c == "m") { $current =& $this->do_space_marks; }
|
||||
else if ($c == "h") { $current =& $this->do_space_emdash; }
|
||||
else if ($c == "H") { $current =& $this->do_space_endash; }
|
||||
else if ($c == "f") { $current =& $this->do_space_frenchquote; }
|
||||
else if ($c == "t") { $current =& $this->do_space_thousand; }
|
||||
else if ($c == "u") { $current =& $this->do_space_unit; }
|
||||
else if ($c == "+") {
|
||||
$current = 2;
|
||||
unset($current);
|
||||
}
|
||||
else if ($c == "-") {
|
||||
$current = -1;
|
||||
unset($current);
|
||||
}
|
||||
else {
|
||||
# Unknown attribute option, ignore.
|
||||
}
|
||||
$current = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function decodeEntitiesInConfiguration() {
|
||||
parent::decodeEntitiesInConfiguration();
|
||||
$output_config_vars = array(
|
||||
'doublequote_low',
|
||||
'guillemet_leftpointing',
|
||||
'guillemet_rightpointing',
|
||||
'space_emdash',
|
||||
'space_endash',
|
||||
'space_colon',
|
||||
'space_semicolon',
|
||||
'space_marks',
|
||||
'space_frenchquote',
|
||||
'space_thousand',
|
||||
'space_unit',
|
||||
);
|
||||
foreach ($output_config_vars as $var) {
|
||||
$this->$var = html_entity_decode($this->$var);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function educate($t, $prev_token_last_char) {
|
||||
# must happen before regular smart quotes
|
||||
if ($this->do_geresh_gershayim) $t = $this->educateGereshGershayim($t);
|
||||
|
||||
$t = parent::educate($t, $prev_token_last_char);
|
||||
|
||||
if ($this->do_comma_quotes) $t = $this->educateCommaQuotes($t);
|
||||
if ($this->do_guillemets) $t = $this->educateGuillemets($t);
|
||||
|
||||
if ($this->do_space_emdash) $t = $this->spaceEmDash($t);
|
||||
if ($this->do_space_endash) $t = $this->spaceEnDash($t);
|
||||
if ($this->do_space_colon) $t = $this->spaceColon($t);
|
||||
if ($this->do_space_semicolon) $t = $this->spaceSemicolon($t);
|
||||
if ($this->do_space_marks) $t = $this->spaceMarks($t);
|
||||
if ($this->do_space_frenchquote) $t = $this->spaceFrenchQuotes($t);
|
||||
if ($this->do_space_thousand) $t = $this->spaceThousandSeparator($t);
|
||||
if ($this->do_space_unit) $t = $this->spaceUnit($t);
|
||||
|
||||
return $t;
|
||||
}
|
||||
|
||||
|
||||
protected function educateCommaQuotes($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with ,,comma,, -style double quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
# Example input: ,,Isn't this fun?,,
|
||||
# Example output: „Isn't this fun?„
|
||||
#
|
||||
# Note: this is meant to be used alongside backtick quotes; there is
|
||||
# no language that uses lower quotation marks alone like in the example.
|
||||
#
|
||||
$_ = str_replace(",,", $this->doublequote_low, $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateGuillemets($_) {
|
||||
#
|
||||
# Parameter: String.
|
||||
# Returns: The string, with << guillemets >> -style quotes
|
||||
# translated into HTML guillemets entities.
|
||||
#
|
||||
# Example input: << Isn't this fun? >>
|
||||
# Example output: « Isn't this fun? »
|
||||
#
|
||||
$_ = preg_replace("/(?:<|<){2}/", $this->guillemet_leftpointing, $_);
|
||||
$_ = preg_replace("/(?:>|>){2}/", $this->guillemet_rightpointing, $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function educateGereshGershayim($_) {
|
||||
#
|
||||
# Parameter: String, UTF-8 encoded.
|
||||
# Returns: The string, where a single or double quote surrounded by
|
||||
# two Hebrew characters is replaced with a typographic
|
||||
# geresh or gershayim punctuation mark.
|
||||
#
|
||||
# Example input: צה"ל / צ'ארלס
|
||||
# Example output: צה״ל / צ׳ארלס
|
||||
#
|
||||
// surrounding code points can be U+0590 to U+05BF and U+05D0 to U+05F2
|
||||
// encoded in UTF-8: D6.90 to D6.BF and D7.90 to D7.B2
|
||||
$_ = preg_replace('/(?<=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])\'(?=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])/', $this->geresh, $_);
|
||||
$_ = preg_replace('/(?<=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])"(?=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])/', $this->gershayim, $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceFrenchQuotes($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# inside french-style quotes, only french quotes.
|
||||
#
|
||||
# Example input: Quotes in « French », »German« and »Finnish» style.
|
||||
# Example output: Quotes in «_French_», »German« and »Finnish» style.
|
||||
#
|
||||
$opt = ( $this->do_space_frenchquote == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_frenchquote != -1 ? $this->space_frenchquote : '' );
|
||||
|
||||
# Characters allowed immediately outside quotes.
|
||||
$outside_char = $this->space . '|\s|[.,:;!?\[\](){}|@*~=+-]|¡|¿';
|
||||
|
||||
$_ = preg_replace(
|
||||
"/(^|$outside_char)(«|«|›|‹)$this->space$opt/",
|
||||
"\\1\\2$chr", $_);
|
||||
$_ = preg_replace(
|
||||
"/$this->space$opt(»|»|‹|›)($outside_char|$)/",
|
||||
"$chr\\1\\2", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceColon($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# before colons.
|
||||
#
|
||||
# Example input: Ingredients : fun.
|
||||
# Example output: Ingredients_: fun.
|
||||
#
|
||||
$opt = ( $this->do_space_colon == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_colon != -1 ? $this->space_colon : '' );
|
||||
|
||||
$_ = preg_replace("/$this->space$opt(:)(\\s|$)/m",
|
||||
"$chr\\1\\2", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceSemicolon($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# before semicolons.
|
||||
#
|
||||
# Example input: There he goes ; there she goes.
|
||||
# Example output: There he goes_; there she goes.
|
||||
#
|
||||
$opt = ( $this->do_space_semicolon == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_semicolon != -1 ? $this->space_semicolon : '' );
|
||||
|
||||
$_ = preg_replace("/$this->space(;)(?=\\s|$)/m",
|
||||
" \\1", $_);
|
||||
$_ = preg_replace("/((?:^|\\s)(?>[^&;\\s]+|&#?[a-zA-Z0-9]+;)*)".
|
||||
" $opt(;)(?=\\s|$)/m",
|
||||
"\\1$chr\\2", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceMarks($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around question and exclamation marks.
|
||||
#
|
||||
# Example input: ¡ Holà ! What ?
|
||||
# Example output: ¡_Holà_! What_?
|
||||
#
|
||||
$opt = ( $this->do_space_marks == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_marks != -1 ? $this->space_marks : '' );
|
||||
|
||||
// Regular marks.
|
||||
$_ = preg_replace("/$this->space$opt([?!]+)/", "$chr\\1", $_);
|
||||
|
||||
// Inverted marks.
|
||||
$imarks = "(?:¡|¡|¡|&#x[Aa]1;|¿|¿|¿|&#x[Bb][Ff];)";
|
||||
$_ = preg_replace("/($imarks+)$this->space$opt/", "\\1$chr", $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceEmDash($_) {
|
||||
#
|
||||
# Parameters: String, two replacement characters separated by a hyphen (`-`),
|
||||
# and forcing flag.
|
||||
#
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around dashes.
|
||||
#
|
||||
# Example input: Then — without any plan — the fun happened.
|
||||
# Example output: Then_—_without any plan_—_the fun happened.
|
||||
#
|
||||
$opt = ( $this->do_space_emdash == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_emdash != -1 ? $this->space_emdash : '' );
|
||||
$_ = preg_replace("/$this->space$opt(—|—)$this->space$opt/",
|
||||
"$chr\\1$chr", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceEnDash($_) {
|
||||
#
|
||||
# Parameters: String, two replacement characters separated by a hyphen (`-`),
|
||||
# and forcing flag.
|
||||
#
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around dashes.
|
||||
#
|
||||
# Example input: Then – without any plan – the fun happened.
|
||||
# Example output: Then_–_without any plan_–_the fun happened.
|
||||
#
|
||||
$opt = ( $this->do_space_endash == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_endash != -1 ? $this->space_endash : '' );
|
||||
$_ = preg_replace("/$this->space$opt(–|–)$this->space$opt/",
|
||||
"$chr\\1$chr", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceThousandSeparator($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# inside numbers (thousand separator in french).
|
||||
#
|
||||
# Example input: Il y a 10 000 insectes amusants dans ton jardin.
|
||||
# Example output: Il y a 10_000 insectes amusants dans ton jardin.
|
||||
#
|
||||
$chr = ( $this->do_space_thousand != -1 ? $this->space_thousand : '' );
|
||||
$_ = preg_replace('/([0-9]) ([0-9])/', "\\1$chr\\2", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected $units = '
|
||||
### Metric units (with prefixes)
|
||||
(?:
|
||||
p |
|
||||
µ | µ | &\#0*181; | &\#[xX]0*[Bb]5; |
|
||||
[mcdhkMGT]
|
||||
)?
|
||||
(?:
|
||||
[mgstAKNJWCVFSTHBL]|mol|cd|rad|Hz|Pa|Wb|lm|lx|Bq|Gy|Sv|kat|
|
||||
Ω | Ohm | Ω | &\#0*937; | &\#[xX]0*3[Aa]9;
|
||||
)|
|
||||
### Computers units (KB, Kb, TB, Kbps)
|
||||
[kKMGT]?(?:[oBb]|[oBb]ps|flops)|
|
||||
### Money
|
||||
¢ | ¢ | &\#0*162; | &\#[xX]0*[Aa]2; |
|
||||
M?(?:
|
||||
£ | £ | &\#0*163; | &\#[xX]0*[Aa]3; |
|
||||
¥ | ¥ | &\#0*165; | &\#[xX]0*[Aa]5; |
|
||||
€ | € | &\#0*8364; | &\#[xX]0*20[Aa][Cc]; |
|
||||
$
|
||||
)|
|
||||
### Other units
|
||||
(?: ° | ° | &\#0*176; | &\#[xX]0*[Bb]0; ) [CF]? |
|
||||
%|pt|pi|M?px|em|en|gal|lb|[NSEOW]|[NS][EOW]|ha|mbar
|
||||
'; //x
|
||||
|
||||
protected function spaceUnit($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# before unit symbols.
|
||||
#
|
||||
# Example input: Get 3 mol of fun for 3 $.
|
||||
# Example output: Get 3_mol of fun for 3_$.
|
||||
#
|
||||
$opt = ( $this->do_space_unit == 2 ? '?' : '' );
|
||||
$chr = ( $this->do_space_unit != -1 ? $this->space_unit : '' );
|
||||
|
||||
$_ = preg_replace('/
|
||||
(?:([0-9])[ ]'.$opt.') # Number followed by space.
|
||||
('.$this->units.') # Unit.
|
||||
(?![a-zA-Z0-9]) # Negative lookahead for other unit characters.
|
||||
/x',
|
||||
"\\1$chr\\2", $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function spaceAbbr($_) {
|
||||
#
|
||||
# Parameters: String, replacement character, and forcing flag.
|
||||
# Returns: The string, with appropriate spaces replaced
|
||||
# around abbreviations.
|
||||
#
|
||||
# Example input: Fun i.e. something pleasant.
|
||||
# Example output: Fun i.e._something pleasant.
|
||||
#
|
||||
$opt = ( $this->do_space_abbr == 2 ? '?' : '' );
|
||||
|
||||
$_ = preg_replace("/(^|\s)($this->abbr_after) $opt/m",
|
||||
"\\1\\2$this->space_abbr", $_);
|
||||
$_ = preg_replace("/( )$opt($this->abbr_sp_before)(?![a-zA-Z'])/m",
|
||||
"\\1$this->space_abbr\\2", $_);
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function stupefyEntities($_) {
|
||||
#
|
||||
# Adding angle quotes and lower quotes to SmartyPants's stupefy mode.
|
||||
#
|
||||
$_ = parent::stupefyEntities($_);
|
||||
|
||||
$_ = str_replace(array('„', '«', '»'), '"', $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
|
||||
|
||||
protected function processEscapes($_) {
|
||||
#
|
||||
# Adding a few more escapes to SmartyPants's escapes:
|
||||
#
|
||||
# Escape Value
|
||||
# ------ -----
|
||||
# \, ,
|
||||
# \< <
|
||||
# \> >
|
||||
#
|
||||
$_ = parent::processEscapes($_);
|
||||
|
||||
$_ = str_replace(
|
||||
array('\,', '\<', '\>', '\<', '\>'),
|
||||
array(',', '<', '>', '<', '>'), $_);
|
||||
|
||||
return $_;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue