# # Original SmartyPants # Copyright (c) 2003-2004 John Gruber # # namespace Michelf; # # SmartyPants Typographer Parser Class # class SmartyPantsTypographer extends \Michelf\SmartyPants { ### Configuration Variables ### # Options to specify which transformations to make: public $do_comma_quotes = 0; public $do_guillemets = 0; public $do_geresh_gershayim = 0; public $do_space_emdash = 0; public $do_space_endash = 0; public $do_space_colon = 0; public $do_space_semicolon = 0; public $do_space_marks = 0; public $do_space_frenchquote = 0; public $do_space_thousand = 0; public $do_space_unit = 0; # Quote characters for replacing ASCII approximations public $doublequote_low = "„"; // replacement for ,, public $guillemet_leftpointing = "«"; // replacement for << public $guillemet_rightpointing = "»"; // replacement for >> public $geresh = "׳"; public $gershayim = "״"; # Space characters for different places: # Space around em-dashes. "He_—_or she_—_should change that." public $space_emdash = " "; # Space around en-dashes. "He_–_or she_–_should change that." public $space_endash = " "; # Space before a colon. "He said_: here it is." public $space_colon = " "; # Space before a semicolon. "That's what I said_; that's what he said." public $space_semicolon = " "; # Space before a question mark and an exclamation mark: "¡_Holà_! What_?" public $space_marks = " "; # Space inside french quotes. "Voici la «_chose_» qui m'a attaqué." public $space_frenchquote = " "; # Space as thousand separator. "On compte 10_000 maisons sur cette liste." public $space_thousand = " "; # Space before a unit abreviation. "This 12_kg of matter costs 10_$." public $space_unit = " "; # Expression of a space (breakable or not): public $space = '(?: | | |�*160;|�*[aA]0;)'; ### Parser Implementation ### public function __construct($attr = SmartyPants::ATTR_DEFAULT) { # # Initialize a SmartyPantsTypographer_Parser with certain attributes. # # Parser attributes: # 0 : do nothing # 1 : set all, except dash spacing # 2 : set all, except dash spacing, using old school en- and em- dash shortcuts # 3 : set all, except dash spacing, using inverted old school en and em- dash shortcuts # # Punctuation: # q -> quotes # b -> backtick quotes (``double'' only) # B -> backtick quotes (``double'' and `single') # c -> comma quotes (,,double`` only) # g -> guillemets (<> only) # d -> dashes # D -> old school dashes # i -> inverted old school dashes # e -> ellipses # w -> convert " entities to " for Dreamweaver users # # Spacing: # : -> colon spacing +- # ; -> semicolon spacing +- # m -> question and exclamation marks spacing +- # h -> em-dash spacing +- # H -> en-dash spacing +- # f -> french quote spacing +- # t -> thousand separator spacing - # u -> unit spacing +- # (you can add a plus sign after some of these options denoted by + to # add the space when it is not already present, or you can add a minus # sign to completly remove any space present) # # Initialize inherited SmartyPants parser. parent::__construct($attr); if ($attr == "1" || $attr == "2" || $attr == "3") { # Do everything, turn all options on. $this->do_comma_quotes = 1; $this->do_guillemets = 1; $this->do_geresh_gershayim = 1; $this->do_space_emdash = 1; $this->do_space_endash = 1; $this->do_space_colon = 1; $this->do_space_semicolon = 1; $this->do_space_marks = 1; $this->do_space_frenchquote = 1; $this->do_space_thousand = 1; $this->do_space_unit = 1; } else if ($attr == "-1") { # Special "stupefy" mode. $this->do_stupefy = 1; } else { $chars = preg_split('//', $attr); foreach ($chars as $c){ if ($c == "c") { $current =& $this->do_comma_quotes; } else if ($c == "g") { $current =& $this->do_guillemets; } else if ($c == "G") { $current =& $this->do_geresh_gershayim; } else if ($c == ":") { $current =& $this->do_space_colon; } else if ($c == ";") { $current =& $this->do_space_semicolon; } else if ($c == "m") { $current =& $this->do_space_marks; } else if ($c == "h") { $current =& $this->do_space_emdash; } else if ($c == "H") { $current =& $this->do_space_endash; } else if ($c == "f") { $current =& $this->do_space_frenchquote; } else if ($c == "t") { $current =& $this->do_space_thousand; } else if ($c == "u") { $current =& $this->do_space_unit; } else if ($c == "+") { $current = 2; unset($current); } else if ($c == "-") { $current = -1; unset($current); } else { # Unknown attribute option, ignore. } $current = 1; } } } function decodeEntitiesInConfiguration() { parent::decodeEntitiesInConfiguration(); $output_config_vars = array( 'doublequote_low', 'guillemet_leftpointing', 'guillemet_rightpointing', 'space_emdash', 'space_endash', 'space_colon', 'space_semicolon', 'space_marks', 'space_frenchquote', 'space_thousand', 'space_unit', ); foreach ($output_config_vars as $var) { $this->$var = html_entity_decode($this->$var); } } function educate($t, $prev_token_last_char) { # must happen before regular smart quotes if ($this->do_geresh_gershayim) $t = $this->educateGereshGershayim($t); $t = parent::educate($t, $prev_token_last_char); if ($this->do_comma_quotes) $t = $this->educateCommaQuotes($t); if ($this->do_guillemets) $t = $this->educateGuillemets($t); if ($this->do_space_emdash) $t = $this->spaceEmDash($t); if ($this->do_space_endash) $t = $this->spaceEnDash($t); if ($this->do_space_colon) $t = $this->spaceColon($t); if ($this->do_space_semicolon) $t = $this->spaceSemicolon($t); if ($this->do_space_marks) $t = $this->spaceMarks($t); if ($this->do_space_frenchquote) $t = $this->spaceFrenchQuotes($t); if ($this->do_space_thousand) $t = $this->spaceThousandSeparator($t); if ($this->do_space_unit) $t = $this->spaceUnit($t); return $t; } protected function educateCommaQuotes($_) { # # Parameter: String. # Returns: The string, with ,,comma,, -style double quotes # translated into HTML curly quote entities. # # Example input: ,,Isn't this fun?,, # Example output: „Isn't this fun?„ # # Note: this is meant to be used alongside with backtick quotes; there is # no language that use only lower quotations alone mark like in the example. # $_ = str_replace(",,", $this->doublequote_low, $_); return $_; } protected function educateGuillemets($_) { # # Parameter: String. # Returns: The string, with << guillemets >> -style quotes # translated into HTML guillemets entities. # # Example input: << Isn't this fun? >> # Example output: „ Isn't this fun? „ # $_ = preg_replace("/(?:<|<){2}/", $this->guillemet_leftpointing, $_); $_ = preg_replace("/(?:>|>){2}/", $this->guillemet_rightpointing, $_); return $_; } protected function educateGereshGershayim($_) { # # Parameter: String, UTF-8 encoded. # Returns: The string, where simple a or double quote surrounded by # two hebrew characters is replaced into a typographic # geresh or gershayim punctuation mark. # # Example input: צה"ל / צ'ארלס # Example output: צה״ל / צ׳ארלס # // surrounding code points can be U+0590 to U+05BF and U+05D0 to U+05F2 // encoded in UTF-8: D6.90 to D6.BF and D7.90 to D7.B2 $_ = preg_replace('/(?<=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])\'(?=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])/', $this->geresh, $_); $_ = preg_replace('/(?<=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])"(?=\xD6[\x90-\xBF]|\xD7[\x90-\xB2])/', $this->gershayim, $_); return $_; } protected function spaceFrenchQuotes($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # inside french-style quotes, only french quotes. # # Example input: Quotes in « French », »German« and »Finnish» style. # Example output: Quotes in «_French_», »German« and »Finnish» style. # $opt = ( $this->do_space_frenchquote == 2 ? '?' : '' ); $chr = ( $this->do_space_frenchquote != -1 ? $this->space_frenchquote : '' ); # Characters allowed immediatly outside quotes. $outside_char = $this->space . '|\s|[.,:;!?\[\](){}|@*~=+-]|¡|¿'; $_ = preg_replace( "/(^|$outside_char)(«|«|›|‹)$this->space$opt/", "\\1\\2$chr", $_); $_ = preg_replace( "/$this->space$opt(»|»|‹|›)($outside_char|$)/", "$chr\\1\\2", $_); return $_; } protected function spaceColon($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # before colons. # # Example input: Ingredients : fun. # Example output: Ingredients_: fun. # $opt = ( $this->do_space_colon == 2 ? '?' : '' ); $chr = ( $this->do_space_colon != -1 ? $this->space_colon : '' ); $_ = preg_replace("/$this->space$opt(:)(\\s|$)/m", "$chr\\1\\2", $_); return $_; } protected function spaceSemicolon($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # before semicolons. # # Example input: There he goes ; there she goes. # Example output: There he goes_; there she goes. # $opt = ( $this->do_space_semicolon == 2 ? '?' : '' ); $chr = ( $this->do_space_semicolon != -1 ? $this->space_semicolon : '' ); $_ = preg_replace("/$this->space(;)(?=\\s|$)/m", " \\1", $_); $_ = preg_replace("/((?:^|\\s)(?>[^&;\\s]+|&#?[a-zA-Z0-9]+;)*)". " $opt(;)(?=\\s|$)/m", "\\1$chr\\2", $_); return $_; } protected function spaceMarks($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # around question and exclamation marks. # # Example input: ¡ Holà ! What ? # Example output: ¡_Holà_! What_? # $opt = ( $this->do_space_marks == 2 ? '?' : '' ); $chr = ( $this->do_space_marks != -1 ? $this->space_marks : '' ); // Regular marks. $_ = preg_replace("/$this->space$opt([?!]+)/", "$chr\\1", $_); // Inverted marks. $imarks = "(?:¡|¡|¡|&#x[Aa]1;|¿|¿|¿|&#x[Bb][Ff];)"; $_ = preg_replace("/($imarks+)$this->space$opt/", "\\1$chr", $_); return $_; } protected function spaceEmDash($_) { # # Parameters: String, two replacement characters separated by a hyphen (`-`), # and forcing flag. # # Returns: The string, with appropriates spaces replaced # around dashes. # # Example input: Then — without any plan — the fun happend. # Example output: Then_—_without any plan_—_the fun happend. # $opt = ( $this->do_space_emdash == 2 ? '?' : '' ); $chr = ( $this->do_space_emdash != -1 ? $this->space_emdash : '' ); $_ = preg_replace("/$this->space$opt(—|—)$this->space$opt/", "$chr\\1$chr", $_); return $_; } protected function spaceEnDash($_) { # # Parameters: String, two replacement characters separated by a hyphen (`-`), # and forcing flag. # # Returns: The string, with appropriates spaces replaced # around dashes. # # Example input: Then — without any plan — the fun happend. # Example output: Then_—_without any plan_—_the fun happend. # $opt = ( $this->do_space_endash == 2 ? '?' : '' ); $chr = ( $this->do_space_endash != -1 ? $this->space_endash : '' ); $_ = preg_replace("/$this->space$opt(–|–)$this->space$opt/", "$chr\\1$chr", $_); return $_; } protected function spaceThousandSeparator($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # inside numbers (thousand separator in french). # # Example input: Il y a 10 000 insectes amusants dans ton jardin. # Example output: Il y a 10_000 insectes amusants dans ton jardin. # $chr = ( $this->do_space_thousand != -1 ? $this->space_thousand : '' ); $_ = preg_replace('/([0-9]) ([0-9])/', "\\1$chr\\2", $_); return $_; } protected $units = ' ### Metric units (with prefixes) (?: p | µ | µ | &\#0*181; | &\#[xX]0*[Bb]5; | [mcdhkMGT] )? (?: [mgstAKNJWCVFSTHBL]|mol|cd|rad|Hz|Pa|Wb|lm|lx|Bq|Gy|Sv|kat| Ω | Ohm | Ω | &\#0*937; | &\#[xX]0*3[Aa]9; )| ### Computers units (KB, Kb, TB, Kbps) [kKMGT]?(?:[oBb]|[oBb]ps|flops)| ### Money ¢ | ¢ | &\#0*162; | &\#[xX]0*[Aa]2; | M?(?: £ | £ | &\#0*163; | &\#[xX]0*[Aa]3; | ¥ | ¥ | &\#0*165; | &\#[xX]0*[Aa]5; | € | € | &\#0*8364; | &\#[xX]0*20[Aa][Cc]; | $ )| ### Other units (?: ° | ° | &\#0*176; | &\#[xX]0*[Bb]0; ) [CF]? | %|pt|pi|M?px|em|en|gal|lb|[NSEOW]|[NS][EOW]|ha|mbar '; //x protected function spaceUnit($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # before unit symbols. # # Example input: Get 3 mol of fun for 3 $. # Example output: Get 3_mol of fun for 3_$. # $opt = ( $this->do_space_unit == 2 ? '?' : '' ); $chr = ( $this->do_space_unit != -1 ? $this->space_unit : '' ); $_ = preg_replace('/ (?:([0-9])[ ]'.$opt.') # Number followed by space. ('.$this->units.') # Unit. (?![a-zA-Z0-9]) # Negative lookahead for other unit characters. /x', "\\1$chr\\2", $_); return $_; } protected function spaceAbbr($_) { # # Parameters: String, replacement character, and forcing flag. # Returns: The string, with appropriates spaces replaced # around abbreviations. # # Example input: Fun i.e. something pleasant. # Example output: Fun i.e._something pleasant. # $opt = ( $this->do_space_abbr == 2 ? '?' : '' ); $_ = preg_replace("/(^|\s)($this->abbr_after) $opt/m", "\\1\\2$this->space_abbr", $_); $_ = preg_replace("/( )$opt($this->abbr_sp_before)(?![a-zA-Z'])/m", "\\1$this->space_abbr\\2", $_); return $_; } protected function stupefyEntities($_) { # # Adding angle quotes and lower quotes to SmartyPants's stupefy mode. # $_ = parent::stupefyEntities($_); $_ = str_replace(array('„', '«', '»'), '"', $_); return $_; } protected function processEscapes($_) { # # Adding a few more escapes to SmartyPants's escapes: # # Escape Value # ------ ----- # \, , # \< < # \> > # $_ = parent::processEscapes($_); $_ = str_replace( array('\,', '\<', '\>', '\<', '\>'), array(',', '<', '>', '<', '>'), $_); return $_; } }