Changeset 27403 for trunk


Ignore:
Timestamp:
Feb 18, 2014, 6:32:36 PM (10 years ago)
Author:
mistic100
Message:

update emogrifier

Location:
trunk/include
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/emogrifier.class.php

    r26972 r27403  
    11<?php
    2 /*
    3 UPDATES
    4 
    5     2008-08-10  Fixed CSS comment stripping regex to add PCRE_DOTALL (changed from '/\/\*.*\*\//U' to '/\/\*.*\*\//sU')
    6     2008-08-18  Added lines instructing DOMDocument to attempt to normalize HTML before processing
    7     2008-10-20  Fixed bug with bad variable name... Thanks Thomas!
    8     2008-03-02  Added licensing terms under the MIT License
    9                 Only remove unprocessable HTML tags if they exist in the array
    10     2009-06-03  Normalize existing CSS (style) attributes in the HTML before we process the CSS.
    11                 Made it so that the display:none stripper doesn't require a trailing semi-colon.
    12     2009-08-13  Added support for subset class values (e.g. "p.class1.class2").
    13                 Added better protection for bad css attributes.
    14                 Fixed support for HTML entities.
    15     2009-08-17  Fixed CSS selector processing so that selectors are processed by precedence/specificity, and not just in order.
    16     2009-10-29  Fixed so that selectors appearing later in the CSS will have precedence over identical selectors appearing earlier.
    17     2009-11-04  Explicitly declared static functions static to get rid of E_STRICT notices.
    18     2010-05-18  Fixed bug where full url filenames with protocols would get split improperly when we explode on ':'... Thanks Mark!
    19                 Added two new attribute selectors
    20     2010-06-16  Added static caching for less processing overhead in situations where multiple emogrification takes place
    21     2010-07-26  Fixed bug where '0' values were getting discarded because of php's empty() function... Thanks Scott!
    22     2010-09-03  Added checks to invisible node removal to ensure that we don't try to remove non-existent child nodes of parents that have already been deleted
    23     2011-04-08  Fixed errors in CSS->XPath conversion for adjacent sibling selectors and id/class combinations... Thanks Bob V.!
    24     2011-06-08  Fixed an error where CSS @media types weren't being parsed correctly... Thanks Will W.!
    25     2011-08-03  Fixed an error where an empty selector at the beginning of the CSS would cause a parse error on the next selector... Thanks Alexei T.!
    26     2011-10-13  Fully fixed a bug introduced in 2011-06-08 where selectors at the beginning of the CSS would be parsed incorrectly... Thanks Thomas A.!
    27     2011-10-26  Added an option to allow you to output emogrified code without extended characters being turned into HTML entities.
    28                 Moved static references to class attributes so they can be manipulated.
    29                 Added the ability to clear out the (formerly) static cache when CSS is reloaded.
    30     2011-12-22  Fixed a bug that was overwriting existing inline styles from the original HTML... Thanks Sagi L.!
    31     2012-01-31  Fixed a bug that was introduced with the 2011-12-22 revision... Thanks Sagi L. and M. Bąkowski!
    32                 Added extraction of <style> blocks within the HTML due to popular demand.
    33                 Added several new pseudo-selectors (first-child, last-child, nth-child, and nth-of-type).
    34     2012-02-07  Fixed some recent code introductions to use class constants rather than global constants.
    35                 Fixed some recent code introductions to make it cleaner to read.
    36     2012-05-01  Made removal of invisible nodes operate in a case-insensitive manner... Thanks Juha P.!
    37     2013-10-10  Add preserveStyleTag option
    38     2014-01-26  PHP 5.5 compatibility (/e modifier is deprecated in preg_replace)
    39 */
    40 
    41 define('CACHE_CSS', 0);
    42 define('CACHE_SELECTOR', 1);
    43 define('CACHE_XPATH', 2);
    44 
     2
     3/**
     4 * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
     5 *
     6 * For more information, please see the README.md file.
     7 *
     8 * @author Cameron Brooks
     9 * @author Jaime Prado
     10 */
    4511class Emogrifier {
    46 
    47     // for calculating nth-of-type and nth-child selectors
     12    /**
     13     * @var string
     14     */
     15    const ENCODING = 'UTF-8';
     16
     17    /**
     18     * @var integer
     19     */
     20    const CACHE_KEY_CSS = 0;
     21
     22    /**
     23     * @var integer
     24     */
     25    const CACHE_KEY_SELECTOR = 1;
     26
     27    /**
     28     * @var integer
     29     */
     30    const CACHE_KEY_XPATH = 2;
     31
     32    /**
     33     * for calculating nth-of-type and nth-child selectors
     34     *
     35     * @var integer
     36     */
    4837    const INDEX = 0;
     38
     39    /**
     40     * for calculating nth-of-type and nth-child selectors
     41     *
     42     * @var integer
     43     */
    4944    const MULTIPLIER = 1;
    5045
     46    /**
     47     * @var string
     48     */
     49    const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';
     50
     51    /**
     52     * @var string
     53     */
     54    const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';
     55
     56    /**
     57     * @var string
     58     */
    5159    private $html = '';
     60
     61    /**
     62     * @var string
     63     */
    5264    private $css = '';
    53     private $unprocessableHTMLTags = array('wbr');
    54     private $caches = array();
    55 
    56     // this attribute applies to the case where you want to preserve your original text encoding.
    57     // by default, emogrifier translates your text into HTML entities for two reasons:
    58     // 1. because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email
    59     // 2. it translates any illegal XML characters that DOMDocument cannot work with
    60     // if you would like to preserve your original encoding, set this attribute to true.
    61     public $preserveEncoding = false;
    62    
    63     // by default, emogrifier removes <style> tags, set preserveStyleTag to true to keep them
    64     public $preserveStyleTag = false;
    65 
     65
     66    /**
     67     * @var array<string>
     68     */
     69    private $unprocessableHtmlTags = array('wbr');
     70
     71    /**
     72     * @var array<array>
     73     */
     74    private $caches = array(
     75        self::CACHE_KEY_CSS => array(),
     76        self::CACHE_KEY_SELECTOR => array(),
     77        self::CACHE_KEY_XPATH => array(),
     78    );
     79
     80    /**
     81     * the visited nodes with the XPath paths as array keys
     82     *
     83     * @var array<\DOMNode>
     84     */
     85    private $visitedNodes = array();
     86
     87    /**
     88     * the styles to apply to the nodes with the XPath paths as array keys for the outer array and the attribute names/values
     89     * as key/value pairs for the inner array
     90     *
     91     * @var array<array><string>
     92     */
     93    private $styleAttributesForNodes = array();
     94
     95    /**
     96     * This attribute applies to the case where you want to preserve your original text encoding.
     97     *
     98     * By default, emogrifier translates your text into HTML entities for two reasons:
     99     *
     100     * 1. Because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email.
     101     *
     102     * 2. It translates any illegal XML characters that DOMDocument cannot work with.
     103     *
     104     * If you would like to preserve your original encoding, set this attribute to TRUE.
     105     *
     106     * @var boolean
     107     */
     108    public $preserveEncoding = FALSE;
     109
     110    /**
     111     * The constructor.
     112     *
     113     * @param string $html the HTML to emogrify, must be UTF-8-encoded
     114     * @param string $css the CSS to merge, must be UTF-8-encoded
     115     */
    66116    public function __construct($html = '', $css = '') {
     117        $this->setHtml($html);
     118        $this->setCss($css);
     119    }
     120
     121    /**
     122     * The destructor.
     123     */
     124    public function __destruct() {
     125        $this->purgeVisitedNodes();
     126    }
     127
     128    /**
     129     * Sets the HTML to emogrify.
     130     *
     131     * @param string $html the HTML to emogrify, must be UTF-8-encoded
     132     *
     133     * @return void
     134     */
     135    public function setHtml($html = '') {
    67136        $this->html = $html;
    68         $this->css  = $css;
    69         $this->clearCache();
    70     }
    71 
    72     public function setHTML($html = '') { $this->html = $html; }
    73     public function setCSS($css = '') {
     137    }
     138
     139    /**
     140     * Sets the CSS to merge with the HTML.
     141     *
     142     * @param string $css the CSS to merge, must be UTF-8-encoded
     143     *
     144     * @return void
     145     */
     146    public function setCss($css = '') {
    74147        $this->css = $css;
    75         $this->clearCache(CACHE_CSS);
    76     }
    77 
    78     public function clearCache($key = null) {
    79         if (!is_null($key)) {
    80             if (isset($this->caches[$key])) $this->caches[$key] = array();
    81         } else {
    82             $this->caches = array(
    83                 CACHE_CSS       => array(),
    84                 CACHE_SELECTOR  => array(),
    85                 CACHE_XPATH     => array(),
    86             );
    87         }
    88     }
    89 
    90     // there are some HTML tags that DOMDocument cannot process, and will throw an error if it encounters them.
    91     // in particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
    92     // these functions allow you to add/remove them if necessary.
    93     // it only strips them from the code (does not remove actual nodes).
    94     public function addUnprocessableHTMLTag($tag) { $this->unprocessableHTMLTags[] = $tag; }
    95     public function removeUnprocessableHTMLTag($tag) {
    96         if (($key = array_search($tag,$this->unprocessableHTMLTags)) !== false)
    97             unset($this->unprocessableHTMLTags[$key]);
    98     }
    99 
    100     // applies the CSS you submit to the html you submit. places the css inline
     148    }
     149
     150    /**
     151     * Clears all caches.
     152     *
     153     * @return void
     154     */
     155    private function clearAllCaches() {
     156        $this->clearCache(self::CACHE_KEY_CSS);
     157        $this->clearCache(self::CACHE_KEY_SELECTOR);
     158        $this->clearCache(self::CACHE_KEY_XPATH);
     159    }
     160
     161    /**
     162     * Clears a single cache by key.
     163     *
     164     * @param integer $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR or CACHE_KEY_XPATH
     165     *
     166     * @return void
     167     *
     168     * @throws \InvalidArgumentException
     169     */
     170    private function clearCache($key) {
     171        $allowedCacheKeys = array(self::CACHE_KEY_CSS, self::CACHE_KEY_SELECTOR, self::CACHE_KEY_XPATH);
     172        if (!in_array($key, $allowedCacheKeys, TRUE)) {
     173            throw new \InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
     174        }
     175
     176        $this->caches[$key] = array();
     177    }
     178
     179    /**
     180     * Purges the visited nodes.
     181     *
     182     * @return void
     183     */
     184    private function purgeVisitedNodes() {
     185        $this->visitedNodes = array();
     186        $this->styleAttributesForNodes = array();
     187    }
     188
     189    /**
     190     * Marks a tag for removal.
     191     *
     192     * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
     193     * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
     194     *
     195     * Note: The tags will not be removed if they have any content.
     196     *
     197     * @param string $tagName the tag name, e.g., "p"
     198     *
     199     * @return void
     200     */
     201    public function addUnprocessableHtmlTag($tagName) {
     202        $this->unprocessableHtmlTags[] = $tagName;
     203    }
     204
     205    /**
     206     * Drops a tag from the removal list.
     207     *
     208     * @param string $tagName the tag name, e.g., "p"
     209     *
     210     * @return void
     211     */
     212    public function removeUnprocessableHtmlTag($tagName) {
     213        $key = array_search($tagName, $this->unprocessableHtmlTags, TRUE);
     214        if ($key !== FALSE) {
     215            unset($this->unprocessableHtmlTags[$key]);
     216        }
     217    }
     218
     219    /**
     220     * Applies the CSS you submit to the HTML you submit.
     221     *
     222     * This method places the CSS inline.
     223     *
     224     * @return string
     225     *
     226     * @throws \BadMethodCallException
     227     */
    101228    public function emogrify() {
    102         $body = $this->html;
    103 
    104         // remove any unprocessable HTML tags (tags that DOMDocument cannot parse; this includes wbr and many new HTML5 tags)
    105         if (count($this->unprocessableHTMLTags)) {
    106             $unprocessableHTMLTags = implode('|',$this->unprocessableHTMLTags);
    107             $body = preg_replace("/<\/?($unprocessableHTMLTags)[^>]*>/i",'',$body);
    108         }
    109 
    110         $encoding = mb_detect_encoding($body);
    111         $body = mb_convert_encoding($body, 'HTML-ENTITIES', $encoding);
    112 
    113         $xmldoc = new DOMDocument;
    114         $xmldoc->encoding = $encoding;
    115         $xmldoc->strictErrorChecking = false;
    116         $xmldoc->formatOutput = true;
    117         $xmldoc->loadHTML($body);
    118         $xmldoc->normalizeDocument();
    119 
    120         $xpath = new DOMXPath($xmldoc);
     229        if ($this->html === '') {
     230            throw new \BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
     231        }
     232
     233        $xmlDocument = $this->createXmlDocument();
     234        $xpath = new \DOMXPath($xmlDocument);
     235        $this->clearAllCaches();
    121236
    122237        // before we begin processing the CSS file, parse the document and normalize all existing CSS attributes (changes 'DISPLAY: none' to 'display: none');
    123238        // we wouldn't have to do this if DOMXPath supported XPath 2.0.
    124239        // also store a reference of nodes with existing inline styles so we don't overwrite them
    125         $vistedNodes = $vistedNodeRef = array();
    126         $nodes = @$xpath->query('//*[@style]');
    127         foreach ($nodes as $node) {
    128             $normalizedOrigStyle = preg_replace_callback('/[A-z\-]+(?=\:)/S',create_function('$m', 'return strtolower($m[0]);'),$node->getAttribute('style'));
    129 
    130             // in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles
    131             $nodeKey = md5($node->getNodePath());
    132             if (!isset($vistedNodeRef[$nodeKey])) {
    133                 $vistedNodeRef[$nodeKey] = $this->cssStyleDefinitionToArray($normalizedOrigStyle);
    134                 $vistedNodes[$nodeKey]   = $node;
    135             }
    136 
    137             $node->setAttribute('style', $normalizedOrigStyle);
     240        $this->purgeVisitedNodes();
     241
     242        $nodesWithStyleAttributes = $xpath->query('//*[@style]');
     243        if ($nodesWithStyleAttributes !== FALSE) {
     244            $callback = create_function('$m', 'return strtolower($m[0]);');
     245
     246            /** @var $nodeWithStyleAttribute \DOMNode */
     247            foreach ($nodesWithStyleAttributes as $node) {
     248                $normalizedOriginalStyle = preg_replace_callback(
     249                    '/[A-z\\-]+(?=\\:)/S',
     250                    $callback,
     251                    $node->getAttribute('style')
     252                );
     253
     254                // in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles
     255                $nodePath = $node->getNodePath();
     256                if (!isset($this->styleAttributesForNodes[$nodePath])) {
     257                    $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle);
     258                    $this->visitedNodes[$nodePath] = $node;
     259                }
     260
     261                $node->setAttribute('style', $normalizedOriginalStyle);
     262            }
    138263        }
    139264
     
    141266        // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
    142267        $css = $this->css;
    143         $nodes = @$xpath->query('//style');
    144         foreach ($nodes as $node) {
    145             // append the css
    146             $css .= "\n\n{$node->nodeValue}";
    147             // remove the <style> node
    148             if (!$this->preserveStyleTag) {
    149                 $node->parentNode->removeChild($node);
     268        $styleNodes = $xpath->query('//style');
     269        if ($styleNodes !== FALSE) {
     270            /** @var $styleNode \DOMNode */
     271            foreach ($styleNodes as $styleNode) {
     272                // append the css
     273                $css .= "\n\n" . $styleNode->nodeValue;
     274                // remove the <style> node
     275                $styleNode->parentNode->removeChild($styleNode);
    150276            }
    151277        }
     
    153279        // filter the CSS
    154280        $search = array(
    155             '/\/\*.*\*\//sU', // get rid of css comment code
    156             '/^\s*@import\s[^;]+;/misU', // strip out any import directives
    157             '/^\s*@media\s[^{]+{\s*}/misU', // strip any empty media enclosures
    158             '/^\s*@media\s+((aural|braille|embossed|handheld|print|projection|speech|tty|tv)\s*,*\s*)+{.*}\s*}/misU', // strip out all media types that are not 'screen' or 'all' (these don't apply to email)
    159             '/^\s*@media\s[^{]+{(.*})\s*}/misU', // get rid of remaining media type enclosures
     281            // get rid of css comment code
     282            '/\\/\\*.*\\*\\//sU',
     283            // strip out any import directives
     284            '/^\\s*@import\\s[^;]+;/misU',
     285            // strip any empty media enclosures
     286            '/^\\s*@media\\s[^{]+{\\s*}/misU',
     287            // strip out all media rules that are not 'screen' or 'all' (these don't apply to email)
     288            '/^\\s*@media\\s+((aural|braille|embossed|handheld|print|projection|speech|tty|tv)\\s*,*\\s*)+{.*}\\s*}/misU',
     289            // get rid of remaining media type rules
     290            '/^\\s*@media\\s[^{]+{(.*})\\s*}/misU',
    160291        );
    161292
     
    170301        $css = preg_replace($search, $replace, $css);
    171302
    172         $csskey = md5($css);
    173         if (!isset($this->caches[CACHE_CSS][$csskey])) {
    174 
     303        $cssKey = md5($css);
     304        if (!isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
    175305            // process the CSS file for selectors and definitions
    176             preg_match_all('/(^|[^{}])\s*([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER);
    177 
    178             $all_selectors = array();
     306            preg_match_all('/(?:^|[^{}])\\s*([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER);
     307
     308            $allSelectors = array();
    179309            foreach ($matches as $key => $selectorString) {
    180310                // if there is a blank definition, skip
    181                 if (!strlen(trim($selectorString[3]))) continue;
     311                if (!strlen(trim($selectorString[2]))) {
     312                    continue;
     313                }
    182314
    183315                // else split by commas and duplicate attributes so we can sort by selector precedence
    184                 $selectors = explode(',',$selectorString[2]);
     316                $selectors = explode(',', $selectorString[1]);
    185317                foreach ($selectors as $selector) {
    186 
    187318                    // don't process pseudo-elements and behavioral (dynamic) pseudo-classes; ONLY allow structural pseudo-classes
    188                     if (strpos($selector, ':') !== false && !preg_match('/:\S+\-(child|type)\(/i', $selector)) continue;
    189 
    190                     $all_selectors[] = array('selector' => trim($selector),
    191                                              'attributes' => trim($selectorString[3]),
    192                                              'line' => $key, // keep track of where it appears in the file, since order is important
     319                    if (strpos($selector, ':') !== FALSE && !preg_match('/:\\S+\\-(child|type)\\(/i', $selector)) {
     320                        continue;
     321                    }
     322
     323                    $allSelectors[] = array('selector' => trim($selector),
     324                                             'attributes' => trim($selectorString[2]),
     325                                             // keep track of where it appears in the file, since order is important
     326                                             'line' => $key,
    193327                    );
    194328                }
     
    196330
    197331            // now sort the selectors by precedence
    198             usort($all_selectors, array($this,'sortBySelectorPrecedence'));
    199 
    200             $this->caches[CACHE_CSS][$csskey] = $all_selectors;
    201         }
    202 
    203         foreach ($this->caches[CACHE_CSS][$csskey] as $value) {
    204 
     332            usort($allSelectors, array($this,'sortBySelectorPrecedence'));
     333
     334            $this->caches[self::CACHE_KEY_CSS][$cssKey] = $allSelectors;
     335        }
     336
     337        foreach ($this->caches[self::CACHE_KEY_CSS][$cssKey] as $value) {
    205338            // query the body for the xpath selector
    206             $nodes = $xpath->query($this->translateCSStoXpath(trim($value['selector'])));
    207 
    208             foreach($nodes as $node) {
     339            $nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath(trim($value['selector'])));
     340
     341            /** @var $node \DOMNode */
     342            foreach ($nodesMatchingCssSelectors as $node) {
    209343                // if it has a style attribute, get it, process it, and append (overwrite) new stuff
    210344                if ($node->hasAttribute('style')) {
    211345                    // break it up into an associative array
    212                     $oldStyleArr = $this->cssStyleDefinitionToArray($node->getAttribute('style'));
    213                     $newStyleArr = $this->cssStyleDefinitionToArray($value['attributes']);
     346                    $oldStyleDeclarations = $this->parseCssDeclarationBlock($node->getAttribute('style'));
     347                    $newStyleDeclarations = $this->parseCssDeclarationBlock($value['attributes']);
    214348
    215349                    // new styles overwrite the old styles (not technically accurate, but close enough)
    216                     $combinedArr = array_merge($oldStyleArr,$newStyleArr);
     350                    $combinedArray = array_merge($oldStyleDeclarations, $newStyleDeclarations);
    217351                    $style = '';
    218                     foreach ($combinedArr as $k => $v) $style .= (strtolower($k) . ':' . $v . ';');
     352                    foreach ($combinedArray as $attributeName => $attributeValue) {
     353                        $style .= (strtolower($attributeName) . ':' . $attributeValue . ';');
     354                    }
    219355                } else {
    220356                    // otherwise create a new style
     
    226362
    227363        // now iterate through the nodes that contained inline styles in the original HTML
    228         foreach ($vistedNodeRef as $nodeKey => $origStyleArr) {
    229             $node = $vistedNodes[$nodeKey];
    230             $currStyleArr = $this->cssStyleDefinitionToArray($node->getAttribute('style'));
    231 
    232             $combinedArr = array_merge($currStyleArr, $origStyleArr);
     364        foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
     365            $node = $this->visitedNodes[$nodePath];
     366            $currentStyleAttributes = $this->parseCssDeclarationBlock($node->getAttribute('style'));
     367
     368            $combinedArray = array_merge($currentStyleAttributes, $styleAttributesForNode);
    233369            $style = '';
    234             foreach ($combinedArr as $k => $v) $style .= (strtolower($k) . ':' . $v . ';');
     370            foreach ($combinedArray as $attributeName => $attributeValue) {
     371                $style .= (strtolower($attributeName) . ':' . $attributeValue . ';');
     372            }
    235373
    236374            $node->setAttribute('style', $style);
     
    241379        // lower-case() isn't available to us. We've thus far only set attributes to lowercase, not attribute values. Consequently, we need
    242380        // to translate() the letters that would be in 'NONE' ("NOE") to lowercase.
    243         $nodes = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');
     381        $nodesWithStyleDisplayNone = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');
    244382        // The checks on parentNode and is_callable below ensure that if we've deleted the parent node,
    245383        // we don't try to call removeChild on a nonexistent child node
    246         if ($nodes->length > 0)
    247             foreach ($nodes as $node)
    248                 if ($node->parentNode && is_callable(array($node->parentNode,'removeChild')))
    249                         $node->parentNode->removeChild($node);
     384        if ($nodesWithStyleDisplayNone->length > 0) {
     385            /** @var $node \DOMNode */
     386            foreach ($nodesWithStyleDisplayNone as $node) {
     387                if ($node->parentNode && is_callable(array($node->parentNode,'removeChild'))) {
     388                    $node->parentNode->removeChild($node);
     389                }
     390            }
     391        }
    250392
    251393        if ($this->preserveEncoding) {
    252             return mb_convert_encoding($xmldoc->saveHTML(), $encoding, 'HTML-ENTITIES');
     394            return mb_convert_encoding($xmlDocument->saveHTML(), self::ENCODING, 'HTML-ENTITIES');
    253395        } else {
    254             return $xmldoc->saveHTML();
    255         }
    256     }
    257 
    258     private function sortBySelectorPrecedence($a, $b) {
    259         $precedenceA = $this->getCSSSelectorPrecedence($a['selector']);
    260         $precedenceB = $this->getCSSSelectorPrecedence($b['selector']);
    261 
    262         // we want these sorted ascendingly so selectors with lesser precedence get processed first and
    263         // selectors with greater precedence get sorted last
    264         return ($precedenceA == $precedenceB) ? ($a['line'] < $b['line'] ? -1 : 1) : ($precedenceA < $precedenceB ? -1 : 1);
    265     }
    266 
    267     private function getCSSSelectorPrecedence($selector) {
    268         $selectorkey = md5($selector);
    269         if (!isset($this->caches[CACHE_SELECTOR][$selectorkey])) {
     396            return $xmlDocument->saveHTML();
     397        }
     398    }
     399
     400    /**
     401     * Creates a DOMDocument instance with the current HTML.
     402     *
     403     * @return \DOMDocument
     404     */
     405    private function createXmlDocument() {
     406        $xmlDocument = new \DOMDocument;
     407        $xmlDocument->encoding = self::ENCODING;
     408        $xmlDocument->strictErrorChecking = FALSE;
     409        $xmlDocument->formatOutput = TRUE;
     410        $libxmlState = libxml_use_internal_errors(TRUE);
     411        $xmlDocument->loadHTML($this->getUnifiedHtml());
     412        libxml_clear_errors();
     413        libxml_use_internal_errors($libxmlState);
     414        $xmlDocument->normalizeDocument();
     415
     416        return $xmlDocument;
     417    }
     418
     419    /**
     420     * Returns the HTML with the non-ASCII characters converted into HTML entities and the unprocessable HTML tags removed.
     421     *
     422     * @return string the unified HTML
     423     *
     424     * @throws \BadMethodCallException
     425     */
     426    private function getUnifiedHtml() {
     427        if (!empty($this->unprocessableHtmlTags)) {
     428            $unprocessableHtmlTags = implode('|', $this->unprocessableHtmlTags);
     429            $bodyWithoutUnprocessableTags = preg_replace('/<\\/?(' . $unprocessableHtmlTags . ')[^>]*>/i', '', $this->html);
     430        } else {
     431            $bodyWithoutUnprocessableTags = $this->html;
     432        }
     433
     434        return mb_convert_encoding($bodyWithoutUnprocessableTags, 'HTML-ENTITIES', self::ENCODING);
     435    }
     436
     437    /**
     438     * @param array $a
     439     * @param array $b
     440     *
     441     * @return integer
     442     */
     443    private function sortBySelectorPrecedence(array $a, array $b) {
     444        $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
     445        $precedenceB = $this->getCssSelectorPrecedence($b['selector']);
     446
     447        // We want these sorted in ascending order so selectors with lesser precedence get processed first and
     448        // selectors with greater precedence get sorted last.
     449        // The parentheses around the -1 are necessary to avoid a PHP_CodeSniffer warning about missing spaces around
     450        // arithmetic operators.
     451        // @see http://forge.typo3.org/issues/55605
     452        $precedenceForEquals = ($a['line'] < $b['line'] ? (-1) : 1);
     453        $precedenceForNotEquals = ($precedenceA < $precedenceB ? (-1) : 1);
     454        return ($precedenceA === $precedenceB) ? $precedenceForEquals : $precedenceForNotEquals;
     455    }
     456
     457    /**
     458     * @param string $selector
     459     *
     460     * @return integer
     461     */
     462    private function getCssSelectorPrecedence($selector) {
     463        $selectorKey = md5($selector);
     464        if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
    270465            $precedence = 0;
    271466            $value = 100;
    272             $search = array('\#','\.',''); // ids: worth 100, classes: worth 10, elements: worth 1
     467            // ids: worth 100, classes: worth 10, elements: worth 1
     468            $search = array('\\#','\\.','');
    273469
    274470            foreach ($search as $s) {
    275                 if (trim($selector == '')) break;
    276                 $num = 0;
    277                 $selector = preg_replace('/'.$s.'\w+/','',$selector,-1,$num);
    278                 $precedence += ($value * $num);
     471                if (trim($selector == '')) {
     472                    break;
     473                }
     474                $number = 0;
     475                $selector = preg_replace('/' . $s . '\\w+/', '', $selector, -1, $number);
     476                $precedence += ($value * $number);
    279477                $value /= 10;
    280478            }
    281             $this->caches[CACHE_SELECTOR][$selectorkey] = $precedence;
    282         }
    283 
    284         return $this->caches[CACHE_SELECTOR][$selectorkey];
    285     }
    286 
    287     // right now we support all CSS 1 selectors and most CSS2/3 selectors.
    288     // http://plasmasturm.org/log/444/
    289     private function translateCSStoXpath($css_selector) {
    290 
    291         $css_selector = trim($css_selector);
    292         $xpathkey = md5($css_selector);
    293         if (!isset($this->caches[CACHE_XPATH][$xpathkey])) {
     479            $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
     480        }
     481
     482        return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
     483    }
     484
     485    /**
     486     * Right now, we support all CSS 1 selectors and most CSS2/3 selectors.
     487     *
     488     * @see http://plasmasturm.org/log/444/
     489     *
     490     * @param string $cssSelector
     491     *
     492     * @return string
     493     */
     494    private function translateCssToXpath($cssSelector) {
     495        $cssSelector = trim($cssSelector);
     496        $xpathKey = md5($cssSelector);
     497        if (!isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
    294498            // returns an Xpath selector
    295499            $search = array(
    296                                '/\s+>\s+/', // Matches any element that is a child of parent.
    297                                '/\s+\+\s+/', // Matches any element that is an adjacent sibling.
    298                                '/\s+/', // Matches any element that is a descendant of an parent element element.
    299                                '/([^\/]+):first-child/i', // first-child pseudo-selector
    300                                '/([^\/]+):last-child/i', // last-child pseudo-selector
    301                                '/(\w)\[(\w+)\]/', // Matches element with attribute
    302                                '/(\w)\[(\w+)\=[\'"]?(\w+)[\'"]?\]/', // Matches element with EXACT attribute
     500                // Matches any element that is a child of parent.
     501                '/\\s+>\\s+/',
     502                // Matches any element that is an adjacent sibling.
     503                '/\\s+\\+\\s+/',
     504                // Matches any element that is a descendant of an parent element element.
     505                '/\\s+/',
     506                // first-child pseudo-selector
     507                '/([^\\/]+):first-child/i',
     508                // last-child pseudo-selector
     509                '/([^\\/]+):last-child/i',
     510                // Matches element with attribute
     511                '/(\\w)\\[(\\w+)\\]/',
     512                // Matches element with EXACT attribute
     513                '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
    303514            );
    304515            $replace = array(
    305                                '/',
    306                                '/following-sibling::*[1]/self::',
    307                                '//',
    308                                '*[1]/self::\\1',
    309                                '*[last()]/self::\\1',
    310                                '\\1[@\\2]',
    311                                '\\1[@\\2="\\3"]',
     516                '/',
     517                '/following-sibling::*[1]/self::',
     518                '//',
     519                '*[1]/self::\\1',
     520                '*[last()]/self::\\1',
     521                '\\1[@\\2]',
     522                '\\1[@\\2="\\3"]',
    312523            );
    313524
    314             $css_selector = '//'.preg_replace($search, $replace, $css_selector);
    315 
    316             // matches ids and classes
    317             $css_selector = preg_replace_callback('/(\w+)?\#([\w\-]+)/', array($this, 'matchIdAttributes'), $css_selector);
    318             $css_selector = preg_replace_callback('/(\w+|[\*\]])?((\.[\w\-]+)+)/', array($this, 'matchClassAttributes'), $css_selector);
    319 
    320             // advanced selectors are going to require a bit more advanced emogrification
    321             // if we required PHP 5.3 we could do this with closures
    322             $css_selector = preg_replace_callback('/([^\/]+):nth-child\(\s*(odd|even|[+\-]?\d|[+\-]?\d?n(\s*[+\-]\s*\d)?)\s*\)/i', array($this, 'translateNthChild'), $css_selector);
    323             $css_selector = preg_replace_callback('/([^\/]+):nth-of-type\(\s*(odd|even|[+\-]?\d|[+\-]?\d?n(\s*[+\-]\s*\d)?)\s*\)/i', array($this, 'translateNthOfType'), $css_selector);
    324 
    325             $this->caches[CACHE_SELECTOR][$xpathkey] = $css_selector;
    326         }
    327         return $this->caches[CACHE_SELECTOR][$xpathkey];
    328     }
    329 
    330     private function matchIdAttributes($m) {
    331       return (strlen($m[1]) ? $m[1] : '*').'[@id="'.$m[2].'"]';
    332     }
    333 
    334     private function matchClassAttributes($m) {
    335       return (strlen($m[1]) ? $m[1] : '*').'[contains(concat(" ",@class," "),concat(" ","'.implode('"," "))][contains(concat(" ",@class," "),concat(" ","',explode('.',substr($m[2],1))).'"," "))]';
    336     }
    337 
    338     private function translateNthChild($match) {
    339 
     525            $cssSelector = '//' . preg_replace($search, $replace, $cssSelector);
     526
     527            $cssSelector = preg_replace_callback(self::ID_ATTRIBUTE_MATCHER, array($this, 'matchIdAttributes'), $cssSelector);
     528            $cssSelector = preg_replace_callback(self::CLASS_ATTRIBUTE_MATCHER, array($this, 'matchClassAttributes'), $cssSelector);
     529
     530            // Advanced selectors are going to require a bit more advanced emogrification.
     531            // When we required PHP 5.3, we could do this with closures.
     532            $cssSelector = preg_replace_callback(
     533                '/([^\\/]+):nth-child\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
     534                array($this, 'translateNthChild'), $cssSelector
     535            );
     536            $cssSelector = preg_replace_callback(
     537                '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
     538                array($this, 'translateNthOfType'), $cssSelector
     539            );
     540
     541            $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey] = $cssSelector;
     542        }
     543        return $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey];
     544    }
     545
     546    /**
     547     * @param array $match
     548     *
     549     * @return string
     550     */
     551    private function matchIdAttributes(array $match) {
     552        return (strlen($match[1]) ? $match[1] : '*') . '[@id="' . $match[2] . '"]';
     553    }
     554
     555    /**
     556     * @param array $match
     557     *
     558     * @return string
     559     */
     560    private function matchClassAttributes(array $match) {
     561        return (strlen($match[1]) ? $match[1] : '*') . '[contains(concat(" ",@class," "),concat(" ","' .
     562            implode(
     563                '"," "))][contains(concat(" ",@class," "),concat(" ","',
     564                explode('.', substr($match[2], 1))
     565            ) . '"," "))]';
     566    }
     567
     568    /**
     569     * @param array $match
     570     *
     571     * @return string
     572     */
     573    private function translateNthChild(array $match) {
    340574        $result = $this->parseNth($match);
    341575
     
    343577            if ($result[self::MULTIPLIER] < 0) {
    344578                $result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]);
    345                 return sprintf("*[(last() - position()) mod %u = %u]/self::%s", $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);
     579                return sprintf('*[(last() - position()) mod %u = %u]/self::%s', $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);
    346580            } else {
    347                 return sprintf("*[position() mod %u = %u]/self::%s", $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);
     581                return sprintf('*[position() mod %u = %u]/self::%s', $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);
    348582            }
    349583        } else {
    350             return sprintf("*[%u]/self::%s", $result[self::INDEX], $match[1]);
    351         }
    352     }
    353 
    354     private function translateNthOfType($match) {
    355 
     584            return sprintf('*[%u]/self::%s', $result[self::INDEX], $match[1]);
     585        }
     586    }
     587
     588    /**
     589     * @param array $match
     590     *
     591     * @return string
     592     */
     593    private function translateNthOfType(array $match) {
    356594        $result = $this->parseNth($match);
    357595
     
    359597            if ($result[self::MULTIPLIER] < 0) {
    360598                $result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]);
    361                 return sprintf("%s[(last() - position()) mod %u = %u]", $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);
     599                return sprintf('%s[(last() - position()) mod %u = %u]', $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);
    362600            } else {
    363                 return sprintf("%s[position() mod %u = %u]", $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);
     601                return sprintf('%s[position() mod %u = %u]', $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);
    364602            }
    365603        } else {
    366             return sprintf("%s[%u]", $match[1], $result[self::INDEX]);
    367         }
    368     }
    369 
    370     private function parseNth($match) {
    371 
     604            return sprintf('%s[%u]', $match[1], $result[self::INDEX]);
     605        }
     606    }
     607
     608    /**
     609     * @param array $match
     610     *
     611     * @return array
     612     */
     613    private function parseNth(array $match) {
    372614        if (in_array(strtolower($match[2]), array('even','odd'))) {
    373615            $index = strtolower($match[2]) == 'even' ? 0 : 1;
    374616            return array(self::MULTIPLIER => 2, self::INDEX => $index);
    375         // if there is a multiplier
    376         } else if (stripos($match[2], 'n') === false) {
     617        } elseif (stripos($match[2], 'n') === FALSE) {
     618            // if there is a multiplier
    377619            $index = intval(str_replace(' ', '', $match[2]));
    378620            return array(self::INDEX => $index);
    379621        } else {
    380 
    381622            if (isset($match[3])) {
    382                 $multiple_term = str_replace($match[3], '', $match[2]);
     623                $multipleTerm = str_replace($match[3], '', $match[2]);
    383624                $index = intval(str_replace(' ', '', $match[3]));
    384625            } else {
    385                 $multiple_term = $match[2];
     626                $multipleTerm = $match[2];
    386627                $index = 0;
    387628            }
    388629
    389             $multiplier = str_ireplace('n', '', $multiple_term);
    390 
    391             if (!strlen($multiplier)) $multiplier = 1;
    392             elseif ($multiplier == 0) return array(self::INDEX => $index);
    393             else $multiplier = intval($multiplier);
    394 
    395             while ($index < 0) $index += abs($multiplier);
     630            $multiplier = str_ireplace('n', '', $multipleTerm);
     631
     632            if (!strlen($multiplier)) {
     633                $multiplier = 1;
     634            } elseif ($multiplier == 0) {
     635                return array(self::INDEX => $index);
     636            } else {
     637                $multiplier = intval($multiplier);
     638            }
     639
     640            while ($index < 0) {
     641                $index += abs($multiplier);
     642            }
    396643
    397644            return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
     
    399646    }
    400647
    401     private function cssStyleDefinitionToArray($style) {
    402         $definitions = explode(';',$style);
    403         $retArr = array();
    404         foreach ($definitions as $def) {
    405             if (empty($def) || strpos($def, ':') === false) continue;
    406             list($key,$value) = explode(':',$def,2);
    407             if (empty($key) || strlen(trim($value)) === 0) continue;
    408             $retArr[trim($key)] = trim($value);
    409         }
    410         return $retArr;
     648    /**
     649     * Parses a CSS declaration block into property name/value pairs.
     650     *
     651     * Example:
     652     *
     653     * The declaration block
     654     *
     655     *   "color: #000; font-weight: bold;"
     656     *
     657     * will be parsed into the following array:
     658     *
     659     *   "color" => "#000"
     660     *   "font-weight" => "bold"
     661     *
     662     * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty
     663     *
     664     * @return array the CSS declarations with the property names as array keys and the property values as array values
     665     */
     666    private function parseCssDeclarationBlock($cssDeclarationBlock) {
     667        $properties = array();
     668
     669        $declarations = explode(';', $cssDeclarationBlock);
     670        foreach ($declarations as $declaration) {
     671            $matches = array();
     672            if (!preg_match('/ *([a-z\-]+) *: *([^;]+) */', $declaration, $matches)) {
     673                continue;
     674            }
     675            $propertyName = $matches[1];
     676            $propertyValue = $matches[2];
     677            $properties[$propertyName] = $propertyValue;
     678        }
     679
     680        return $properties;
    411681    }
    412682}
  • trunk/include/functions_mail.inc.php

    r26461 r27403  
    908908  include_once(PHPWG_ROOT_PATH.'include/emogrifier.class.php');
    909909
    910   // disable DOM warnings
    911   $e_state = libxml_use_internal_errors(true);
    912 
    913910  $e = new Emogrifier($content);
    914   // $e->preserveStyleTag = true;
    915   $content = $e->emogrify();
    916 
    917   libxml_clear_errors();
    918   libxml_use_internal_errors($e_state);
    919 
    920   return $content;
     911  return $e->emogrify();
    921912}
    922913
Note: See TracChangeset for help on using the changeset viewer.