Changeset 27403 for trunk/include
- Timestamp:
- Feb 18, 2014, 6:32:36 PM (10 years ago)
- Location:
- trunk/include
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/emogrifier.class.php
r26972 r27403 1 1 <?php 2 /* 3 UPDATES 4 5 2008-08-10 Fixed CSS comment stripping regex to add PCRE_DOTALL (changed from '/\/\*.*\*\//U' to '/\/\*.*\*\//sU') 6 2008-08-18 Added lines instructing DOMDocument to attempt to normalize HTML before processing 7 2008-10-20 Fixed bug with bad variable name... Thanks Thomas! 8 2008-03-02 Added licensing terms under the MIT License 9 Only remove unprocessable HTML tags if they exist in the array 10 2009-06-03 Normalize existing CSS (style) attributes in the HTML before we process the CSS. 11 Made it so that the display:none stripper doesn't require a trailing semi-colon. 12 2009-08-13 Added support for subset class values (e.g. "p.class1.class2"). 13 Added better protection for bad css attributes. 14 Fixed support for HTML entities. 15 2009-08-17 Fixed CSS selector processing so that selectors are processed by precedence/specificity, and not just in order. 16 2009-10-29 Fixed so that selectors appearing later in the CSS will have precedence over identical selectors appearing earlier. 17 2009-11-04 Explicitly declared static functions static to get rid of E_STRICT notices. 18 2010-05-18 Fixed bug where full url filenames with protocols wouldn't get split improperly when we explode on ':'... Thanks Mark! 19 Added two new attribute selectors 20 2010-06-16 Added static caching for less processing overhead in situations where multiple emogrification takes place 21 2010-07-26 Fixed bug where '0' values were getting discarded because of php's empty() function... Thanks Scott! 22 2010-09-03 Added checks to invisible node removal to ensure that we don't try to remove non-existent child nodes of parents that have already been deleted 23 2011-04-08 Fixed errors in CSS->XPath conversion for adjacent sibling selectors and id/class combinations... Thanks Bob V.! 24 2011-06-08 Fixed an error where CSS @media types weren't being parsed correctly... Thanks Will W.! 25 2011-08-03 Fixed an error where an empty selector at the beginning of the CSS would cause a parse error on the next selector... Thanks Alexei T.! 26 2011-10-13 Fully fixed a bug introduced in 2011-06-08 where selectors at the beginning of the CSS would be parsed incorrectly... Thanks Thomas A.! 27 2011-10-26 Added an option to allow you to output emogrified code without extended characters being turned into HTML entities. 28 Moved static references to class attributes so they can be manipulated. 29 Added the ability to clear out the (formerly) static cache when CSS is reloaded. 30 2011-12-22 Fixed a bug that was overwriting existing inline styles from the original HTML... Thanks Sagi L.! 31 2012-01-31 Fixed a bug that was introduced with the 2011-12-22 revision... Thanks Sagi L. and M. Bąkowski! 32 Added extraction of <style> blocks within the HTML due to popular demand. 33 Added several new pseudo-selectors (first-child, last-child, nth-child, and nth-of-type). 34 2012-02-07 Fixed some recent code introductions to use class constants rather than global constants. 35 Fixed some recent code introductions to make it cleaner to read. 36 2012-05-01 Made removal of invisible nodes operate in a case-insensitive manner... Thanks Juha P.! 37 2013-10-10 Add preserveStyleTag option 38 2014-01-26 PHP 5.5 compatibility (/e modifier is deprecated in preg_replace) 39 */ 40 41 define('CACHE_CSS', 0); 42 define('CACHE_SELECTOR', 1); 43 define('CACHE_XPATH', 2); 44 2 3 /** 4 * This class provides functions for converting CSS styles into inline style attributes in your HTML code. 5 * 6 * For more information, please see the README.md file. 7 * 8 * @author Cameron Brooks 9 * @author Jaime Prado 10 */ 45 11 class Emogrifier { 46 47 // for calculating nth-of-type and nth-child selectors 12 /** 13 * @var string 14 */ 15 const ENCODING = 'UTF-8'; 16 17 /** 18 * @var integer 19 */ 20 const CACHE_KEY_CSS = 0; 21 22 /** 23 * @var integer 24 */ 25 const CACHE_KEY_SELECTOR = 1; 26 27 /** 28 * @var integer 29 */ 30 const CACHE_KEY_XPATH = 2; 31 32 /** 33 * for calculating nth-of-type and nth-child selectors 34 * 35 * @var integer 36 */ 48 37 const INDEX = 0; 38 39 /** 40 * for calculating nth-of-type and nth-child selectors 41 * 42 * @var integer 43 */ 49 44 const MULTIPLIER = 1; 50 45 46 /** 47 * @var string 48 */ 49 const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/'; 50 51 /** 52 * @var string 53 */ 54 const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/'; 55 56 /** 57 * @var string 58 */ 51 59 private $html = ''; 60 61 /** 62 * @var string 63 */ 52 64 private $css = ''; 53 private $unprocessableHTMLTags = array('wbr'); 54 private $caches = array(); 55 56 // this attribute applies to the case where you want to preserve your original text encoding. 57 // by default, emogrifier translates your text into HTML entities for two reasons: 58 // 1. because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email 59 // 2. it translates any illegal XML characters that DOMDocument cannot work with 60 // if you would like to preserve your original encoding, set this attribute to true. 61 public $preserveEncoding = false; 62 63 // by default, emogrifier removes <style> tags, set preserveStyleTag to true to keep them 64 public $preserveStyleTag = false; 65 65 66 /** 67 * @var array<string> 68 */ 69 private $unprocessableHtmlTags = array('wbr'); 70 71 /** 72 * @var array<array> 73 */ 74 private $caches = array( 75 self::CACHE_KEY_CSS => array(), 76 self::CACHE_KEY_SELECTOR => array(), 77 self::CACHE_KEY_XPATH => array(), 78 ); 79 80 /** 81 * the visited nodes with the XPath paths as array keys 82 * 83 * @var array<\DOMNode> 84 */ 85 private $visitedNodes = array(); 86 87 /** 88 * the styles to apply to the nodes with the XPath paths as array keys for the outer array and the attribute names/values 89 * as key/value pairs for the inner array 90 * 91 * @var array<array><string> 92 */ 93 private $styleAttributesForNodes = array(); 94 95 /** 96 * This attribute applies to the case where you want to preserve your original text encoding. 97 * 98 * By default, emogrifier translates your text into HTML entities for two reasons: 99 * 100 * 1. Because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email. 101 * 102 * 2. It translates any illegal XML characters that DOMDocument cannot work with. 103 * 104 * If you would like to preserve your original encoding, set this attribute to TRUE. 105 * 106 * @var boolean 107 */ 108 public $preserveEncoding = FALSE; 109 110 /** 111 * The constructor. 112 * 113 * @param string $html the HTML to emogrify, must be UTF-8-encoded 114 * @param string $css the CSS to merge, must be UTF-8-encoded 115 */ 66 116 public function __construct($html = '', $css = '') { 117 $this->setHtml($html); 118 $this->setCss($css); 119 } 120 121 /** 122 * The destructor. 123 */ 124 public function __destruct() { 125 $this->purgeVisitedNodes(); 126 } 127 128 /** 129 * Sets the HTML to emogrify. 130 * 131 * @param string $html the HTML to emogrify, must be UTF-8-encoded 132 * 133 * @return void 134 */ 135 public function setHtml($html = '') { 67 136 $this->html = $html; 68 $this->css = $css; 69 $this->clearCache(); 70 } 71 72 public function setHTML($html = '') { $this->html = $html; } 73 public function setCSS($css = '') { 137 } 138 139 /** 140 * Sets the CSS to merge with the HTML. 141 * 142 * @param string $css the CSS to merge, must be UTF-8-encoded 143 * 144 * @return void 145 */ 146 public function setCss($css = '') { 74 147 $this->css = $css; 75 $this->clearCache(CACHE_CSS); 76 } 77 78 public function clearCache($key = null) { 79 if (!is_null($key)) { 80 if (isset($this->caches[$key])) $this->caches[$key] = array(); 81 } else { 82 $this->caches = array( 83 CACHE_CSS => array(), 84 CACHE_SELECTOR => array(), 85 CACHE_XPATH => array(), 86 ); 87 } 88 } 89 90 // there are some HTML tags that DOMDocument cannot process, and will throw an error if it encounters them. 91 // in particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document. 92 // these functions allow you to add/remove them if necessary. 93 // it only strips them from the code (does not remove actual nodes). 94 public function addUnprocessableHTMLTag($tag) { $this->unprocessableHTMLTags[] = $tag; } 95 public function removeUnprocessableHTMLTag($tag) { 96 if (($key = array_search($tag,$this->unprocessableHTMLTags)) !== false) 97 unset($this->unprocessableHTMLTags[$key]); 98 } 99 100 // applies the CSS you submit to the html you submit. places the css inline 148 } 149 150 /** 151 * Clears all caches. 152 * 153 * @return void 154 */ 155 private function clearAllCaches() { 156 $this->clearCache(self::CACHE_KEY_CSS); 157 $this->clearCache(self::CACHE_KEY_SELECTOR); 158 $this->clearCache(self::CACHE_KEY_XPATH); 159 } 160 161 /** 162 * Clears a single cache by key. 163 * 164 * @param integer $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR or CACHE_KEY_XPATH 165 * 166 * @return void 167 * 168 * @throws \InvalidArgumentException 169 */ 170 private function clearCache($key) { 171 $allowedCacheKeys = array(self::CACHE_KEY_CSS, self::CACHE_KEY_SELECTOR, self::CACHE_KEY_XPATH); 172 if (!in_array($key, $allowedCacheKeys, TRUE)) { 173 throw new \InvalidArgumentException('Invalid cache key: ' . $key, 1391822035); 174 } 175 176 $this->caches[$key] = array(); 177 } 178 179 /** 180 * Purges the visited nodes. 181 * 182 * @return void 183 */ 184 private function purgeVisitedNodes() { 185 $this->visitedNodes = array(); 186 $this->styleAttributesForNodes = array(); 187 } 188 189 /** 190 * Marks a tag for removal. 191 * 192 * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them. 193 * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document. 194 * 195 * Note: The tags will not be removed if they have any content. 196 * 197 * @param string $tagName the tag name, e.g., "p" 198 * 199 * @return void 200 */ 201 public function addUnprocessableHtmlTag($tagName) { 202 $this->unprocessableHtmlTags[] = $tagName; 203 } 204 205 /** 206 * Drops a tag from the removal list. 207 * 208 * @param string $tagName the tag name, e.g., "p" 209 * 210 * @return void 211 */ 212 public function removeUnprocessableHtmlTag($tagName) { 213 $key = array_search($tagName, $this->unprocessableHtmlTags, TRUE); 214 if ($key !== FALSE) { 215 unset($this->unprocessableHtmlTags[$key]); 216 } 217 } 218 219 /** 220 * Applies the CSS you submit to the HTML you submit. 221 * 222 * This method places the CSS inline. 223 * 224 * @return string 225 * 226 * @throws \BadMethodCallException 227 */ 101 228 public function emogrify() { 102 $body = $this->html; 103 104 // remove any unprocessable HTML tags (tags that DOMDocument cannot parse; this includes wbr and many new HTML5 tags) 105 if (count($this->unprocessableHTMLTags)) { 106 $unprocessableHTMLTags = implode('|',$this->unprocessableHTMLTags); 107 $body = preg_replace("/<\/?($unprocessableHTMLTags)[^>]*>/i",'',$body); 108 } 109 110 $encoding = mb_detect_encoding($body); 111 $body = mb_convert_encoding($body, 'HTML-ENTITIES', $encoding); 112 113 $xmldoc = new DOMDocument; 114 $xmldoc->encoding = $encoding; 115 $xmldoc->strictErrorChecking = false; 116 $xmldoc->formatOutput = true; 117 $xmldoc->loadHTML($body); 118 $xmldoc->normalizeDocument(); 119 120 $xpath = new DOMXPath($xmldoc); 229 if ($this->html === '') { 230 throw new \BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096); 231 } 232 233 $xmlDocument = $this->createXmlDocument(); 234 $xpath = new \DOMXPath($xmlDocument); 235 $this->clearAllCaches(); 121 236 122 237 // before be begin processing the CSS file, parse the document and normalize all existing CSS attributes (changes 'DISPLAY: none' to 'display: none'); 123 238 // we wouldn't have to do this if DOMXPath supported XPath 2.0. 124 239 // also store a reference of nodes with existing inline styles so we don't overwrite them 125 $vistedNodes = $vistedNodeRef = array(); 126 $nodes = @$xpath->query('//*[@style]'); 127 foreach ($nodes as $node) { 128 $normalizedOrigStyle = preg_replace_callback('/[A-z\-]+(?=\:)/S',create_function('$m', 'return strtolower($m[0]);'),$node->getAttribute('style')); 129 130 // in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles 131 $nodeKey = md5($node->getNodePath()); 132 if (!isset($vistedNodeRef[$nodeKey])) { 133 $vistedNodeRef[$nodeKey] = $this->cssStyleDefinitionToArray($normalizedOrigStyle); 134 $vistedNodes[$nodeKey] = $node; 135 } 136 137 $node->setAttribute('style', $normalizedOrigStyle); 240 $this->purgeVisitedNodes(); 241 242 $nodesWithStyleAttributes = $xpath->query('//*[@style]'); 243 if ($nodesWithStyleAttributes !== FALSE) { 244 $callback = create_function('$m', 'return strtolower($m[0]);'); 245 246 /** @var $nodeWithStyleAttribute \DOMNode */ 247 foreach ($nodesWithStyleAttributes as $node) { 248 $normalizedOriginalStyle = preg_replace_callback( 249 '/[A-z\\-]+(?=\\:)/S', 250 $callback, 251 $node->getAttribute('style') 252 ); 253 254 // in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles 255 $nodePath = $node->getNodePath(); 256 if (!isset($this->styleAttributesForNodes[$nodePath])) { 257 $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle); 258 $this->visitedNodes[$nodePath] = $node; 259 } 260 261 $node->setAttribute('style', $normalizedOriginalStyle); 262 } 138 263 } 139 264 … … 141 266 // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS) 142 267 $css = $this->css; 143 $nodes = @$xpath->query('//style'); 144 foreach ($nodes as $node) { 145 // append the css 146 $css .= "\n\n{$node->nodeValue}"; 147 // remove the <style> node 148 if (!$this->preserveStyleTag) { 149 $node->parentNode->removeChild($node); 268 $styleNodes = $xpath->query('//style'); 269 if ($styleNodes !== FALSE) { 270 /** @var $styleNode \DOMNode */ 271 foreach ($styleNodes as $styleNode) { 272 // append the css 273 $css .= "\n\n" . $styleNode->nodeValue; 274 // remove the <style> node 275 $styleNode->parentNode->removeChild($styleNode); 150 276 } 151 277 } … … 153 279 // filter the CSS 154 280 $search = array( 155 '/\/\*.*\*\//sU', // get rid of css comment code 156 '/^\s*@import\s[^;]+;/misU', // strip out any import directives 157 '/^\s*@media\s[^{]+{\s*}/misU', // strip any empty media enclosures 158 '/^\s*@media\s+((aural|braille|embossed|handheld|print|projection|speech|tty|tv)\s*,*\s*)+{.*}\s*}/misU', // strip out all media types that are not 'screen' or 'all' (these don't apply to email) 159 '/^\s*@media\s[^{]+{(.*})\s*}/misU', // get rid of remaining media type enclosures 281 // get rid of css comment code 282 '/\\/\\*.*\\*\\//sU', 283 // strip out any import directives 284 '/^\\s*@import\\s[^;]+;/misU', 285 // strip any empty media enclosures 286 '/^\\s*@media\\s[^{]+{\\s*}/misU', 287 // strip out all media rules that are not 'screen' or 'all' (these don't apply to email) 288 '/^\\s*@media\\s+((aural|braille|embossed|handheld|print|projection|speech|tty|tv)\\s*,*\\s*)+{.*}\\s*}/misU', 289 // get rid of remaining media type rules 290 '/^\\s*@media\\s[^{]+{(.*})\\s*}/misU', 160 291 ); 161 292 … … 170 301 $css = preg_replace($search, $replace, $css); 171 302 172 $csskey = md5($css); 173 if (!isset($this->caches[CACHE_CSS][$csskey])) { 174 303 $cssKey = md5($css); 304 if (!isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) { 175 305 // process the CSS file for selectors and definitions 176 preg_match_all('/( ^|[^{}])\s*([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER);177 178 $all _selectors = array();306 preg_match_all('/(?:^|[^{}])\\s*([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER); 307 308 $allSelectors = array(); 179 309 foreach ($matches as $key => $selectorString) { 180 310 // if there is a blank definition, skip 181 if (!strlen(trim($selectorString[3]))) continue; 311 if (!strlen(trim($selectorString[2]))) { 312 continue; 313 } 182 314 183 315 // else split by commas and duplicate attributes so we can sort by selector precedence 184 $selectors = explode(',', $selectorString[2]);316 $selectors = explode(',', $selectorString[1]); 185 317 foreach ($selectors as $selector) { 186 187 318 // don't process pseudo-elements and behavioral (dynamic) pseudo-classes; ONLY allow structural pseudo-classes 188 if (strpos($selector, ':') !== false && !preg_match('/:\S+\-(child|type)\(/i', $selector)) continue; 189 190 $all_selectors[] = array('selector' => trim($selector), 191 'attributes' => trim($selectorString[3]), 192 'line' => $key, // keep track of where it appears in the file, since order is important 319 if (strpos($selector, ':') !== FALSE && !preg_match('/:\\S+\\-(child|type)\\(/i', $selector)) { 320 continue; 321 } 322 323 $allSelectors[] = array('selector' => trim($selector), 324 'attributes' => trim($selectorString[2]), 325 // keep track of where it appears in the file, since order is important 326 'line' => $key, 193 327 ); 194 328 } … … 196 330 197 331 // now sort the selectors by precedence 198 usort($all_selectors, array($this,'sortBySelectorPrecedence')); 199 200 $this->caches[CACHE_CSS][$csskey] = $all_selectors; 201 } 202 203 foreach ($this->caches[CACHE_CSS][$csskey] as $value) { 204 332 usort($allSelectors, array($this,'sortBySelectorPrecedence')); 333 334 $this->caches[self::CACHE_KEY_CSS][$cssKey] = $allSelectors; 335 } 336 337 foreach ($this->caches[self::CACHE_KEY_CSS][$cssKey] as $value) { 205 338 // query the body for the xpath selector 206 $nodes = $xpath->query($this->translateCSStoXpath(trim($value['selector']))); 207 208 foreach($nodes as $node) { 339 $nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath(trim($value['selector']))); 340 341 /** @var $node \DOMNode */ 342 foreach ($nodesMatchingCssSelectors as $node) { 209 343 // if it has a style attribute, get it, process it, and append (overwrite) new stuff 210 344 if ($node->hasAttribute('style')) { 211 345 // break it up into an associative array 212 $oldStyle Arr = $this->cssStyleDefinitionToArray($node->getAttribute('style'));213 $newStyle Arr = $this->cssStyleDefinitionToArray($value['attributes']);346 $oldStyleDeclarations = $this->parseCssDeclarationBlock($node->getAttribute('style')); 347 $newStyleDeclarations = $this->parseCssDeclarationBlock($value['attributes']); 214 348 215 349 // new styles overwrite the old styles (not technically accurate, but close enough) 216 $combinedArr = array_merge($oldStyleArr,$newStyleArr);350 $combinedArray = array_merge($oldStyleDeclarations, $newStyleDeclarations); 217 351 $style = ''; 218 foreach ($combinedArr as $k => $v) $style .= (strtolower($k) . ':' . $v . ';'); 352 foreach ($combinedArray as $attributeName => $attributeValue) { 353 $style .= (strtolower($attributeName) . ':' . $attributeValue . ';'); 354 } 219 355 } else { 220 356 // otherwise create a new style … … 226 362 227 363 // now iterate through the nodes that contained inline styles in the original HTML 228 foreach ($ vistedNodeRef as $nodeKey => $origStyleArr) {229 $node = $ vistedNodes[$nodeKey];230 $curr StyleArr = $this->cssStyleDefinitionToArray($node->getAttribute('style'));231 232 $combinedArr = array_merge($currStyleArr, $origStyleArr);364 foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) { 365 $node = $this->visitedNodes[$nodePath]; 366 $currentStyleAttributes = $this->parseCssDeclarationBlock($node->getAttribute('style')); 367 368 $combinedArray = array_merge($currentStyleAttributes, $styleAttributesForNode); 233 369 $style = ''; 234 foreach ($combinedArr as $k => $v) $style .= (strtolower($k) . ':' . $v . ';'); 370 foreach ($combinedArray as $attributeName => $attributeValue) { 371 $style .= (strtolower($attributeName) . ':' . $attributeValue . ';'); 372 } 235 373 236 374 $node->setAttribute('style', $style); … … 241 379 // lower-case() isn't available to us. We've thus far only set attributes to lowercase, not attribute values. Consequently, we need 242 380 // to translate() the letters that would be in 'NONE' ("NOE") to lowercase. 243 $nodes = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');381 $nodesWithStyleDisplayNone = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]'); 244 382 // The checks on parentNode and is_callable below ensure that if we've deleted the parent node, 245 383 // we don't try to call removeChild on a nonexistent child node 246 if ($nodes->length > 0) 247 foreach ($nodes as $node) 248 if ($node->parentNode && is_callable(array($node->parentNode,'removeChild'))) 249 $node->parentNode->removeChild($node); 384 if ($nodesWithStyleDisplayNone->length > 0) { 385 /** @var $node \DOMNode */ 386 foreach ($nodesWithStyleDisplayNone as $node) { 387 if ($node->parentNode && is_callable(array($node->parentNode,'removeChild'))) { 388 $node->parentNode->removeChild($node); 389 } 390 } 391 } 250 392 251 393 if ($this->preserveEncoding) { 252 return mb_convert_encoding($xml doc->saveHTML(), $encoding, 'HTML-ENTITIES');394 return mb_convert_encoding($xmlDocument->saveHTML(), self::ENCODING, 'HTML-ENTITIES'); 253 395 } else { 254 return $xmldoc->saveHTML(); 255 } 256 } 257 258 private function sortBySelectorPrecedence($a, $b) { 259 $precedenceA = $this->getCSSSelectorPrecedence($a['selector']); 260 $precedenceB = $this->getCSSSelectorPrecedence($b['selector']); 261 262 // we want these sorted ascendingly so selectors with lesser precedence get processed first and 263 // selectors with greater precedence get sorted last 264 return ($precedenceA == $precedenceB) ? ($a['line'] < $b['line'] ? -1 : 1) : ($precedenceA < $precedenceB ? -1 : 1); 265 } 266 267 private function getCSSSelectorPrecedence($selector) { 268 $selectorkey = md5($selector); 269 if (!isset($this->caches[CACHE_SELECTOR][$selectorkey])) { 396 return $xmlDocument->saveHTML(); 397 } 398 } 399 400 /** 401 * Creates a DOMDocument instance with the current HTML. 402 * 403 * @return \DOMDocument 404 */ 405 private function createXmlDocument() { 406 $xmlDocument = new \DOMDocument; 407 $xmlDocument->encoding = self::ENCODING; 408 $xmlDocument->strictErrorChecking = FALSE; 409 $xmlDocument->formatOutput = TRUE; 410 $libxmlState = libxml_use_internal_errors(TRUE); 411 $xmlDocument->loadHTML($this->getUnifiedHtml()); 412 libxml_clear_errors(); 413 libxml_use_internal_errors($libxmlState); 414 $xmlDocument->normalizeDocument(); 415 416 return $xmlDocument; 417 } 418 419 /** 420 * Returns the HTML with the non-ASCII characters converts into HTML entities and the unprocessable HTML tags removed. 421 * 422 * @return string the unified HTML 423 * 424 * @throws \BadMethodCallException 425 */ 426 private function getUnifiedHtml() { 427 if (!empty($this->unprocessableHtmlTags)) { 428 $unprocessableHtmlTags = implode('|', $this->unprocessableHtmlTags); 429 $bodyWithoutUnprocessableTags = preg_replace('/<\\/?(' . $unprocessableHtmlTags . ')[^>]*>/i', '', $this->html); 430 } else { 431 $bodyWithoutUnprocessableTags = $this->html; 432 } 433 434 return mb_convert_encoding($bodyWithoutUnprocessableTags, 'HTML-ENTITIES', self::ENCODING); 435 } 436 437 /** 438 * @param array $a 439 * @param array $b 440 * 441 * @return integer 442 */ 443 private function sortBySelectorPrecedence(array $a, array $b) { 444 $precedenceA = $this->getCssSelectorPrecedence($a['selector']); 445 $precedenceB = $this->getCssSelectorPrecedence($b['selector']); 446 447 // We want these sorted in ascending order so selectors with lesser precedence get processed first and 448 // selectors with greater precedence get sorted last. 449 // The parenthesis around the -1 are necessary to avoid a PHP_CodeSniffer warning about missing spaces around 450 // arithmetic operators. 451 // @see http://forge.typo3.org/issues/55605 452 $precedenceForEquals = ($a['line'] < $b['line'] ? (-1) : 1); 453 $precedenceForNotEquals = ($precedenceA < $precedenceB ? (-1) : 1); 454 return ($precedenceA === $precedenceB) ? $precedenceForEquals : $precedenceForNotEquals; 455 } 456 457 /** 458 * @param string $selector 459 * 460 * @return integer 461 */ 462 private function getCssSelectorPrecedence($selector) { 463 $selectorKey = md5($selector); 464 if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) { 270 465 $precedence = 0; 271 466 $value = 100; 272 $search = array('\#','\.',''); // ids: worth 100, classes: worth 10, elements: worth 1 467 // ids: worth 100, classes: worth 10, elements: worth 1 468 $search = array('\\#','\\.',''); 273 469 274 470 foreach ($search as $s) { 275 if (trim($selector == '')) break; 276 $num = 0; 277 $selector = preg_replace('/'.$s.'\w+/','',$selector,-1,$num); 278 $precedence += ($value * $num); 471 if (trim($selector == '')) { 472 break; 473 } 474 $number = 0; 475 $selector = preg_replace('/' . $s . '\\w+/', '', $selector, -1, $number); 476 $precedence += ($value * $number); 279 477 $value /= 10; 280 478 } 281 $this->caches[CACHE_SELECTOR][$selectorkey] = $precedence; 282 } 283 284 return $this->caches[CACHE_SELECTOR][$selectorkey]; 285 } 286 287 // right now we support all CSS 1 selectors and most CSS2/3 selectors. 288 // http://plasmasturm.org/log/444/ 289 private function translateCSStoXpath($css_selector) { 290 291 $css_selector = trim($css_selector); 292 $xpathkey = md5($css_selector); 293 if (!isset($this->caches[CACHE_XPATH][$xpathkey])) { 479 $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence; 480 } 481 482 return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey]; 483 } 484 485 /** 486 * Right now, we support all CSS 1 selectors and most CSS2/3 selectors. 487 * 488 * @see http://plasmasturm.org/log/444/ 489 * 490 * @param string $cssSelector 491 * 492 * @return string 493 */ 494 private function translateCssToXpath($cssSelector) { 495 $cssSelector = trim($cssSelector); 496 $xpathKey = md5($cssSelector); 497 if (!isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) { 294 498 // returns an Xpath selector 295 499 $search = array( 296 '/\s+>\s+/', // Matches any element that is a child of parent. 297 '/\s+\+\s+/', // Matches any element that is an adjacent sibling. 298 '/\s+/', // Matches any element that is a descendant of an parent element element. 299 '/([^\/]+):first-child/i', // first-child pseudo-selector 300 '/([^\/]+):last-child/i', // last-child pseudo-selector 301 '/(\w)\[(\w+)\]/', // Matches element with attribute 302 '/(\w)\[(\w+)\=[\'"]?(\w+)[\'"]?\]/', // Matches element with EXACT attribute 500 // Matches any element that is a child of parent. 501 '/\\s+>\\s+/', 502 // Matches any element that is an adjacent sibling. 503 '/\\s+\\+\\s+/', 504 // Matches any element that is a descendant of an parent element element. 505 '/\\s+/', 506 // first-child pseudo-selector 507 '/([^\\/]+):first-child/i', 508 // last-child pseudo-selector 509 '/([^\\/]+):last-child/i', 510 // Matches element with attribute 511 '/(\\w)\\[(\\w+)\\]/', 512 // Matches element with EXACT attribute 513 '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/', 303 514 ); 304 515 $replace = array( 305 306 307 308 309 310 311 516 '/', 517 '/following-sibling::*[1]/self::', 518 '//', 519 '*[1]/self::\\1', 520 '*[last()]/self::\\1', 521 '\\1[@\\2]', 522 '\\1[@\\2="\\3"]', 312 523 ); 313 524 314 $css_selector = '//'.preg_replace($search, $replace, $css_selector); 315 316 // matches ids and classes 317 $css_selector = preg_replace_callback('/(\w+)?\#([\w\-]+)/', array($this, 'matchIdAttributes'), $css_selector); 318 $css_selector = preg_replace_callback('/(\w+|[\*\]])?((\.[\w\-]+)+)/', array($this, 'matchClassAttributes'), $css_selector); 319 320 // advanced selectors are going to require a bit more advanced emogrification 321 // if we required PHP 5.3 we could do this with closures 322 $css_selector = preg_replace_callback('/([^\/]+):nth-child\(\s*(odd|even|[+\-]?\d|[+\-]?\d?n(\s*[+\-]\s*\d)?)\s*\)/i', array($this, 'translateNthChild'), $css_selector); 323 $css_selector = preg_replace_callback('/([^\/]+):nth-of-type\(\s*(odd|even|[+\-]?\d|[+\-]?\d?n(\s*[+\-]\s*\d)?)\s*\)/i', array($this, 'translateNthOfType'), $css_selector); 324 325 $this->caches[CACHE_SELECTOR][$xpathkey] = $css_selector; 326 } 327 return $this->caches[CACHE_SELECTOR][$xpathkey]; 328 } 329 330 private function matchIdAttributes($m) { 331 return (strlen($m[1]) ? $m[1] : '*').'[@id="'.$m[2].'"]'; 332 } 333 334 private function matchClassAttributes($m) { 335 return (strlen($m[1]) ? $m[1] : '*').'[contains(concat(" ",@class," "),concat(" ","'.implode('"," "))][contains(concat(" ",@class," "),concat(" ","',explode('.',substr($m[2],1))).'"," "))]'; 336 } 337 338 private function translateNthChild($match) { 339 525 $cssSelector = '//' . preg_replace($search, $replace, $cssSelector); 526 527 $cssSelector = preg_replace_callback(self::ID_ATTRIBUTE_MATCHER, array($this, 'matchIdAttributes'), $cssSelector); 528 $cssSelector = preg_replace_callback(self::CLASS_ATTRIBUTE_MATCHER, array($this, 'matchClassAttributes'), $cssSelector); 529 530 // Advanced selectors are going to require a bit more advanced emogrification. 531 // When we required PHP 5.3, we could do this with closures. 532 $cssSelector = preg_replace_callback( 533 '/([^\\/]+):nth-child\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i', 534 array($this, 'translateNthChild'), $cssSelector 535 ); 536 $cssSelector = preg_replace_callback( 537 '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i', 538 array($this, 'translateNthOfType'), $cssSelector 539 ); 540 541 $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey] = $cssSelector; 542 } 543 return $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey]; 544 } 545 546 /** 547 * @param array $match 548 * 549 * @return string 550 */ 551 private function matchIdAttributes(array $match) { 552 return (strlen($match[1]) ? $match[1] : '*') . '[@id="' . $match[2] . '"]'; 553 } 554 555 /** 556 * @param array $match 557 * 558 * @return string 559 */ 560 private function matchClassAttributes(array $match) { 561 return (strlen($match[1]) ? $match[1] : '*') . '[contains(concat(" ",@class," "),concat(" ","' . 562 implode( 563 '"," "))][contains(concat(" ",@class," "),concat(" ","', 564 explode('.', substr($match[2], 1)) 565 ) . '"," "))]'; 566 } 567 568 /** 569 * @param array $match 570 * 571 * @return string 572 */ 573 private function translateNthChild(array $match) { 340 574 $result = $this->parseNth($match); 341 575 … … 343 577 if ($result[self::MULTIPLIER] < 0) { 344 578 $result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]); 345 return sprintf( "*[(last() - position()) mod %u = %u]/self::%s", $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);579 return sprintf('*[(last() - position()) mod %u = %u]/self::%s', $result[self::MULTIPLIER], $result[self::INDEX], $match[1]); 346 580 } else { 347 return sprintf( "*[position() mod %u = %u]/self::%s", $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);581 return sprintf('*[position() mod %u = %u]/self::%s', $result[self::MULTIPLIER], $result[self::INDEX], $match[1]); 348 582 } 349 583 } else { 350 return sprintf("*[%u]/self::%s", $result[self::INDEX], $match[1]); 351 } 352 } 353 354 private function translateNthOfType($match) { 355 584 return sprintf('*[%u]/self::%s', $result[self::INDEX], $match[1]); 585 } 586 } 587 588 /** 589 * @param array $match 590 * 591 * @return string 592 */ 593 private function translateNthOfType(array $match) { 356 594 $result = $this->parseNth($match); 357 595 … … 359 597 if ($result[self::MULTIPLIER] < 0) { 360 598 $result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]); 361 return sprintf( "%s[(last() - position()) mod %u = %u]", $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);599 return sprintf('%s[(last() - position()) mod %u = %u]', $match[1], $result[self::MULTIPLIER], $result[self::INDEX]); 362 600 } else { 363 return sprintf( "%s[position() mod %u = %u]", $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);601 return sprintf('%s[position() mod %u = %u]', $match[1], $result[self::MULTIPLIER], $result[self::INDEX]); 364 602 } 365 603 } else { 366 return sprintf("%s[%u]", $match[1], $result[self::INDEX]); 367 } 368 } 369 370 private function parseNth($match) { 371 604 return sprintf('%s[%u]', $match[1], $result[self::INDEX]); 605 } 606 } 607 608 /** 609 * @param array $match 610 * 611 * @return array 612 */ 613 private function parseNth(array $match) { 372 614 if (in_array(strtolower($match[2]), array('even','odd'))) { 373 615 $index = strtolower($match[2]) == 'even' ? 0 : 1; 374 616 return array(self::MULTIPLIER => 2, self::INDEX => $index); 375 // if there is a multiplier376 } else if (stripos($match[2], 'n') === false) {617 } elseif (stripos($match[2], 'n') === FALSE) { 618 // if there is a multiplier 377 619 $index = intval(str_replace(' ', '', $match[2])); 378 620 return array(self::INDEX => $index); 379 621 } else { 380 381 622 if (isset($match[3])) { 382 $multiple _term = str_replace($match[3], '', $match[2]);623 $multipleTerm = str_replace($match[3], '', $match[2]); 383 624 $index = intval(str_replace(' ', '', $match[3])); 384 625 } else { 385 $multiple _term = $match[2];626 $multipleTerm = $match[2]; 386 627 $index = 0; 387 628 } 388 629 389 $multiplier = str_ireplace('n', '', $multiple_term); 390 391 if (!strlen($multiplier)) $multiplier = 1; 392 elseif ($multiplier == 0) return array(self::INDEX => $index); 393 else $multiplier = intval($multiplier); 394 395 while ($index < 0) $index += abs($multiplier); 630 $multiplier = str_ireplace('n', '', $multipleTerm); 631 632 if (!strlen($multiplier)) { 633 $multiplier = 1; 634 } elseif ($multiplier == 0) { 635 return array(self::INDEX => $index); 636 } else { 637 $multiplier = intval($multiplier); 638 } 639 640 while ($index < 0) { 641 $index += abs($multiplier); 642 } 396 643 397 644 return array(self::MULTIPLIER => $multiplier, self::INDEX => $index); … … 399 646 } 400 647 401 private function cssStyleDefinitionToArray($style) { 402 $definitions = explode(';',$style); 403 $retArr = array(); 404 foreach ($definitions as $def) { 405 if (empty($def) || strpos($def, ':') === false) continue; 406 list($key,$value) = explode(':',$def,2); 407 if (empty($key) || strlen(trim($value)) === 0) continue; 408 $retArr[trim($key)] = trim($value); 409 } 410 return $retArr; 648 /** 649 * Parses a CSS declaration block into property name/value pairs. 650 * 651 * Example: 652 * 653 * The declaration block 654 * 655 * "color: #000; font-weight: bold;" 656 * 657 * will be parsed into the following array: 658 * 659 * "color" => "#000" 660 * "font-weight" => "bold" 661 * 662 * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty 663 * 664 * @return array the CSS declarations with the property names as array keys and the property values as array values 665 */ 666 private function parseCssDeclarationBlock($cssDeclarationBlock) { 667 $properties = array(); 668 669 $declarations = explode(';', $cssDeclarationBlock); 670 foreach ($declarations as $declaration) { 671 $matches = array(); 672 if (!preg_match('/ *([a-z\-]+) *: *([^;]+) */', $declaration, $matches)) { 673 continue; 674 } 675 $propertyName = $matches[1]; 676 $propertyValue = $matches[2]; 677 $properties[$propertyName] = $propertyValue; 678 } 679 680 return $properties; 411 681 } 412 682 } -
trunk/include/functions_mail.inc.php
r26461 r27403 908 908 include_once(PHPWG_ROOT_PATH.'include/emogrifier.class.php'); 909 909 910 // disable DOM warnings911 $e_state = libxml_use_internal_errors(true);912 913 910 $e = new Emogrifier($content); 914 // $e->preserveStyleTag = true; 915 $content = $e->emogrify(); 916 917 libxml_clear_errors(); 918 libxml_use_internal_errors($e_state); 919 920 return $content; 911 return $e->emogrify(); 921 912 } 922 913
Note: See TracChangeset
for help on using the changeset viewer.