?????????????? ?????????????? ?????????????? ?????????????? JavascriptPacker/class.JavaScriptPacker.php000066600000060152151451671630015032 0ustar00pack(); * * or * * $myPacker = new JavaScriptPacker($script, 'Normal', true, false); * $packed = $myPacker->pack(); * * or (default values) * * $myPacker = new JavaScriptPacker($script); * $packed = $myPacker->pack(); * * * Parameters of the constructor: * $script: the JavaScript to pack, string. * $encoding: level of encoding, int or string: * 0, 10, 62, 95 or 'None', 'Numeric', 'Normal', 'High ASCII'. * default: 62. * $fastDecode: include the fast decoder in the packed result, boolean. * default: true. * $specialChars: set to true if you have flagged your private and local variables * in the script, boolean. * default: false. * * The pack() method returns the compressed JavaScript, as a string. * * See http://dean.edwards.name/packer/usage/ for more information. * * Notes: * # [del]Needs PHP 5. Tested with PHP 5.1.2[/del] * This is a modified version for PHP 4. * * # The packed result may differ from the one produced by the Dean Edwards * version, but it has the same length. The reason is that the PHP * function usort does not necessarily preserve the * original order of two equal members. The JavaScript sort function * does in fact preserve this order (although that is not required by the * ECMAScript standard). So the order of the encoded keywords can be * different in the two results. * * # Be careful with the 'High ASCII' encoding level if you use * UTF-8 in your files... */ /* * modified by Mark Fabrizio Jr. 
to work with php 4 */

/**
 * PHP port of Dean Edwards' JavaScript packer.
 *
 * The packer runs the script through up to three passes, each implemented on
 * top of ParseMaster:
 *   1. _basicCompression   - strips comments and redundant white-space;
 *   2. _encodeSpecialChars - shortens $-prefixed and _-prefixed identifiers
 *                            (only when $_specialChars is true);
 *   3. _encodeKeywords     - replaces every word with a short code and wraps
 *                            the result in an eval()-based bootstrap
 *                            (only when $_encoding is non-zero).
 *
 * The code deliberately sticks to PHP 4 syntax (var, array(), &$this).
 */
class JavaScriptPacker {
    // replacement meaning "keep the match untouched"
    var $IGNORE = '$1';

    // validated constructor parameters
    var $_script = '';
    var $_encoding = 62;
    var $_fastDecode = true;
    var $_specialChars = false;

    // symbolic names accepted for the $_encoding constructor argument
    var $LITERAL_ENCODING = array(
        'None' => 0,
        'Numeric' => 10,
        'Normal' => 62,
        'High ASCII' => 95
    );

    // keep a list of parsing functions, they'll be executed all at once
    var $_parsers = array();

    // word => number of occurrences, filled by _analyze() and read by _sortWords()
    var $_count = array();

    // scratch value handed over to preg_replace_callback() callbacks
    var $buffer;

    /**
     * Constructor.
     *
     * @param string     $_script       the JavaScript to pack
     * @param int|string $_encoding     0, 10, 62, 95 or a key of $LITERAL_ENCODING
     * @param bool       $_fastDecode   include the fast decoder in the result
     * @param bool       $_specialChars the script flags private/local variables
     */
    function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false) {
        // the trailing newline guarantees that a final "//" comment is terminated
        $this->_script = $_script . "\n";
        if (array_key_exists($_encoding, $this->LITERAL_ENCODING))
            $_encoding = $this->LITERAL_ENCODING[$_encoding];
        $this->_encoding = min((int)$_encoding, 95);
        $this->_fastDecode = $_fastDecode;
        $this->_specialChars = $_specialChars;
    }

    /**
     * PHP 4 style constructor, kept so that PHP 4 (which does not know
     * __construct) and code calling it explicitly keep working.
     * Newer PHP versions use __construct() directly.
     */
    function JavaScriptPacker($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false) {
        $this->__construct($_script, $_encoding, $_fastDecode, $_specialChars);
    }

    /**
     * Pack the script given to the constructor.
     *
     * @return string the packed JavaScript
     */
    function pack() {
        $this->_addParser('_basicCompression');
        if ($this->_specialChars)
            $this->_addParser('_encodeSpecialChars');
        if ($this->_encoding)
            $this->_addParser('_encodeKeywords');

        // go!
        return $this->_pack($this->_script);
    }

    /**
     * Apply all registered parsing routines, in registration order.
     *
     * @param string $script
     * @return string
     */
    function _pack($script) {
        for ($i = 0; isset($this->_parsers[$i]); $i++) {
            $script = call_user_func(array(&$this, $this->_parsers[$i]), $script);
        }
        return $script;
    }

    /**
     * Register the name of a method of this class as a parsing routine.
     *
     * @param string $parser
     */
    function _addParser($parser) {
        $this->_parsers[] = $parser;
    }

    /**
     * Zero encoding - just removal of white space and comments.
     *
     * @param string $script
     * @return string
     */
    function _basicCompression($script) {
        $parser = new ParseMaster();
        // make safe
        $parser->escapeChar = '\\';
        // protect strings
        $parser->add('/\'[^\'\\n\\r]*\'/', $this->IGNORE);
        $parser->add('/"[^"\\n\\r]*"/', $this->IGNORE);
        // remove comments
        $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' ');
        $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' ');
        // protect regular expressions
        $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE
        $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', $this->IGNORE);
        // remove: ;;; doSomething();
        if ($this->_specialChars)
            $parser->add('/;;;[^\\n\\r]+[\\n\\r]/');
        // remove redundant semi-colons
        $parser->add('/\\(;;\\)/', $this->IGNORE); // protect for (;;) loops
        $parser->add('/;+\\s*([};])/', '$2');
        // apply the above
        $script = $parser->exec($script);

        // remove white-space
        // (the parser is not reset: the second pass runs the patterns above
        // again, followed by the white-space patterns below)
        # $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3');
        # $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3');
        # $parser->add('/\\s+/', '');
        # Modified by fil@rezo.net in order to keep the line breaks
        $parser->add('/(\\b|\\x24)[\\t ]+(\\b|\\x24)/', '$2 $3');
        $parser->add('/([+\\-])[\\t ]+([+\\-])/', '$2 $3');
        $parser->add('/[\\t ]+/', '');
        $parser->add('/\\s+/', "\n");
        // done
        return $parser->exec($script);
    }

    /**
     * Shorten "special" identifiers: $name / $$name variables and _private names.
     *
     * @param string $script
     * @return string
     */
    function _encodeSpecialChars($script) {
        $parser = new ParseMaster();
        // replace: $name -> n, $$name -> na
        $parser->add(
            '/((\\x24+)([a-zA-Z$_]+))(\\d*)/',
            array('fn' => '_replace_name')
        );
        // replace: _name -> _0, double-underscore (__name) is ignored
        $regexp = '/\\b_[A-Za-z\\d]\\w*/';
        // build the word list
        $keywords = $this->_analyze($script, $regexp, '_encodePrivate');
        // quick ref
        $encoded = $keywords['encoded'];

        $parser->add(
            $regexp,
            array(
                'fn' => '_replace_encoded',
                'data' => $encoded
            )
        );
        return $parser->exec($script);
    }

    /**
     * Replace every word of the script with its code and wrap the result in
     * the decoding bootstrap.
     *
     * @param string $script
     * @return string
     */
    function _encodeKeywords($script) {
        // escape high-ascii values already in the script (i.e. in strings)
        if ($this->_encoding > 62)
            $script = $this->_escape95($script);
        // create the parser
        $parser = new ParseMaster();
        $encode = $this->_getEncoder($this->_encoding);
        // for high-ascii, don't encode single character low-ascii
        $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/';
        // build the word list
        $keywords = $this->_analyze($script, $regexp, $encode);
        $encoded = $keywords['encoded'];

        // encode
        $parser->add(
            $regexp,
            array(
                'fn' => '_replace_encoded',
                'data' => $encoded
            )
        );
        if (empty($script)) {
            return $script;
        }
        return $this->_bootStrap($parser->exec($script), $keywords);
    }

    /**
     * Build the word list of a script.
     *
     * @param string $script the script to analyse
     * @param string $regexp pattern matching one "word"
     * @param string $encode name of the method turning an index into a code
     * @return array 'sorted' (index => word), 'encoded' (word => code) and
     *               'protected' (index => true for words equal to a code)
     */
    function _analyze($script, $regexp, $encode) {
        // analyse
        // retrieve all words in the script
        $all = array();
        preg_match_all($regexp, $script, $all);
        $_sorted = array(); // list of words sorted by frequency
        $_encoded = array(); // dictionary of word->encoding
        $_protected = array(); // instances of "protected" words
        $all = $all[0]; // simulate the javascript behaviour of global match
        if (!empty($all)) {
            $unsorted = array(); // same list, not sorted
            $protected = array(); // "protected" words (dictionary of word->"word")
            $values = array(); // dictionary of index->encoding (eg. 256->ff)
            $this->_count = array(); // word->count
            $i = count($all);
            $j = 0;
            // count the occurrences - used for sorting later
            do {
                --$i;
                // the '$' prefix keeps numeric words from becoming integer keys
                $word = '$' . $all[$i];
                if (!isset($this->_count[$word])) {
                    $this->_count[$word] = 0;
                    $unsorted[$j] = $word;
                    // make a dictionary of all of the protected words in this script
                    //  these are words that might be mistaken for encoding
                    $values[$j] = call_user_func(array(&$this, $encode), $j);
                    $protected['$' . $values[$j]] = $j;
                    $j++;
                }
                // increment the word counter
                $this->_count[$word]++;
            } while ($i > 0);
            // prepare to sort the word list, first we must protect
            //  words that are also used as codes. we assign them a code
            //  equivalent to the word itself.
            // e.g. if "do" falls within our encoding range
            //      then we store keywords["do"] = "do";
            // this avoids problems when decoding
            $i = count($unsorted);
            do {
                $word = $unsorted[--$i];
                if (isset($protected[$word])) {
                    $_sorted[$protected[$word]] = substr($word, 1);
                    $_protected[$protected[$word]] = true;
                    $this->_count[$word] = 0;
                }
            } while ($i);

            // sort the words by frequency
            // Note: usort() does not guarantee the relative order of equal
            // members, so the packed script can differ from the one produced
            // by Dean's javascript version while being equivalent.
            usort($unsorted, array(&$this, '_sortWords'));
            $j = 0;
            // because there are "protected" words in the list
            //  we must add the sorted words around them
            // ($i is 0 here, left over from the loop above)
            do {
                if (!isset($_sorted[$i]))
                    $_sorted[$i] = substr($unsorted[$j++], 1);
                $_encoded[$_sorted[$i]] = $values[$i];
            } while (++$i < count($unsorted));
        }
        return array(
            'sorted' => $_sorted,
            'encoded' => $_encoded,
            'protected' => $_protected
        );
    }

    /**
     * usort() callback: most frequent word first.
     */
    function _sortWords($match1, $match2) {
        return $this->_count[$match2] - $this->_count[$match1];
    }

    /**
     * Build the boot function used for loading and decoding.
     *
     * @param string $packed   the encoded script
     * @param array  $keywords the result of _analyze()
     * @return string the final "eval(function(...){...}(...))" script
     */
    function _bootStrap($packed, $keywords) {
        $ENCODE = $this->_safeRegExp('$encode\\($count\\)');

        // $packed: the packed script
        $packed = "'" . $this->_escape($packed) . "'";

        // $ascii: base for encoding
        $ascii = min(count($keywords['sorted']), $this->_encoding);
        if ($ascii == 0) $ascii = 1;

        // $count: number of words contained in the script
        $count = count($keywords['sorted']);

        // $keywords: list of words contained in the script
        foreach ($keywords['protected'] as $i => $value) {
            $keywords['sorted'][$i] = '';
        }
        // convert from a string to an array
        ksort($keywords['sorted']);
        $keywords = "'" . implode('|', $keywords['sorted']) . "'.split('|')";

        $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
        $encode = $this->_getJSFunction($encode);
        $encode = preg_replace('/_encoding/', '$ascii', $encode);
        $encode = preg_replace('/arguments\\.callee/', '$encode', $encode);
        $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');

        // $decode: code snippet to speed up decoding
        if ($this->_fastDecode) {
            // create the decoder
            $decode = $this->_getJSFunction('_decodeBody');
            if ($this->_encoding > 62)
                $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
            // perform the encoding inline for lower ascii values
            elseif ($ascii < 36)
                $decode = preg_replace($ENCODE, $inline, $decode);
            // special case: when $count==0 there are no keywords. I want to keep
            //  the basic shape of the unpacking function so i'll frig the code...
            if ($count == 0)
                $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
        }

        // boot function
        $unpack = $this->_getJSFunction('_unpack');
        if ($this->_fastDecode) {
            // insert the decoder
            $this->buffer = $decode;
            $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastDecode'), $unpack, 1);
        }
        $unpack = preg_replace('/"/', "'", $unpack);
        if ($this->_encoding > 62) { // high-ascii
            // get rid of the word-boundaries for regexp matches
            $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
        }
        if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
            // insert the encode function
            $this->buffer = $encode;
            $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastEncode'), $unpack, 1);
        } else {
            // perform the encoding inline
            $unpack = preg_replace($ENCODE, $inline, $unpack);
        }
        // pack the boot function too
        $unpackPacker = new JavaScriptPacker($unpack, 0, false, true);
        $unpack = $unpackPacker->pack();

        // arguments
        $params = array($packed, $ascii, $count, $keywords);
        if ($this->_fastDecode) {
            $params[] = 0;
            $params[] = '{}';
        }
        $params = implode(',', $params);

        // the whole thing
        return 'eval(' . $unpack . '(' . $params . "))\n";
    }

    /**
     * preg_replace_callback() callback: put the decoder (held in $buffer)
     * right after the matched opening brace.
     */
    function _insertFastDecode($match) {
        return '{' . $this->buffer . ';';
    }

    /**
     * preg_replace_callback() callback: put the encoder (held in $buffer)
     * right after the matched opening brace.
     */
    function _insertFastEncode($match) {
        return '{$encode=' . $this->buffer . ';';
    }

    /**
     * Name of the encoding method suited to a base.
     *
     * @param int $ascii the base
     * @return string '_encode10', '_encode36', '_encode62' or '_encode95'
     */
    function _getEncoder($ascii) {
        if ($ascii > 62) return '_encode95';
        if ($ascii > 36) return '_encode62';
        if ($ascii > 10) return '_encode36';
        return '_encode10';
    }

    // zero encoding
    // characters: 0123456789
    function _encode10($charCode) {
        return $charCode;
    }

    // inherent base36 support
    // characters: 0123456789abcdefghijklmnopqrstuvwxyz
    function _encode36($charCode) {
        return base_convert($charCode, 10, 36);
    }

    // hitch a ride on base36 and add the upper case alpha characters
    // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
    function _encode62($charCode) {
        $res = '';
        if ($charCode >= $this->_encoding) {
            $res = $this->_encode62((int)($charCode / $this->_encoding));
        }
        $charCode = $charCode % $this->_encoding;

        if ($charCode > 35)
            return $res . chr($charCode + 29);
        else
            return $res . base_convert($charCode, 10, 36);
    }

    // use high-ascii values
    // characters: the bytes from 0xA1 (161) upwards
    function _encode95($charCode) {
        $res = '';
        if ($charCode >= $this->_encoding) {
            // integer quotient, as in _encode62(): the digit must be whole
            $res = $this->_encode95((int)($charCode / $this->_encoding));
        }
        return $res . chr(($charCode % $this->_encoding) + 161);
    }

    /**
     * Turn a string into a regular expression, escaping the '$' characters.
     */
    function _safeRegExp($string) {
        return '/' . preg_replace('/\$/', '\\\$', $string) . '/';
    }

    /**
     * Encoder used for the private (_name) identifiers: index n -> "_n".
     */
    function _encodePrivate($charCode) {
        return "_" . $charCode;
    }

    // protect characters used by the parser
    function _escape($script) {
        return preg_replace('/([\\\\\'])/', '\\\$1', $script);
    }

    // protect high-ascii characters already in the script
    function _escape95($script) {
        return preg_replace_callback(
            '/[\\xa1-\\xff]/',
            array(&$this, '_escape95Bis'),
            $script
        );
    }

    /**
     * preg_replace_callback() callback: byte -> "\xNN" escape.
     *
     * @param array $match the match array; $match[0] is the matched byte
     */
    function _escape95Bis($match) {
        return '\x' . ((string)dechex(ord($match[0])));
    }

    /**
     * Source of one of the JavaScript functions stored below.
     *
     * @param string $aName suffix of the JSFUNCTION property, e.g. '_unpack'
     * @return string the source, '' when there is no such function
     */
    function _getJSFunction($aName) {
        $func = 'JSFUNCTION' . $aName;
        if (isset($this->$func)) {
            return $this->$func;
        }
        return '';
    }

    // JavaScript Functions used.
    // Note : In Dean's version, these functions are converted
    // with 'String(aFunctionName);'.
    // This internal conversion completes the original code, ex :
    // 'while (aBool) anAction();' is converted to
    // 'while (aBool) { anAction(); }'.
    // The JavaScript functions below are corrected.

    // unpacking function - this is the boot strap function
    //  data extracted from this packing routine is passed to
    //  this function when decoded in the target
    // NOTE ! : without the final ';'.
    var $JSFUNCTION_unpack = 'function($packed, $ascii, $count, $keywords, $encode, $decode) { while ($count--) { if ($keywords[$count]) { $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]); } } return $packed; }';

    // code-snippet inserted into the unpacker to speed up decoding
    // The line breaks inside this string matter: they terminate the
    // JavaScript "//" comments, which are stripped when the boot function
    // is itself packed.
    var $JSFUNCTION_decodeBody = '    if (!\'\'.replace(/^/, String)) {
        // decode all the values we need
        while ($count--) {
            $decode[$encode($count)] = $keywords[$count] || $encode($count);
        }
        // global replacement function
        $keywords = [function ($encoded) {return $decode[$encoded]}];
        // generic match
        $encode = function () {return \'\\\\w+\'};
        // reset the loop counter - we are now doing a global replace
        $count = 1;
    }
';

    // zero encoding
    // characters: 0123456789
    var $JSFUNCTION_encode10 = 'function($charCode) { return $charCode; }';

    // inherent base36 support
    // characters: 0123456789abcdefghijklmnopqrstuvwxyz
    var $JSFUNCTION_encode36 = 'function($charCode) { return $charCode.toString(36); }';

    // hitch a ride on base36 and add the upper case alpha characters
    // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
    var $JSFUNCTION_encode62 = 'function($charCode) { return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) + (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36)); }';

    // use high-ascii values
    // characters: the char codes from 161 upwards
    var $JSFUNCTION_encode95 = 'function($charCode) { return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) + String.fromCharCode($charCode % _encoding + 161); }';
}

/**
 * Multi-pattern search and replace engine.
 *
 * Patterns registered with add() are joined into one alternation by exec();
 * for every match, _replacement() finds which pattern fired (through the
 * position of its capturing group) and applies the matching replacement.
 */
class ParseMaster {
    var $ignoreCase = false;
    var $escapeChar = '';

    // constants: indexes inside one entry of $_patterns
    var $EXPRESSION = 0;
    var $REPLACEMENT = 1;
    var $LENGTH = 2;

    // used to determine nesting levels
    var $GROUPS = '/\\(/';//g
    var $SUB_REPLACE = '/\\$\\d/';
    var $INDEXED = '/^\\$\\d+$/';
    var $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
    var $ESCAPE = '/\\\./';//g
    var $QUOTE = '/\'/';
    var $DELETED = '/\\x01[^\\x01]*\\x01/';//g

    // private
    var $_escaped = array();  // escaped characters
    var $_patterns = array(); // patterns stored by index

    // php : we cannot pass additional data to preg_replace_callback,
    // and we cannot use &$this in create_function, so let's go to lower level
    var $buffer;

    /**
     * Register a pattern.
     *
     * @param string       $expression  a delimited regular expression
     * @param string|array $replacement a literal, a "$n" reference, a string
     *                                  mixing text and "$n" references, or
     *                                  array('fn' => method[, 'data' => mixed])
     */
    function add($expression, $replacement = '') {
        // count the number of sub-expressions
        //  - add one because each pattern is itself a sub-expression
        $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);

        // treat only strings $replacement
        if (is_string($replacement)) {
            // does the pattern deal with sub-expressions?
            if (preg_match($this->SUB_REPLACE, $replacement)) {
                // a simple lookup? (e.g. "$2")
                if (preg_match($this->INDEXED, $replacement)) {
                    // store the index (used for fast retrieval of matched strings)
                    $replacement = (int)(substr($replacement, 1)) - 1;
                } else { // a complicated lookup (e.g. "Hello $2 $1")
                    // build a function to do the lookup
                    $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
                        ? '"' : "'";
                    $replacement = array(
                        'fn' => '_backReferences',
                        'data' => array(
                            'replacement' => $replacement,
                            'length' => $length,
                            'quote' => $quote
                        )
                    );
                }
            }
        }
        // pass the modified arguments
        if (!empty($expression))
            $this->_add($expression, $replacement, $length);
        else
            $this->_add('/^$/', $replacement, $length);
    }

    /**
     * Run all registered patterns over a string.
     *
     * @param string $string
     * @return string
     */
    function exec($string) {
        // execute the global replacement
        $this->_escaped = array();

        // simulate the _patterns.toString of Dean
        $regexp = '/';
        foreach ($this->_patterns as $reg) {
            // strip the delimiters and wrap each pattern in its own group
            $regexp .= '(' . substr($reg[$this->EXPRESSION], 1, -1) . ')|';
        }
        $regexp = substr($regexp, 0, -1) . '/';
        $regexp .= ($this->ignoreCase) ? 'i' : '';

        $string = $this->_escape($string, $this->escapeChar);
        $string = preg_replace_callback(
            $regexp,
            array(&$this, '_replacement'),
            $string
        );
        $string = $this->_unescape($string, $this->escapeChar);

        return preg_replace($this->DELETED, '', $string);
    }

    // clear the patterns collection so that this object may be re-used
    function reset() {
        $this->_patterns = array();
    }

    // create and add a new pattern to the patterns collection
    // (stored as array(expression, replacement, length))
    function _add() {
        $arguments = func_get_args();
        $this->_patterns[] = $arguments;
    }

    /**
     * This is the global replace function (it's quite complicated):
     * preg_replace_callback() callback used by exec().
     *
     * @param array $arguments the match array of the joined expression
     * @return string the replacement for the match
     */
    function _replacement($arguments) {
        if (empty($arguments)) return '';

        $i = 1;
        $j = 0;
        // loop through the patterns
        while (isset($this->_patterns[$j])) {
            $pattern = $this->_patterns[$j++];
            // do we have a result?
            if (isset($arguments[$i]) && ($arguments[$i] != '')) {
                $replacement = $pattern[$this->REPLACEMENT];

                if (is_array($replacement) && isset($replacement['fn'])) {
                    if (isset($replacement['data']))
                        $this->buffer = $replacement['data'];
                    return call_user_func(array(&$this, $replacement['fn']), $arguments, $i);
                } elseif (is_int($replacement)) {
                    return $arguments[$replacement + $i];
                }
                // a match holding an escape marker is wrapped in \x01 so that
                // exec() can drop it after the escapes have been restored
                $delete = ($this->escapeChar == '' ||
                           strpos($arguments[$i], $this->escapeChar) === false)
                    ? '' : "\x01" . $arguments[$i] . "\x01";
                return $delete . $replacement;
            }
            // skip over references to sub-expressions
            $i += $pattern[$this->LENGTH];
        }
        // no pattern produced a non-empty group: the match is removed
        return '';
    }

    /**
     * Replacement function for strings mixing text and "$n" references.
     * Reads its settings from $buffer (set by _replacement()).
     */
    function _backReferences($match, $offset) {
        $replacement = $this->buffer['replacement'];
        $i = $this->buffer['length'];
        while ($i) {
            // "$i" is replaced by group ($offset + $i - 1): the search string
            // is built before the decrement, the index is read after it
            $replacement = str_replace('$' . $i--, $match[$offset + $i], $replacement);
        }
        return $replacement;
    }

    /**
     * Replacement function for $name identifiers
     * (used by JavaScriptPacker::_encodeSpecialChars()).
     */
    function _replace_name($match, $offset) {
        $length = strlen($match[$offset + 2]);
        $start = $length - max($length - strlen($match[$offset + 3]), 0);
        return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
    }

    /**
     * Replacement function looking the match up in the word => code
     * dictionary held in $buffer.
     */
    function _replace_encoded($match, $offset) {
        return $this->buffer[$match[$offset]];
    }

    /**
     * Encode escaped characters: every "<escapeChar><char>" pair is reduced
     * to the escape character alone, the characters being stacked in $_escaped.
     */
    function _escape($string, $escapeChar) {
        if ($escapeChar) {
            $this->buffer = $escapeChar;
            return preg_replace_callback(
                '/\\' . $escapeChar . '(.)' . '/',
                array(&$this, '_escapeBis'),
                $string
            );
        }
        return $string;
    }

    // callback of _escape(): remember the escaped character, keep the marker
    function _escapeBis($match) {
        $this->_escaped[] = $match[1];
        return $this->buffer;
    }

    /**
     * Decode escaped characters: give every escape marker its character back.
     */
    function _unescape($string, $escapeChar) {
        if ($escapeChar) {
            $regexp = '/' . '\\' . $escapeChar . '/';
            $this->buffer = array('escapeChar' => $escapeChar, 'i' => 0);
            return preg_replace_callback(
                $regexp,
                array(&$this, '_unescapeBis'),
                $string
            );
        }
        return $string;
    }

    // callback of _unescape(): restore the next stacked character
    function _unescapeBis() {
        $index = $this->buffer['i'];
        // isset() rather than empty(): the character "0" is a legitimate
        // escaped character (e.g. "\0") and must not be dropped
        $temp = isset($this->_escaped[$index]) ? $this->_escaped[$index] : '';
        $this->buffer['i']++;
        return $this->buffer['escapeChar'] . $temp;
    }

    // remove every backslash-escaped character (used before counting "(")
    function _internalEscape($string) {
        return preg_replace($this->ESCAPE, '', $string);
    }
}
?> safehtml/license.txt000066600000003007151451720700010540 0ustar00(c) Roman Ivanov, 2004-2005 (c) Pixel-Apes ( http://pixel-apes.com/ ), 2004-2005 (c) JetStyle ( http://jetstyle.ru/ ), 2004-2005 Maintainer -- Roman Ivanov Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. safehtml/index.php000066600000000002151451720700010165 0ustar00X safehtml/classes/HTMLSax3/States.php000066600000012027151451720700013273 0ustar00scanUntilString('<'); if ($data != '') { $context->handler_object_data-> {$context->handler_method_data}($context->htmlsax, $data); } $context->IgnoreCharacter(); return XML_HTMLSAX3_STATE_TAG; } } class XML_HTMLSax3_TagState { function parse(&$context) { switch($context->ScanCharacter()) { case '/': return XML_HTMLSAX3_STATE_CLOSING_TAG; break; case '?': return XML_HTMLSAX3_STATE_PI; break; case '%': return XML_HTMLSAX3_STATE_JASP; break; case '!': return XML_HTMLSAX3_STATE_ESCAPE; break; default: $context->unscanCharacter(); return XML_HTMLSAX3_STATE_OPENING_TAG; } } } class XML_HTMLSax3_ClosingTagState { function parse(&$context) { $tag = $context->scanUntilCharacters('/>'); if ($tag != '') { $char = $context->scanCharacter(); if ($char == '/') { $char = $context->scanCharacter(); if ($char != '>') { $context->unscanCharacter(); } } $context->handler_object_element-> {$context->handler_method_closing}($context->htmlsax, $tag, FALSE); } return XML_HTMLSAX3_STATE_START; } } class XML_HTMLSax3_OpeningTagState { function parseAttributes(&$context) { $Attributes = array(); $context->ignoreWhitespace(); $attributename = $context->scanUntilCharacters("=/> \n\r\t"); while ($attributename != '') { $attributevalue = NULL; $context->ignoreWhitespace(); $char = $context->scanCharacter(); if ($char == '=') { $context->ignoreWhitespace(); $char = 
$context->ScanCharacter(); if ($char == '"') { $attributevalue= $context->scanUntilString('"'); $context->IgnoreCharacter(); } else if ($char == "'") { $attributevalue = $context->scanUntilString("'"); $context->IgnoreCharacter(); } else { $context->unscanCharacter(); $attributevalue = $context->scanUntilCharacters("> \n\r\t"); } } else if ($char !== NULL) { $attributevalue = NULL; $context->unscanCharacter(); } $Attributes[$attributename] = $attributevalue; $context->ignoreWhitespace(); $attributename = $context->scanUntilCharacters("=/> \n\r\t"); } return $Attributes; } function parse(&$context) { $tag = $context->scanUntilCharacters("/> \n\r\t"); if ($tag != '') { $this->attrs = array(); $Attributes = $this->parseAttributes($context); $char = $context->scanCharacter(); if ($char == '/') { $char = $context->scanCharacter(); if ($char != '>') { $context->unscanCharacter(); } $context->handler_object_element-> {$context->handler_method_opening}($context->htmlsax, $tag, $Attributes, TRUE); $context->handler_object_element-> {$context->handler_method_closing}($context->htmlsax, $tag, TRUE); } else { $context->handler_object_element-> {$context->handler_method_opening}($context->htmlsax, $tag, $Attributes, FALSE); } } return XML_HTMLSAX3_STATE_START; } } class XML_HTMLSax3_EscapeState { function parse(&$context) { $char = $context->ScanCharacter(); if ($char == '-') { $char = $context->ScanCharacter(); if ($char == '-') { $context->unscanCharacter(); $context->unscanCharacter(); $text = $context->scanUntilString('-->'); $text .= $context->scanCharacter(); $text .= $context->scanCharacter(); } else { $context->unscanCharacter(); $text = $context->scanUntilString('>'); } } else if ( $char == '[') { $context->unscanCharacter(); $text = $context->scanUntilString(']>'); $text.= $context->scanCharacter(); } else { $context->unscanCharacter(); $text = $context->scanUntilString('>'); } $context->IgnoreCharacter(); if ($text != '') { $context->handler_object_escape-> 
{$context->handler_method_escape}($context->htmlsax, $text); } return XML_HTMLSAX3_STATE_START; } } class XML_HTMLSax3_JaspState { function parse(&$context) { $text = $context->scanUntilString('%>'); if ($text != '') { $context->handler_object_jasp-> {$context->handler_method_jasp}($context->htmlsax, $text); } $context->IgnoreCharacter(); $context->IgnoreCharacter(); return XML_HTMLSAX3_STATE_START; } } class XML_HTMLSax3_PiState { function parse(&$context) { $target = $context->scanUntilCharacters(" \n\r\t"); $data = $context->scanUntilString('?>'); if ($data != '') { $context->handler_object_pi-> {$context->handler_method_pi}($context->htmlsax, $target, $data); } $context->IgnoreCharacter(); $context->IgnoreCharacter(); return XML_HTMLSAX3_STATE_START; } } ?>safehtml/classes/HTMLSax3/Decorators.php000066600000007147151451720700014144 0ustar00orig_obj =& $orig_obj; $this->orig_method = $orig_method; } function trimData(&$parser, $data) { $data = trim($data); if ($data != '') { $this->orig_obj->{$this->orig_method}($parser, $data); } } } class XML_HTMLSax3_CaseFolding { var $orig_obj; var $orig_open_method; var $orig_close_method; function XML_HTMLSax3_CaseFolding(&$orig_obj, $orig_open_method, $orig_close_method) { $this->orig_obj =& $orig_obj; $this->orig_open_method = $orig_open_method; $this->orig_close_method = $orig_close_method; } function foldOpen(&$parser, $tag, $attrs=array(), $empty = FALSE) { $this->orig_obj->{$this->orig_open_method}($parser, strtoupper($tag), $attrs, $empty); } function foldClose(&$parser, $tag, $empty = FALSE) { $this->orig_obj->{$this->orig_close_method}($parser, strtoupper($tag), $empty); } } class XML_HTMLSax3_Linefeed { var $orig_obj; var $orig_method; function XML_HTMLSax3_LineFeed(&$orig_obj, $orig_method) { $this->orig_obj =& $orig_obj; $this->orig_method = $orig_method; } function breakData(&$parser, $data) { $data = explode("\n",$data); foreach ( $data as $chunk ) { $this->orig_obj->{$this->orig_method}($parser, $chunk); } } 
} class XML_HTMLSax3_Tab { var $orig_obj; var $orig_method; function XML_HTMLSax3_Tab(&$orig_obj, $orig_method) { $this->orig_obj =& $orig_obj; $this->orig_method = $orig_method; } function breakData(&$parser, $data) { $data = explode("\t",$data); foreach ( $data as $chunk ) { $this->orig_obj->{$this->orig_method}($this, $chunk); } } } class XML_HTMLSax3_Entities_Parsed { var $orig_obj; var $orig_method; function XML_HTMLSax3_Entities_Parsed(&$orig_obj, $orig_method) { $this->orig_obj =& $orig_obj; $this->orig_method = $orig_method; } function breakData(&$parser, $data) { $data = preg_split('/(&.+?;)/',$data,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); foreach ( $data as $chunk ) { $chunk = html_entity_decode($chunk,ENT_NOQUOTES); $this->orig_obj->{$this->orig_method}($this, $chunk); } } } if (version_compare(phpversion(), '4.3', '<') && !function_exists('html_entity_decode') ) { function html_entity_decode($str, $style=ENT_NOQUOTES) { return strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES,$style))); } } class XML_HTMLSax3_Entities_Unparsed { var $orig_obj; var $orig_method; function XML_HTMLSax3_Entities_Unparsed(&$orig_obj, $orig_method) { $this->orig_obj =& $orig_obj; $this->orig_method = $orig_method; } function breakData(&$parser, $data) { $data = preg_split('/(&.+?;)/',$data,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); foreach ( $data as $chunk ) { $this->orig_obj->{$this->orig_method}($this, $chunk); } } } class XML_HTMLSax3_Escape_Stripper { var $orig_obj; var $orig_method; function XML_HTMLSax3_Escape_Stripper(&$orig_obj, $orig_method) { $this->orig_obj =& $orig_obj; $this->orig_method = $orig_method; } function strip(&$parser, $data) { if ( substr($data,0,2) == '--' ) { $patterns = array( '/^\-\-/', // Opening comment: -- '/\-\-$/', // Closing comment: -- ); $data = preg_replace($patterns,'',$data); } else if ( substr($data,0,1) == '[' ) { $patterns = array( '/^\[.*CDATA.*\[/s', // Opening CDATA '/\].*\]$/s', // Closing CDATA 
); $data = preg_replace($patterns,'',$data); } $this->orig_obj->{$this->orig_method}($this, $data); } } ?>safehtml/classes/HTMLSax3/index.php000066600000000002151451720700013125 0ustar00X safehtml/classes/index.php000066600000000002151451720700011622 0ustar00X safehtml/classes/HTMLSax3.php000066600000023731151451720700012034 0ustar00 Original port from Python | // | Authors: Harry Fuecks Port to PEAR + more | // | Authors: Many @ Sitepointforums Advanced PHP Forums | // +----------------------------------------------------------------------+ // if (!defined('_ECRIRE_INC_VERSION')) return; if (!defined('XML_HTMLSAX3')) { define('XML_HTMLSAX3', 'XML/'); } require_once(XML_HTMLSAX3 . 'HTMLSax3/States.php'); require_once(XML_HTMLSAX3 . 'HTMLSax3/Decorators.php'); class XML_HTMLSax3_StateParser { var $htmlsax; var $handler_object_element; var $handler_method_opening; var $handler_method_closing; var $handler_object_data; var $handler_method_data; var $handler_object_pi; var $handler_method_pi; var $handler_object_jasp; var $handler_method_jasp; var $handler_object_escape; var $handler_method_escape; var $handler_default; var $parser_options = array(); var $rawtext; var $position; var $length; var $State = array(); function XML_HTMLSax3_StateParser (& $htmlsax) { $this->htmlsax = & $htmlsax; $this->State[XML_HTMLSAX3_STATE_START] = new XML_HTMLSax3_StartingState(); $this->State[XML_HTMLSAX3_STATE_CLOSING_TAG] = new XML_HTMLSax3_ClosingTagState(); $this->State[XML_HTMLSAX3_STATE_TAG] = new XML_HTMLSax3_TagState(); $this->State[XML_HTMLSAX3_STATE_OPENING_TAG] = new XML_HTMLSax3_OpeningTagState(); $this->State[XML_HTMLSAX3_STATE_PI] = new XML_HTMLSax3_PiState(); $this->State[XML_HTMLSAX3_STATE_JASP] = new XML_HTMLSax3_JaspState(); $this->State[XML_HTMLSAX3_STATE_ESCAPE] = new XML_HTMLSax3_EscapeState(); } function unscanCharacter() { $this->position -= 1; } function ignoreCharacter() { $this->position += 1; } function scanCharacter() { if ($this->position < $this->length) { 
return $this->rawtext{$this->position++}; } } function scanUntilString($string) { $start = $this->position; $this->position = strpos($this->rawtext, $string, $start); if ($this->position === FALSE) { $this->position = $this->length; } return substr($this->rawtext, $start, $this->position - $start); } function scanUntilCharacters($string) {} function ignoreWhitespace() {} function parse($data) { if ($this->parser_options['XML_OPTION_TRIM_DATA_NODES']==1) { $decorator = new XML_HTMLSax3_Trim( $this->handler_object_data, $this->handler_method_data); $this->handler_object_data =& $decorator; $this->handler_method_data = 'trimData'; } if ($this->parser_options['XML_OPTION_CASE_FOLDING']==1) { $open_decor = new XML_HTMLSax3_CaseFolding( $this->handler_object_element, $this->handler_method_opening, $this->handler_method_closing); $this->handler_object_element =& $open_decor; $this->handler_method_opening ='foldOpen'; $this->handler_method_closing ='foldClose'; } if ($this->parser_options['XML_OPTION_LINEFEED_BREAK']==1) { $decorator = new XML_HTMLSax3_Linefeed( $this->handler_object_data, $this->handler_method_data); $this->handler_object_data =& $decorator; $this->handler_method_data = 'breakData'; } if ($this->parser_options['XML_OPTION_TAB_BREAK']==1) { $decorator = new XML_HTMLSax3_Tab( $this->handler_object_data, $this->handler_method_data); $this->handler_object_data =& $decorator; $this->handler_method_data = 'breakData'; } if ($this->parser_options['XML_OPTION_ENTITIES_UNPARSED']==1) { $decorator = new XML_HTMLSax3_Entities_Unparsed( $this->handler_object_data, $this->handler_method_data); $this->handler_object_data =& $decorator; $this->handler_method_data = 'breakData'; } if ($this->parser_options['XML_OPTION_ENTITIES_PARSED']==1) { $decorator = new XML_HTMLSax3_Entities_Parsed( $this->handler_object_data, $this->handler_method_data); $this->handler_object_data =& $decorator; $this->handler_method_data = 'breakData'; } // Note switched on by default if 
($this->parser_options['XML_OPTION_STRIP_ESCAPES']==1) { $decorator = new XML_HTMLSax3_Escape_Stripper( $this->handler_object_escape, $this->handler_method_escape); $this->handler_object_escape =& $decorator; $this->handler_method_escape = 'strip'; } $this->rawtext = $data; $this->length = strlen($data); $this->position = 0; $this->_parse(); } function _parse($state = XML_HTMLSAX3_STATE_START) { do { $state = $this->State[$state]->parse($this); } while ($state != XML_HTMLSAX3_STATE_STOP && $this->position < $this->length); } } class XML_HTMLSax3_StateParser_Lt430 extends XML_HTMLSax3_StateParser { function XML_HTMLSax3_StateParser_Lt430(& $htmlsax) { parent::XML_HTMLSax3_StateParser($htmlsax); $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0; $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0; $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0; $this->parser_options['XML_OPTION_TAB_BREAK'] = 0; $this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0; $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0; $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0; } function scanUntilCharacters($string) { $startpos = $this->position; while ($this->position < $this->length && strpos($string, $this->rawtext{$this->position}) === FALSE) { $this->position++; } return substr($this->rawtext, $startpos, $this->position - $startpos); } function ignoreWhitespace() { while ($this->position < $this->length && strpos(" \n\r\t", $this->rawtext{$this->position}) !== FALSE) { $this->position++; } } function parse($data) { parent::parse($data); } } class XML_HTMLSax3_StateParser_Gtet430 extends XML_HTMLSax3_StateParser { function XML_HTMLSax3_StateParser_Gtet430(& $htmlsax) { parent::XML_HTMLSax3_StateParser($htmlsax); $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0; $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0; $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0; $this->parser_options['XML_OPTION_TAB_BREAK'] = 0; 
$this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0; $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0; $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0; } function scanUntilCharacters($string) { $startpos = $this->position; $length = strcspn($this->rawtext, $string, $startpos); $this->position += $length; return substr($this->rawtext, $startpos, $length); } function ignoreWhitespace() { $this->position += strspn($this->rawtext, " \n\r\t", $this->position); } function parse($data) { parent::parse($data); } } class XML_HTMLSax3_NullHandler { function DoNothing() { } } class XML_HTMLSax3 { var $state_parser; function XML_HTMLSax3() { if (version_compare(phpversion(), '4.3', 'ge')) { $this->state_parser = new XML_HTMLSax3_StateParser_Gtet430($this); } else { $this->state_parser = new XML_HTMLSax3_StateParser_Lt430($this); } $nullhandler = new XML_HTMLSax3_NullHandler(); $this->set_object($nullhandler); $this->set_element_handler('DoNothing', 'DoNothing'); $this->set_data_handler('DoNothing'); $this->set_pi_handler('DoNothing'); $this->set_jasp_handler('DoNothing'); $this->set_escape_handler('DoNothing'); } function set_object(&$object) { if ( is_object($object) ) { $this->state_parser->handler_default =& $object; return true; } else { require_once('PEAR.php'); PEAR::raiseError('XML_HTMLSax3::set_object requires '. 
'an object instance'); } } function set_option($name, $value=1) { if ( array_key_exists($name,$this->state_parser->parser_options) ) { $this->state_parser->parser_options[$name] = $value; return true; } else { require_once('PEAR.php'); PEAR::raiseError('XML_HTMLSax3::set_option('.$name.') illegal'); } } function set_data_handler($data_method) { $this->state_parser->handler_object_data =& $this->state_parser->handler_default; $this->state_parser->handler_method_data = $data_method; } function set_element_handler($opening_method, $closing_method) { $this->state_parser->handler_object_element =& $this->state_parser->handler_default; $this->state_parser->handler_method_opening = $opening_method; $this->state_parser->handler_method_closing = $closing_method; } function set_pi_handler($pi_method) { $this->state_parser->handler_object_pi =& $this->state_parser->handler_default; $this->state_parser->handler_method_pi = $pi_method; } function set_escape_handler($escape_method) { $this->state_parser->handler_object_escape =& $this->state_parser->handler_default; $this->state_parser->handler_method_escape = $escape_method; } function set_jasp_handler ($jasp_method) { $this->state_parser->handler_object_jasp =& $this->state_parser->handler_default; $this->state_parser->handler_method_jasp = $jasp_method; } function get_current_position() { return $this->state_parser->position; } function get_length() { return $this->state_parser->length; } function parse($data) { $this->state_parser->parse($data); } } ?>safehtml/classes/safehtml.php000066600000022075151451720700012334 0ustar00 * @copyright 2004-2005 Roman Ivanov * @license http://www.debian.org/misc/bsd.license BSD License (3 Clause) * @version 1.3.7 * @link http://pixel-apes.com/safehtml/ */ if (!defined('_ECRIRE_INC_VERSION')) return; require_once(XML_HTMLSAX3 . 
'HTMLSax3.php'); class SafeHTML { var $_xhtml = ''; var $_counter = array(); var $_stack = array(); var $_dcCounter = array(); var $_dcStack = array(); var $_listScope = 0; var $_liStack = array(); var $_protoRegexps = array(); var $_cssRegexps = array(); var $singleTags = array('area', 'br', 'img', 'input', 'hr', 'wbr', ); var $deleteTags = array( 'applet', 'base', 'basefont', 'bgsound', 'blink', 'body', 'embed', 'frame', 'frameset', 'head', 'html', 'ilayer', 'iframe', 'layer', 'link', 'meta', 'object', 'style', 'title', 'script', ); var $deleteTagsContent = array('script', 'style', 'title', 'xml', ); var $protocolFiltering = 'white'; var $blackProtocols = array( 'about', 'chrome', 'data', 'disk', 'hcp', 'help', 'javascript', 'livescript', 'lynxcgi', 'lynxexec', 'ms-help', 'ms-its', 'mhtml', 'mocha', 'opera', 'res', 'resource', 'shell', 'vbscript', 'view-source', 'vnd.ms.radio', 'wysiwyg', ); var $whiteProtocols = array( 'ed2k', 'file', 'ftp', 'gopher', 'http', 'https', 'irc', 'mailto', 'news', 'nntp', 'telnet', 'webcal', 'xmpp', 'callto', ); var $protocolAttributes = array( 'action', 'background', 'codebase', 'dynsrc', 'href', 'lowsrc', 'src', ); var $cssKeywords = array( 'absolute', 'behavior', 'behaviour', 'content', 'expression', 'fixed', 'include-source', 'moz-binding', ); var $noClose = array(); var $closeParagraph = array( 'address', 'blockquote', 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'isindex', 'listing', 'marquee', 'menu', 'multicol', 'ol', 'p', 'plaintext', 'pre', 'table', 'ul', 'xmp', ); var $tableTags = array( 'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', ); var $listTags = array('dir', 'menu', 'ol', 'ul', 'dl', ); var $attributes = array('dynsrc', 'id', 'name', ); var $attributesNS = array('xml:lang', ); function SafeHTML() { //making regular expressions based on Proto & CSS arrays foreach ($this->blackProtocols as $proto) { $preg = "/[\s\x01-\x1F]*"; for ($i=0; 
$i_protoRegexps[] = $preg; } foreach ($this->cssKeywords as $css) { $this->_cssRegexps[] = '/' . $css . '/i'; } return true; } function _writeAttrs ($attrs) { if (is_array($attrs)) { foreach ($attrs as $name => $value) { $name = strtolower($name); if (strpos($name, 'on') === 0) { continue; } if (strpos($name, 'data') === 0) { continue; } if (in_array($name, $this->attributes)) { continue; } if (!preg_match("/^[a-z0-9-]+$/i", $name)) { if (!in_array($name, $this->attributesNS)) { continue; } } if (($value === TRUE) || (is_null($value))) { $value = $name; } if ($name == 'style') { // removes insignificant backslahes $value = str_replace("\\", '', $value); // removes CSS comments while (1) { $_value = preg_replace("!/\*.*?\*/!s", '', $value); if ($_value == $value) break; $value = $_value; } // replace all & to & $value = str_replace('&', '&', $value); $value = str_replace('&', '&', $value); foreach ($this->_cssRegexps as $css) { if (preg_match($css, $value)) { continue 2; } } foreach ($this->_protoRegexps as $proto) { if (preg_match($proto, $value)) { continue 2; } } } $tempval = preg_replace('/&#(\d+);?/me', "chr('\\1')", $value); //"' $tempval = preg_replace('/&#x([0-9a-f]+);?/mei', "chr(hexdec('\\1'))", $tempval); if ((in_array($name, $this->protocolAttributes)) && (strpos($tempval, ':') !== false)) { if ($this->protocolFiltering == 'black') { foreach ($this->_protoRegexps as $proto) { if (preg_match($proto, $tempval)) continue 2; } } else { $_tempval = explode(':', $tempval); $proto = $_tempval[0]; if (!in_array($proto, $this->whiteProtocols)) { continue; } } } $value = str_replace("\"", """, $value); $this->_xhtml .= ' ' . $name . '="' . $value . '"'; } } return true; } function _openHandler(&$parser, $name, $attrs) { $name = strtolower($name); if (in_array($name, $this->deleteTagsContent)) { array_push($this->_dcStack, $name); $this->_dcCounter[$name] = isset($this->_dcCounter[$name]) ? 
$this->_dcCounter[$name]+1 : 1; } if (count($this->_dcStack) != 0) { return true; } if (in_array($name, $this->deleteTags)) { return true; } if (!preg_match("/^[a-z0-9]+$/i", $name)) { if (preg_match("!(?:\@|://)!i", $name)) { $this->_xhtml .= '<' . $name . '>'; } return true; } if (in_array($name, $this->singleTags)) { $this->_xhtml .= '<' . $name; $this->_writeAttrs($attrs); $this->_xhtml .= ' />'; return true; } // TABLES: cannot open table elements when we are not inside table if ((isset($this->_counter['table'])) && ($this->_counter['table'] <= 0) && (in_array($name, $this->tableTags))) { return true; } // PARAGRAPHS: close paragraph when closeParagraph tags opening if ((in_array($name, $this->closeParagraph)) && (in_array('p', $this->_stack))) { $this->_closeHandler($parser, 'p'); } // LISTS: we should close
  • if
  • of the same level opening if ($name == 'li' && count($this->_liStack) && $this->_listScope == $this->_liStack[count($this->_liStack)-1]) { $this->_closeHandler($parser, 'li'); } // LISTS: we want to know on what nesting level of lists we are if (in_array($name, $this->listTags)) { $this->_listScope++; } if ($name == 'li') { array_push($this->_liStack, $this->_listScope); } $this->_xhtml .= '<' . $name; $this->_writeAttrs($attrs); $this->_xhtml .= '>'; array_push($this->_stack,$name); $this->_counter[$name] = isset($this->_counter[$name]) ? $this->_counter[$name]+1 : 1; return true; } function _closeHandler(&$parser, $name) { $name = strtolower($name); if (isset($this->_dcCounter[$name]) && ($this->_dcCounter[$name] > 0) && (in_array($name, $this->deleteTagsContent))) { while ($name != ($tag = array_pop($this->_dcStack))) { $this->_dcCounter[$tag]--; } $this->_dcCounter[$name]--; } if (count($this->_dcStack) != 0) { return true; } if ((isset($this->_counter[$name])) && ($this->_counter[$name] > 0)) { while ($name != ($tag = array_pop($this->_stack))) { $this->_closeTag($tag); } $this->_closeTag($name); } return true; } function _closeTag($tag) { if (!in_array($tag, $this->noClose)) { $this->_xhtml .= ''; } $this->_counter[$tag]--; if (in_array($tag, $this->listTags)) { $this->_listScope--; } if ($tag == 'li') { array_pop($this->_liStack); } return true; } function _dataHandler(&$parser, $data) { if (count($this->_dcStack) == 0) { $this->_xhtml .= $data; } return true; } function _escapeHandler(&$parser, $data) { return true; } function getXHTML () { while ($tag = array_pop($this->_stack)) { $this->_closeTag($tag); } return $this->_xhtml; } function clear() { $this->_xhtml = ''; return true; } function parse($doc) { // Save all '<' symbols $doc = preg_replace("/<(?=[^a-zA-Z\/\!\?\%])/", '<', $doc); // Web documents shouldn't contains \x00 symbol $doc = str_replace("\x00", '', $doc); // Opera6 bug workaround $doc = str_replace("\xC0\xBC", '<', $doc); // UTF-7 
encoding ASCII decode $doc = $this->repackUTF7($doc); // Instantiate the parser $parser= new XML_HTMLSax3(); // Set up the parser $parser->set_object($this); $parser->set_element_handler('_openHandler','_closeHandler'); $parser->set_data_handler('_dataHandler'); $parser->set_escape_handler('_escapeHandler'); $parser->parse($doc); return $this->getXHTML(); } function repackUTF7($str) { return preg_replace_callback('!\+([0-9a-zA-Z/]+)\-!', array($this, 'repackUTF7Callback'), $str); } function repackUTF7Callback($str) { $str = base64_decode($str[1]); $str = preg_replace_callback('/^((?:\x00.)*)((?:[^\x00].)+)/', array($this, 'repackUTF7Back'), $str); return preg_replace('/\x00(.)/', '$1', $str); } function repackUTF7Back($str) { return $str[1].'+'.rtrim(base64_encode($str[2]), '=').'-'; } } ?> safehtml/readme-SPIP.txt000066600000000441151451720700011123 0ustar00 SafeHTML pour SPIP -------- Version 1.3.7. http://pixel-apes.com/safehtml/ -------- Ce repertoire est extrait de SafeHTML 1.3.7 ; les fichiers suivants ont ete supprimes : safehtml/safehtml.php safehtml/tests/* On s'appuie sur la version sans commentaires (un peu plus legere) safehtml/readme.txt000066600000007444151451720700010364 0ustar00SafeHTML -------- Version 1.3.7. http://pixel-apes.com/safehtml/ -------- This parser strips down all potentially dangerous content within HTML: * opening tag without its closing tag * closing tag without its opening tag * any of these tags: "base", "basefont", "head", "html", "body", "applet", "object", "iframe", "frame", "frameset", "script", "layer", "ilayer", "embed", "bgsound", "link", "meta", "style", "title", "blink", "xml" etc. * any of these attributes: on*, data*, dynsrc * javascript:/vbscript:/about: etc. protocols * expression/behavior etc. in styles * any other active content It also tries to convert code to XHTML valid, but htmltidy is far better solution for this task. 
If you found any bugs in this parser, please inform me -- ICQ:551593 or mailto:thingol@mail.ru Please, subscribe to http://pixel-apes.com/safehtml/feed/rss feed in order to receive notices when SAFEHTML will be updated. -- Roman Ivanov. -- Pixel-Apes ( http://pixel-apes.com ). -- JetStyle ( http://jetstyle.ru/ ). -------- Version history: -------- 1.3.7. * Added 'dl' to the list of 'lists' tags. * Added 'callto' to the white list of protocols. * Added white list of "namespaced" attributes. 1.3.6. * More accurate UTF-7 decoding. 1.3.5. * Two serious security flaws fixed: UTF-7 XSS and CSS comments handling. 1.3.2. * Security flaw (improper quotes handling in attributes' values) fixed. Big thanks to Nick Cleaton. 1.3.1. * Dumb bug fixed (some closing tags were ignored). 1.3.0. * Two holes (with decimal HTML entities and with \x00 symbol) fixed. * Class rewritten under PEAR coding standarts. * Class now uses unmodified HTMLSax3 from PEAR. * To the list of table tags added: "caption", "col", "colgroup". 1.2.1. * It was possible to create XSS with hexadecimal HTML entities. Fixed. Big thanks to Christian Stocker. 1.2.0. * "id" and "name" attributes added to dangerous attributes list, because malefactor can broke legal javascript by spoofing ID or NAME of some element. * New method parse() allows to do all parsing process in two lines of code. Examples also updated. * New array, closeParagraph, contains list of block-level elements. When we open such elemet, we should close paragraph before. . It allows SafeHTML to produce more XHTML compliant code. * Added "webcal" to white list of protocols for those who uses calendar programs (Mozilla/iCal/etc). * Now SafeHTML strips down table elements when we are not inside table. * Now SafeHTML correctly closes unclosed "li" tags: before opening "li" of the same nesting level. 1.1.0. * New "dangerous" protocols: hcp, ms-help, help, disk, vnd.ms.radio, opera, res, resource, chrome, mocha, livescript. 
* tag was moved from "tags for deletion" to "tags for deletion with content". * New "dangerous" CSS instruction "include-source" (NN4 specific). * New array, Attributes, contains list of attributes for removal. If you need to remove "id" or "name" attribute, just add it to this array. * Now it is possible to choose between white-list and black-list filtering of protocols. Defaults are "white-list". This list is: "http", "https", "ftp", "telnet", "news", "nntp", "gopher", "mailto", "file". * For speed purposes, we now filter protocols only from these attributes: src, href, action, lowsrc, dynsrc, background, codebase. * Opera6 XSS bug ([\xC0][\xBC]script>alert(1)[\xC0][\xBC]/script> [UTF-8] workarounded. 1.0.4. New "dangerous" tag: plaintext. 1.0.3. Added array of elements that can have no closing tag. 1.0.2. Bug fix: attack. Thanks to shmel. 1.0.1. Bug fix: safehtml hangs on code. Thanks to lj user=electrocat. 1.0.0. First public release JavaScriptPacker/class.JavaScriptPacker.php000066600000060140151453276310014765 0ustar00pack(); * * or * * $myPacker = new JavaScriptPacker($script, 'Normal', true, false); * $packed = $myPacker->pack(); * * or (default values) * * $myPacker = new JavaScriptPacker($script); * $packed = $myPacker->pack(); * * * params of the constructor : * $script: the JavaScript to pack, string. * $encoding: level of encoding, int or string : * 0,10,62,95 or 'None', 'Numeric', 'Normal', 'High ASCII'. * default: 62. * $fastDecode: include the fast decoder in the packed result, boolean. * default : true. * $specialChars: if you are flagged your private and local variables * in the script, boolean. * default: false. * * The pack() method return the compressed JavasScript, as a string. * * see http://dean.edwards.name/packer/usage/ for more information. * * Notes : * # [del]need PHP 5 . 
Tested with PHP 5.1.2[/del] * this is a modified version for PHP 4 * * # The packed result may be different than with the Dean Edwards * version, but with the same length. The reason is that the PHP * function usort to sort array don't necessarily preserve the * original order of two equal member. The Javascript sort function * in fact preserve this order (but that's not require by the * ECMAScript standard). So the encoded keywords order can be * different in the two results. * * # Be careful with the 'High ASCII' Level encoding if you use * UTF-8 in your files... */ /* * modified by Mark Fabrizio Jr. to work with php 4 */ class JavaScriptPacker { var $IGNORE = '$1'; // validate parameters var $_script = ''; var $_encoding = 62; var $_fastDecode = true; var $_specialChars = false; var $LITERAL_ENCODING = array( 'None' => 0, 'Numeric' => 10, 'Normal' => 62, 'High ASCII' => 95 ); // http://doc.spip.org/@JavaScriptPacker function JavaScriptPacker($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false) { $this->_script = $_script . "\n"; if (array_key_exists($_encoding, $this->LITERAL_ENCODING)) $_encoding = $this->LITERAL_ENCODING[$_encoding]; $this->_encoding = min((int)$_encoding, 95); $this->_fastDecode = $_fastDecode; $this->_specialChars = $_specialChars; } // http://doc.spip.org/@pack function pack() { $this->_addParser('_basicCompression'); if ($this->_specialChars) $this->_addParser('_encodeSpecialChars'); if ($this->_encoding) $this->_addParser('_encodeKeywords'); // go! 
return $this->_pack($this->_script); } // apply all parsing routines // http://doc.spip.org/@_pack function _pack($script) { for ($i = 0; isset($this->_parsers[$i]); $i++) { $script = call_user_func(array(&$this,$this->_parsers[$i]), $script); } return $script; } // keep a list of parsing functions, they'll be executed all at once var $_parsers = array(); // http://doc.spip.org/@_addParser function _addParser($parser) { $this->_parsers[] = $parser; } // zero encoding - just removal of white space and comments // http://doc.spip.org/@_basicCompression function _basicCompression($script) { $parser = new ParseMaster(); // make safe $parser->escapeChar = '\\'; // protect strings $parser->add('/\'[^\'\\n\\r]*\'/',$this->IGNORE); $parser->add('/"[^"\\n\\r]*"/', $this->IGNORE); // remove comments $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' '); $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' '); // protect regular expressions $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', $this->IGNORE); // remove: ;;; doSomething(); if ($this->_specialChars) $parser->add('/;;;[^\\n\\r]+[\\n\\r]/'); // remove redundant semi-colons $parser->add('/\\(;;\\)/', $this->IGNORE); // protect for (;;) loops $parser->add('/;+\\s*([};])/', '$2'); // apply the above $script = $parser->exec($script); // remove white-space # $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3'); # $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3'); # $parser->add('/\\s+/', ''); # Modif fil@rezo.net pour conserver les \n $parser->add('/(\\b|\\x24)[\\t ]+(\\b|\\x24)/', '$2 $3'); $parser->add('/([+\\-])[\\t ]+([+\\-])/', '$2 $3'); $parser->add('/[\\t ]+/', ''); $parser->add('/\\s+/', "\n"); // done return $parser->exec($script); } // http://doc.spip.org/@_encodeSpecialChars function _encodeSpecialChars($script) { $parser = new ParseMaster(); // replace: $name -> n, $$name -> na 
$parser->add('/((\\x24+)([a-zA-Z$_]+))(\\d*)/', array('fn' => '_replace_name') ); // replace: _name -> _0, double-underscore (__name) is ignored $regexp = '/\\b_[A-Za-z\\d]\\w*/'; // build the word list $keywords = $this->_analyze($script, $regexp, '_encodePrivate'); // quick ref $encoded = $keywords['encoded']; $parser->add($regexp, array( 'fn' => '_replace_encoded', 'data' => $encoded ) ); return $parser->exec($script); } // http://doc.spip.org/@_encodeKeywords function _encodeKeywords($script) { // escape high-ascii values already in the script (i.e. in strings) if ($this->_encoding > 62) $script = $this->_escape95($script); // create the parser $parser = new ParseMaster(); $encode = $this->_getEncoder($this->_encoding); // for high-ascii, don't encode single character low-ascii $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/'; // build the word list $keywords = $this->_analyze($script, $regexp, $encode); $encoded = $keywords['encoded']; // encode $parser->add($regexp, array( 'fn' => '_replace_encoded', 'data' => $encoded ) ); if (empty($script)) return $script; else { //$res = $parser->exec($script); //$res = $this->_bootStrap($res, $keywords); //return $res; return $this->_bootStrap($parser->exec($script), $keywords); } } // http://doc.spip.org/@_analyze function _analyze($script, $regexp, $encode) { // analyse // retreive all words in the script $all = array(); preg_match_all($regexp, $script, $all); $_sorted = array(); // list of words sorted by frequency $_encoded = array(); // dictionary of word->encoding $_protected = array(); // instances of "protected" words $all = $all[0]; // simulate the javascript comportement of global match if (!empty($all)) { $unsorted = array(); // same list, not sorted $protected = array(); // "protected" words (dictionary of word->"word") $value = array(); // dictionary of charCode->encoding (eg. 
256->ff) $this->_count = array(); // word->count $i = count($all); $j = 0; //$word = null; // count the occurrences - used for sorting later do { --$i; $word = '$' . $all[$i]; if (!isset($this->_count[$word])) { $this->_count[$word] = 0; $unsorted[$j] = $word; // make a dictionary of all of the protected words in this script // these are words that might be mistaken for encoding //if (is_string($encode) && method_exists($this, $encode)) $values[$j] = call_user_func(array(&$this, $encode), $j); $protected['$' . $values[$j]] = $j++; } // increment the word counter $this->_count[$word]++; } while ($i > 0); // prepare to sort the word list, first we must protect // words that are also used as codes. we assign them a code // equivalent to the word itself. // e.g. if "do" falls within our encoding range // then we store keywords["do"] = "do"; // this avoids problems when decoding $i = count($unsorted); do { $word = $unsorted[--$i]; if (isset($protected[$word]) /*!= null*/) { $_sorted[$protected[$word]] = substr($word, 1); $_protected[$protected[$word]] = true; $this->_count[$word] = 0; } } while ($i); // sort the words by frequency // Note: the javascript and php version of sort can be different : // in php manual, usort : // " If two members compare as equal, // their order in the sorted array is undefined." // so the final packed script is different of the Dean's javascript version // but equivalent. // the ECMAscript standard does not guarantee this behaviour, // and thus not all browsers (e.g. Mozilla versions dating back to at // least 2003) respect this. 
usort($unsorted, array(&$this, '_sortWords')); $j = 0; // because there are "protected" words in the list // we must add the sorted words around them do { if (!isset($_sorted[$i])) $_sorted[$i] = substr($unsorted[$j++], 1); $_encoded[$_sorted[$i]] = $values[$i]; } while (++$i < count($unsorted)); } return array( 'sorted' => $_sorted, 'encoded' => $_encoded, 'protected' => $_protected); } var $_count = array(); // http://doc.spip.org/@_sortWords function _sortWords($match1, $match2) { return $this->_count[$match2] - $this->_count[$match1]; } // build the boot function used for loading and decoding // http://doc.spip.org/@_bootStrap function _bootStrap($packed, $keywords) { $ENCODE = $this->_safeRegExp('$encode\\($count\\)'); // $packed: the packed script $packed = "'" . $this->_escape($packed) . "'"; // $ascii: base for encoding $ascii = min(count($keywords['sorted']), $this->_encoding); if ($ascii == 0) $ascii = 1; // $count: number of words contained in the script $count = count($keywords['sorted']); // $keywords: list of words contained in the script foreach ($keywords['protected'] as $i=>$value) { $keywords['sorted'][$i] = ''; } // convert from a string to an array ksort($keywords['sorted']); $keywords = "'" . implode('|',$keywords['sorted']) . "'.split('|')"; $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii); $encode = $this->_getJSFunction($encode); $encode = preg_replace('/_encoding/','$ascii', $encode); $encode = preg_replace('/arguments\\.callee/','$encode', $encode); $inline = '\\$count' . ($ascii > 10 ? 
'.toString(\\$ascii)' : ''); // $decode: code snippet to speed up decoding if ($this->_fastDecode) { // create the decoder $decode = $this->_getJSFunction('_decodeBody'); if ($this->_encoding > 62) $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode); // perform the encoding inline for lower ascii values elseif ($ascii < 36) $decode = preg_replace($ENCODE, $inline, $decode); // special case: when $count==0 there are no keywords. I want to keep // the basic shape of the unpacking funcion so i'll frig the code... if ($count == 0) $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1); } // boot function $unpack = $this->_getJSFunction('_unpack'); if ($this->_fastDecode) { // insert the decoder $this->buffer = $decode; $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastDecode'), $unpack, 1); } $unpack = preg_replace('/"/', "'", $unpack); if ($this->_encoding > 62) { // high-ascii // get rid of the word-boundaries for regexp matches $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack); } if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) { // insert the encode function $this->buffer = $encode; $unpack = preg_replace_callback('/\\{/', array(&$this, '_insertFastEncode'), $unpack, 1); } else { // perform the encoding inline $unpack = preg_replace($ENCODE, $inline, $unpack); } // pack the boot function too $unpackPacker = new JavaScriptPacker($unpack, 0, false, true); $unpack = $unpackPacker->pack(); // arguments $params = array($packed, $ascii, $count, $keywords); if ($this->_fastDecode) { $params[] = 0; $params[] = '{}'; } $params = implode(',', $params); // the whole thing return 'eval(' . $unpack . '(' . $params . "))\n"; } var $buffer; // http://doc.spip.org/@_insertFastDecode function _insertFastDecode($match) { return '{' . $this->buffer . ';'; } // http://doc.spip.org/@_insertFastEncode function _insertFastEncode($match) { return '{$encode=' . $this->buffer . 
';'; } // mmm.. ..which one do i need ?? // http://doc.spip.org/@_getEncoder function _getEncoder($ascii) { return $ascii > 10 ? $ascii > 36 ? $ascii > 62 ? '_encode95' : '_encode62' : '_encode36' : '_encode10'; } // zero encoding // characters: 0123456789 // http://doc.spip.org/@_encode10 function _encode10($charCode) { return $charCode; } // inherent base36 support // characters: 0123456789abcdefghijklmnopqrstuvwxyz // http://doc.spip.org/@_encode36 function _encode36($charCode) { return base_convert($charCode, 10, 36); } // hitch a ride on base36 and add the upper case alpha characters // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ // http://doc.spip.org/@_encode62 function _encode62($charCode) { $res = ''; if ($charCode >= $this->_encoding) { $res = $this->_encode62((int)($charCode / $this->_encoding)); } $charCode = $charCode % $this->_encoding; if ($charCode > 35) return $res . chr($charCode + 29); else return $res . base_convert($charCode, 10, 36); } // use high-ascii values // characters: ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿À?ÂÃÄÅÆÇÈÉÊËÌ?Î??ÑÒÓÔÕÖרÙÚÛÜ?Þßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ // http://doc.spip.org/@_encode95 function _encode95($charCode) { $res = ''; if ($charCode >= $this->_encoding) $res = $this->_encode95($charCode / $this->_encoding); return $res . chr(($charCode % $this->_encoding) + 161); } // http://doc.spip.org/@_safeRegExp function _safeRegExp($string) { return '/'.preg_replace('/\$/', '\\\$', $string).'/'; } // http://doc.spip.org/@_encodePrivate function _encodePrivate($charCode) { return "_" . 
$charCode; } // protect characters used by the parser // http://doc.spip.org/@_escape function _escape($script) { return preg_replace('/([\\\\\'])/', '\\\$1', $script); } // protect high-ascii characters already in the script // http://doc.spip.org/@_escape95 function _escape95($script) { return preg_replace_callback( '/[\\xa1-\\xff]/', array(&$this, '_escape95Bis'), $script ); } // http://doc.spip.org/@_escape95Bis function _escape95Bis($match) { return '\x'.((string)dechex(ord($match))); } // http://doc.spip.org/@_getJSFunction function _getJSFunction($aName) { $func = 'JSFUNCTION'.$aName; if (isset($this->$func)){ return $this->$func; } else return ''; } // JavaScript Functions used. // Note : In Dean's version, these functions are converted // with 'String(aFunctionName);'. // This internal conversion complete the original code, ex : // 'while (aBool) anAction();' is converted to // 'while (aBool) { anAction(); }'. // The JavaScript functions below are corrected. // unpacking function - this is the boot strap function // data extracted from this packing routine is passed to // this function when decoded in the target // NOTE ! : without the ';' final. 
var $JSFUNCTION_unpack = 'function($packed, $ascii, $count, $keywords, $encode, $decode) { while ($count--) { if ($keywords[$count]) { $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]); } } return $packed; }';

// code-snippet inserted into the unpacker to speed up decoding
// The `while` body is wrapped in explicit braces (a brace-less variant was
// previously kept here as commented-out code).
// The literal contains JavaScript `//` comments, so it has to keep one
// statement per line: on a single line each comment would swallow the code
// that follows it.
var $JSFUNCTION_decodeBody = ' if (!\'\'.replace(/^/, String)) {
    // decode all the values we need
    while ($count--) {
        $decode[$encode($count)] = $keywords[$count] || $encode($count);
    }
    // global replacement function
    $keywords = [function ($encoded) {return $decode[$encoded]}];
    // generic match
    $encode = function () {return \'\\\\w+\'};
    // reset the loop counter - we are now doing a global replace
    $count = 1;
}
';

// zero encoding
// characters: 0123456789
var $JSFUNCTION_encode10 = 'function($charCode) { return $charCode; }';

// inherent base36 support
// characters: 0123456789abcdefghijklmnopqrstuvwxyz
var $JSFUNCTION_encode36 = 'function($charCode) { return $charCode.toString(36); }';

// hitch a ride on base36 and add the upper case alpha characters
// characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
var $JSFUNCTION_encode62 = 'function($charCode) { return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) + (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36)); }';

// use high-ascii values
// characters: char codes 161 and above, one per digit value
var $JSFUNCTION_encode95 = 'function($charCode) { return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) + String.fromCharCode($charCode % _encoding + 161); }';

}

// Multi-pattern search and replace: every pattern registered with add() is
// joined into one alternation and applied in a single pass by exec().
class ParseMaster {
    var $ignoreCase = false;
    var $escapeChar = '';

    // constants: positions inside one entry of $_patterns
    var $EXPRESSION = 0;
    var $REPLACEMENT = 1;
    var $LENGTH = 2;

    // used to determine nesting levels
    var $GROUPS = '/\\(/';//g
    var $SUB_REPLACE = '/\\$\\d/';
    var $INDEXED = '/^\\$\\d+$/';
    var $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
    var $ESCAPE = '/\\\./';//g
    var $QUOTE = '/\'/';
    var $DELETED = '/\\x01[^\\x01]*\\x01/';//g

    // Register a pattern and its replacement.
    //   $expression:  a '/'-delimited regular expression (no modifiers);
    //                 an empty value is stored as '/^$/'.
    //   $replacement: a plain string, a single reference such as '$2'
    //                 (stored as an integer index), or a string mixing
    //                 text and '$n' references (resolved by _backReferences).
    // http://doc.spip.org/@add
    function add($expression, $replacement = '') {
        // count the number of sub-expressions
        //  - add one because each pattern is itself a sub-expression
        $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);

        // only string replacements that reference sub-expressions need work
        if (is_string($replacement) && preg_match($this->SUB_REPLACE, $replacement)) {
            if (preg_match($this->INDEXED, $replacement)) {
                // a simple lookup (e.g. "$2"):
                // store the index (used for fast retrieval of matched strings)
                $replacement = (int)(substr($replacement, 1)) - 1;
            } else {
                // a complicated lookup (e.g. "Hello $2 $1"):
                // keep what _backReferences needs to do the lookup
                $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
                    ? '"' : "'";
                $replacement = array(
                    'fn' => '_backReferences',
                    'data' => array(
                        'replacement' => $replacement,
                        'length' => $length,
                        'quote' => $quote
                    )
                );
            }
        }

        // pass the modified arguments
        if (!empty($expression)) {
            $this->_add($expression, $replacement, $length);
        } else {
            $this->_add('/^$/', $replacement, $length);
        }
    }

    // Run every registered pattern over $string and return the result.
    // http://doc.spip.org/@exec
    function exec($string) {
        $this->_escaped = array();

        // hide escaped characters so that the patterns cannot match them
        $string = $this->_escape($string, $this->escapeChar);

        // With no pattern registered the combined expression would be the
        // invalid regex '/', so the global replacement is skipped entirely.
        if (!empty($this->_patterns)) {
            // simulate the _patterns.toString of Dean:
            // one capturing group per pattern, joined by '|'
            $regexp = '/';
            foreach ($this->_patterns as $reg) {
                $regexp .= '(' . substr($reg[$this->EXPRESSION], 1, -1) . ')|';
            }
            $regexp = substr($regexp, 0, -1) . '/';
            $regexp .= ($this->ignoreCase) ? 'i' : '';

            // execute the global replacement
            $string = preg_replace_callback(
                $regexp,
                array(&$this, '_replacement'),
                $string
            );
        }

        $string = $this->_unescape($string, $this->escapeChar);

        // drop the spans that _replacement marked for deletion
        return preg_replace($this->DELETED, '', $string);
    }

    // http://doc.spip.org/@reset
    function reset() {
        // clear the patterns collection so that this object may be re-used
        $this->_patterns = array();
    }

    // private
    var $_escaped = array();  // escaped characters
    var $_patterns = array(); // patterns stored by index

    // create and add a new pattern to the patterns collection;
    // the arguments are stored as array(expression, replacement, length)
    // http://doc.spip.org/@_add
    function _add() {
        $arguments = func_get_args();
        $this->_patterns[] = $arguments;
    }

    // this is the global replace function (it's quite complicated)
    // $arguments is the match array of the combined expression; the first
    // pattern whose own group matched a non-empty string decides the result.
    // http://doc.spip.org/@_replacement
    function _replacement($arguments) {
        if (empty($arguments)) return '';

        $i = 1; // index of the current pattern's own group
        $j = 0; // index of the current pattern
        // loop through the patterns
        while (isset($this->_patterns[$j])) {
            $pattern = $this->_patterns[$j++];

            // no result for this pattern:
            // skip over the references to its sub-expressions
            if (!isset($arguments[$i]) || ($arguments[$i] == '')) {
                $i += $pattern[$this->LENGTH];
                continue;
            }

            $replacement = $pattern[$this->REPLACEMENT];

            if (is_array($replacement) && isset($replacement['fn'])) {
                if (isset($replacement['data'])) $this->buffer = $replacement['data'];
                return call_user_func(array(&$this, $replacement['fn']), $arguments, $i);
            }

            if (is_int($replacement)) {
                // preg_replace_callback leaves out trailing groups that did
                // not take part in the match: treat them as empty strings
                return isset($arguments[$replacement + $i])
                    ? $arguments[$replacement + $i]
                    : '';
            }

            // a match containing the escape character is kept between \x01
            // markers; exec() removes those spans at the very end
            $delete = ($this->escapeChar == '' ||
                       strpos($arguments[$i], $this->escapeChar) === false)
                    ? '' : "\x01" . $arguments[$i] . "\x01";
            return $delete . $replacement;
        }

        // nothing matched with a non-empty string
        return '';
    }

    // Resolve the '$n' references of a mixed replacement string.
    // '$n' stands for group $offset + n - 1 of $match, because '$1' is the
    // pattern's own group, found at $offset.
    // http://doc.spip.org/@_backReferences
    function _backReferences($match, $offset) {
        $replacement = $this->buffer['replacement'];
        $i = $this->buffer['length'];
        // highest reference first
        while ($i) {
            $reference = '$' . $i;
            $i--;
            // groups that did not take part in the match may be missing
            $value = isset($match[$offset + $i]) ? $match[$offset + $i] : '';
            $replacement = str_replace($reference, $value, $replacement);
        }
        return $replacement;
    }

    // Combine groups 1 to 4 (relative to $offset) of a match: a slice of
    // group 1 sized from the lengths of groups 2 and 3, followed by group 4.
    // http://doc.spip.org/@_replace_name
    function _replace_name($match, $offset){
        $length = strlen($match[$offset + 2]);
        $start = $length - max($length - strlen($match[$offset + 3]), 0);
        return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
    }

    // Look the matched text up in the table currently held in $buffer.
    // http://doc.spip.org/@_replace_encoded
    function _replace_encoded($match, $offset) {
        return $this->buffer[$match[$offset]];
    }

    // php : we cannot pass additional data to preg_replace_callback,
    // and we cannot use &$this in create_function, so let's go to lower level
    var $buffer;

    // encode escaped characters: each "escape char + one character" pair is
    // reduced to the escape char alone, the character is saved in $_escaped
    // http://doc.spip.org/@_escape
    function _escape($string, $escapeChar) {
        if (!$escapeChar) {
            return $string;
        }
        $this->buffer = $escapeChar;
        return preg_replace_callback(
            '/\\' . $escapeChar . '(.)' .'/',
            array(&$this, '_escapeBis'),
            $string
        );
    }

    // Callback of _escape: remember the escaped character, keep the escape char.
    // http://doc.spip.org/@_escapeBis
    function _escapeBis($match) {
        $this->_escaped[] = $match[1];
        return $this->buffer;
    }

    // decode escaped characters: each escape char gets back the next
    // character saved by _escape, in order
    // http://doc.spip.org/@_unescape
    function _unescape($string, $escapeChar) {
        if (!$escapeChar) {
            return $string;
        }
        $regexp = '/'.'\\'.$escapeChar.'/';
        $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0);
        return preg_replace_callback(
            $regexp,
            array(&$this, '_unescapeBis'),
            $string
        );
    }

    // Callback of _unescape: put the next saved character back.
    // isset() is used on purpose: empty() is true for the string '0' and
    // would silently drop an escaped zero such as "\0".
    // http://doc.spip.org/@_unescapeBis
    function _unescapeBis() {
        $index = $this->buffer['i'];
        $temp = isset($this->_escaped[$index]) ? $this->_escaped[$index] : '';
        $this->buffer['i']++;
        return $this->buffer['escapeChar'] . $temp;
    }

    // Remove every backslash-escaped character from $string.
    // http://doc.spip.org/@_internalEscape
    function _internalEscape($string) {
        return preg_replace($this->ESCAPE, '', $string);
    }
}
?>