update IDNA-class to latest version 0.8.0

This commit is contained in:
Michael Kaufmann (d00p)
2013-06-06 15:07:43 +02:00
parent 21aabb154d
commit 6bde0820c6

View File

@@ -23,6 +23,8 @@
// }}}
// Source for updates: http://phlymail.com/en/downloads/idna-convert.html
/**
* Encode/decode Internationalized Domain Names.
*
@@ -47,8 +49,8 @@
* ACE input and output is always expected to be ASCII.
*
* @author Matthias Sommerfeld <mso@phlylabs.de>
* @copyright 2004-2010 phlyLabs Berlin, http://phlylabs.de
* @version 0.7.0 2010-11-20
* @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de
* @version 0.8.0 2011-03-11
*/
class idna_convert
{
@@ -76,11 +78,14 @@ class idna_convert
protected $_scount = 11172; // _lcount * _tcount * _vcount
protected $_error = false;
protected static $_mb_string_overload = null;
// See {@link set_paramter()} for details of how to change the following
// settings from within your script / application
protected $_api_encoding = 'utf8'; // Default input charset is UTF-8
protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
protected $_strict_mode = false; // Behave strict or not
protected $_idn_version = 2003; // Can be either 2003 (old, default) or 2008
/**
* the constructor
@@ -93,7 +98,15 @@ class idna_convert
{
$this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
// If parameters are given, pass these to the respective method
if (is_array($options)) return $this->set_parameter($options);
if (is_array($options)) {
$this->set_parameter($options);
}
// populate mbstring overloading cache if not set
if (self::$_mb_string_overload === null) {
self::$_mb_string_overload = (extension_loaded('mbstring')
&& (ini_get('mbstring.func_overload') & 0x02) === 0x02);
}
}
/**
@@ -136,11 +149,18 @@ class idna_convert
case 'strict':
$this->_strict_mode = ($v) ? true : false;
break;
case 'encode_german_sz':
if (!$v) {
$this->NP['replacemaps'][0xDF] = array(0x73, 0x73);
case 'idn_version':
if (in_array($v, array('2003', '2008'))) {
$this->_idn_version = $v;
} else {
unset($this->NP['replacemaps'][0xDF]);
$this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
}
break;
case 'encode_german_sz': // Deprecated
if (!$v) {
self::$NP['replacemaps'][0xDF] = array(0x73, 0x73);
} else {
unset(self::$NP['replacemaps'][0xDF]);
}
break;
default:
@@ -398,13 +418,13 @@ class idna_convert
}
// Find last occurence of the delimiter
$delim_pos = strrpos($encoded, '-');
if ($delim_pos > strlen($this->_punycode_prefix)) {
for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
if ($delim_pos > self::byteLength($this->_punycode_prefix)) {
for ($k = self::byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
$decoded[] = ord($encoded{$k});
}
}
$deco_len = count($decoded);
$enco_len = strlen($encoded);
$enco_len = self::byteLength($encoded);
// Wandering through the strings; init
$is_first = true;
@@ -442,7 +462,7 @@ class idna_convert
protected function _encode($decoded)
{
// We cannot encode a domain name containing the Punycode prefix
$extract = strlen($this->_punycode_prefix);
$extract = self::byteLength($this->_punycode_prefix);
$check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
$check_deco = array_slice($decoded, 0, $extract);
@@ -589,24 +609,28 @@ class idna_convert
// While mapping required chars we apply the cannonical ordering
foreach ($input as $v) {
// Map to nothing == skip that code point
if (in_array($v, $this->NP['map_nothing'])) continue;
if (in_array($v, self::$NP['map_nothing'])) continue;
// Try to find prohibited input
if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) {
if (in_array($v, self::$NP['prohibit']) || in_array($v, self::$NP['general_prohibited'])) {
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
return false;
}
foreach ($this->NP['prohibit_ranges'] as $range) {
foreach (self::$NP['prohibit_ranges'] as $range) {
if ($range[0] <= $v && $v <= $range[1]) {
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
return false;
}
}
// Hangul syllable decomposition
if (0xAC00 <= $v && $v <= 0xD7AF) {
foreach ($this->_hangul_decompose($v) as $out) $output[] = (int) $out;
// There's a decomposition mapping for that code point
} elseif (isset($this->NP['replacemaps'][$v])) {
foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) {
// Hangul syllable decomposition
foreach ($this->_hangul_decompose($v) as $out) {
$output[] = (int) $out;
}
} elseif (($this->_idn_version == '2003') && isset(self::$NP['replacemaps'][$v])) {
// There's a decomposition mapping for that code point
// Decompositions only in version 2003 (original) of IDNA
foreach ($this->_apply_cannonical_ordering(self::$NP['replacemaps'][$v]) as $out) {
$output[] = (int) $out;
}
} else {
@@ -715,11 +739,11 @@ class idna_convert
*/
protected function _get_combining_class($char)
{
return isset($this->NP['norm_combcls'][$char]) ? $this->NP['norm_combcls'][$char] : 0;
return isset(self::$NP['norm_combcls'][$char]) ? self::$NP['norm_combcls'][$char] : 0;
}
/**
* Apllies the cannonical ordering of a decomposed UCS4 sequence
* Applies the cannonical ordering of a decomposed UCS4 sequence
* @param array Decomposed UCS4 sequence
* @return array Ordered USC4 sequence
*/
@@ -758,7 +782,7 @@ class idna_convert
protected function _combine($input)
{
$inp_len = count($input);
foreach ($this->NP['replacemaps'] as $np_src => $np_target) {
foreach (self::$NP['replacemaps'] as $np_src => $np_target) {
if ($np_target[0] != $input[0]) continue;
if (count($np_target) != $inp_len) continue;
$hit = false;
@@ -797,12 +821,7 @@ class idna_convert
{
$output = array();
$out_len = 0;
// Patch by Daniel Hahler; work around prolbem with mbstring.func_overload
if (function_exists('mb_strlen')) {
$inp_len = mb_strlen($input, '8bit');
} else {
$inp_len = strlen($input);
}
$inp_len = self::byteLength($input);
$mode = 'next';
$test = 'none';
for ($k = 0; $k < $inp_len; ++$k) {
@@ -923,7 +942,7 @@ class idna_convert
protected function _ucs4_string_to_ucs4($input)
{
$output = array();
$inp_len = strlen($input);
$inp_len = self::byteLength($input);
// Input length must be dividable by 4
if ($inp_len % 4) {
$this->_error('Input UCS4 string is broken');
@@ -942,6 +961,56 @@ class idna_convert
return $output;
}
/**
* Gets the length of a string in bytes even if mbstring function
* overloading is turned on
*
* @param string $string the string for which to get the length.
* @return integer the length of the string in bytes.
*/
protected static function byteLength($string)
{
if (self::$_mb_string_overload) {
return mb_strlen($string, '8bit');
}
return strlen((binary) $string);
}
/**
* Attempts to return a concrete IDNA instance.
*
* @param array $params Set of paramaters
* @return idna_convert
* @access public
*/
public function getInstance($params = array())
{
return new idna_convert($params);
}
/**
* Attempts to return a concrete IDNA instance for either php4 or php5,
* only creating a new instance if no IDNA instance with the same
* parameters currently exists.
*
* @param array $params Set of paramaters
*
* @return object idna_convert
* @access public
*/
public function singleton($params = array())
{
static $instances;
if (!isset($instances)) {
$instances = array();
}
$signature = serialize($params);
if (!isset($instances[$signature])) {
$instances[$signature] = idna_convert::getInstance($params);
}
return $instances[$signature];
}
/**
* Holds all relevant mapping tables
* See RFC3454 for details
@@ -949,7 +1018,7 @@ class idna_convert
* @private array
* @since 0.5.2
*/
protected $NP = array
protected static $NP = array
('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C
,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07
,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF
@@ -984,7 +1053,7 @@ class idna_convert
,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3)
,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8)
,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC)
,0xDD => array(0xFD), 0xDE => array(0xFE) /* Here was German "ß" -> "ss", is now configurable */
,0xDD => array(0xFD), 0xDE => array(0xFE), 0xDF => array(0x73, 0x73)
,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105)
,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B)
,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111)