update IDNA-class to latest version 0.8.0

This commit is contained in:
Michael Kaufmann (d00p)
2013-06-06 15:07:43 +02:00
parent 21aabb154d
commit 6bde0820c6

View File

@@ -23,6 +23,8 @@
// }}} // }}}
// Source for updates: http://phlymail.com/en/downloads/idna-convert.html
/** /**
* Encode/decode Internationalized Domain Names. * Encode/decode Internationalized Domain Names.
* *
@@ -47,8 +49,8 @@
* ACE input and output is always expected to be ASCII. * ACE input and output is always expected to be ASCII.
* *
* @author Matthias Sommerfeld <mso@phlylabs.de> * @author Matthias Sommerfeld <mso@phlylabs.de>
* @copyright 2004-2010 phlyLabs Berlin, http://phlylabs.de * @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de
* @version 0.7.0 2010-11-20 * @version 0.8.0 2011-03-11
*/ */
class idna_convert class idna_convert
{ {
@@ -76,11 +78,14 @@ class idna_convert
protected $_scount = 11172; // _lcount * _tcount * _vcount protected $_scount = 11172; // _lcount * _tcount * _vcount
protected $_error = false; protected $_error = false;
protected static $_mb_string_overload = null;
// See {@link set_paramter()} for details of how to change the following // See {@link set_paramter()} for details of how to change the following
// settings from within your script / application // settings from within your script / application
protected $_api_encoding = 'utf8'; // Default input charset is UTF-8 protected $_api_encoding = 'utf8'; // Default input charset is UTF-8
protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
protected $_strict_mode = false; // Behave strict or not protected $_strict_mode = false; // Behave strict or not
protected $_idn_version = 2003; // Can be either 2003 (old, default) or 2008
/** /**
* the constructor * the constructor
@@ -93,7 +98,15 @@ class idna_convert
{ {
$this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount; $this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
// If parameters are given, pass these to the respective method // If parameters are given, pass these to the respective method
if (is_array($options)) return $this->set_parameter($options); if (is_array($options)) {
$this->set_parameter($options);
}
// populate mbstring overloading cache if not set
if (self::$_mb_string_overload === null) {
self::$_mb_string_overload = (extension_loaded('mbstring')
&& (ini_get('mbstring.func_overload') & 0x02) === 0x02);
}
} }
/** /**
@@ -136,11 +149,18 @@ class idna_convert
case 'strict': case 'strict':
$this->_strict_mode = ($v) ? true : false; $this->_strict_mode = ($v) ? true : false;
break; break;
case 'encode_german_sz': case 'idn_version':
if (!$v) { if (in_array($v, array('2003', '2008'))) {
$this->NP['replacemaps'][0xDF] = array(0x73, 0x73); $this->_idn_version = $v;
} else { } else {
unset($this->NP['replacemaps'][0xDF]); $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
}
break;
case 'encode_german_sz': // Deprecated
if (!$v) {
self::$NP['replacemaps'][0xDF] = array(0x73, 0x73);
} else {
unset(self::$NP['replacemaps'][0xDF]);
} }
break; break;
default: default:
@@ -398,13 +418,13 @@ class idna_convert
} }
// Find last occurence of the delimiter // Find last occurence of the delimiter
$delim_pos = strrpos($encoded, '-'); $delim_pos = strrpos($encoded, '-');
if ($delim_pos > strlen($this->_punycode_prefix)) { if ($delim_pos > self::byteLength($this->_punycode_prefix)) {
for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) { for ($k = self::byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
$decoded[] = ord($encoded{$k}); $decoded[] = ord($encoded{$k});
} }
} }
$deco_len = count($decoded); $deco_len = count($decoded);
$enco_len = strlen($encoded); $enco_len = self::byteLength($encoded);
// Wandering through the strings; init // Wandering through the strings; init
$is_first = true; $is_first = true;
@@ -442,7 +462,7 @@ class idna_convert
protected function _encode($decoded) protected function _encode($decoded)
{ {
// We cannot encode a domain name containing the Punycode prefix // We cannot encode a domain name containing the Punycode prefix
$extract = strlen($this->_punycode_prefix); $extract = self::byteLength($this->_punycode_prefix);
$check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix); $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
$check_deco = array_slice($decoded, 0, $extract); $check_deco = array_slice($decoded, 0, $extract);
@@ -589,24 +609,28 @@ class idna_convert
// While mapping required chars we apply the cannonical ordering // While mapping required chars we apply the cannonical ordering
foreach ($input as $v) { foreach ($input as $v) {
// Map to nothing == skip that code point // Map to nothing == skip that code point
if (in_array($v, $this->NP['map_nothing'])) continue; if (in_array($v, self::$NP['map_nothing'])) continue;
// Try to find prohibited input // Try to find prohibited input
if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) { if (in_array($v, self::$NP['prohibit']) || in_array($v, self::$NP['general_prohibited'])) {
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v)); $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
return false; return false;
} }
foreach ($this->NP['prohibit_ranges'] as $range) { foreach (self::$NP['prohibit_ranges'] as $range) {
if ($range[0] <= $v && $v <= $range[1]) { if ($range[0] <= $v && $v <= $range[1]) {
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v)); $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
return false; return false;
} }
} }
// Hangul syllable decomposition
if (0xAC00 <= $v && $v <= 0xD7AF) { if (0xAC00 <= $v && $v <= 0xD7AF) {
foreach ($this->_hangul_decompose($v) as $out) $output[] = (int) $out; // Hangul syllable decomposition
foreach ($this->_hangul_decompose($v) as $out) {
$output[] = (int) $out;
}
} elseif (($this->_idn_version == '2003') && isset(self::$NP['replacemaps'][$v])) {
// There's a decomposition mapping for that code point // There's a decomposition mapping for that code point
} elseif (isset($this->NP['replacemaps'][$v])) { // Decompositions only in version 2003 (original) of IDNA
foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) { foreach ($this->_apply_cannonical_ordering(self::$NP['replacemaps'][$v]) as $out) {
$output[] = (int) $out; $output[] = (int) $out;
} }
} else { } else {
@@ -715,11 +739,11 @@ class idna_convert
*/ */
protected function _get_combining_class($char) protected function _get_combining_class($char)
{ {
return isset($this->NP['norm_combcls'][$char]) ? $this->NP['norm_combcls'][$char] : 0; return isset(self::$NP['norm_combcls'][$char]) ? self::$NP['norm_combcls'][$char] : 0;
} }
/** /**
* Apllies the cannonical ordering of a decomposed UCS4 sequence * Applies the cannonical ordering of a decomposed UCS4 sequence
* @param array Decomposed UCS4 sequence * @param array Decomposed UCS4 sequence
* @return array Ordered USC4 sequence * @return array Ordered USC4 sequence
*/ */
@@ -758,7 +782,7 @@ class idna_convert
protected function _combine($input) protected function _combine($input)
{ {
$inp_len = count($input); $inp_len = count($input);
foreach ($this->NP['replacemaps'] as $np_src => $np_target) { foreach (self::$NP['replacemaps'] as $np_src => $np_target) {
if ($np_target[0] != $input[0]) continue; if ($np_target[0] != $input[0]) continue;
if (count($np_target) != $inp_len) continue; if (count($np_target) != $inp_len) continue;
$hit = false; $hit = false;
@@ -797,12 +821,7 @@ class idna_convert
{ {
$output = array(); $output = array();
$out_len = 0; $out_len = 0;
// Patch by Daniel Hahler; work around prolbem with mbstring.func_overload $inp_len = self::byteLength($input);
if (function_exists('mb_strlen')) {
$inp_len = mb_strlen($input, '8bit');
} else {
$inp_len = strlen($input);
}
$mode = 'next'; $mode = 'next';
$test = 'none'; $test = 'none';
for ($k = 0; $k < $inp_len; ++$k) { for ($k = 0; $k < $inp_len; ++$k) {
@@ -923,7 +942,7 @@ class idna_convert
protected function _ucs4_string_to_ucs4($input) protected function _ucs4_string_to_ucs4($input)
{ {
$output = array(); $output = array();
$inp_len = strlen($input); $inp_len = self::byteLength($input);
// Input length must be dividable by 4 // Input length must be dividable by 4
if ($inp_len % 4) { if ($inp_len % 4) {
$this->_error('Input UCS4 string is broken'); $this->_error('Input UCS4 string is broken');
@@ -942,6 +961,56 @@ class idna_convert
return $output; return $output;
} }
/**
* Gets the length of a string in bytes even if mbstring function
* overloading is turned on
*
* @param string $string the string for which to get the length.
* @return integer the length of the string in bytes.
*/
protected static function byteLength($string)
{
if (self::$_mb_string_overload) {
return mb_strlen($string, '8bit');
}
return strlen((binary) $string);
}
/**
* Attempts to return a concrete IDNA instance.
*
* @param array $params Set of paramaters
* @return idna_convert
* @access public
*/
public function getInstance($params = array())
{
return new idna_convert($params);
}
/**
* Attempts to return a concrete IDNA instance for either php4 or php5,
* only creating a new instance if no IDNA instance with the same
* parameters currently exists.
*
* @param array $params Set of paramaters
*
* @return object idna_convert
* @access public
*/
public function singleton($params = array())
{
static $instances;
if (!isset($instances)) {
$instances = array();
}
$signature = serialize($params);
if (!isset($instances[$signature])) {
$instances[$signature] = idna_convert::getInstance($params);
}
return $instances[$signature];
}
/** /**
* Holds all relevant mapping tables * Holds all relevant mapping tables
* See RFC3454 for details * See RFC3454 for details
@@ -949,7 +1018,7 @@ class idna_convert
* @private array * @private array
* @since 0.5.2 * @since 0.5.2
*/ */
protected $NP = array protected static $NP = array
('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C ('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C
,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07 ,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07
,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF ,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF
@@ -984,7 +1053,7 @@ class idna_convert
,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3) ,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3)
,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8) ,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8)
,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC) ,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC)
,0xDD => array(0xFD), 0xDE => array(0xFE) /* Here was German "ß" -> "ss", is now configurable */ ,0xDD => array(0xFD), 0xDE => array(0xFE), 0xDF => array(0x73, 0x73)
,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105) ,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105)
,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B) ,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B)
,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111) ,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111)