update IDNA-class to latest version 0.8.0
This commit is contained in:
@@ -23,6 +23,8 @@
|
|||||||
|
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
|
// Source for updates: http://phlymail.com/en/downloads/idna-convert.html
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encode/decode Internationalized Domain Names.
|
* Encode/decode Internationalized Domain Names.
|
||||||
*
|
*
|
||||||
@@ -47,8 +49,8 @@
|
|||||||
* ACE input and output is always expected to be ASCII.
|
* ACE input and output is always expected to be ASCII.
|
||||||
*
|
*
|
||||||
* @author Matthias Sommerfeld <mso@phlylabs.de>
|
* @author Matthias Sommerfeld <mso@phlylabs.de>
|
||||||
* @copyright 2004-2010 phlyLabs Berlin, http://phlylabs.de
|
* @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de
|
||||||
* @version 0.7.0 2010-11-20
|
* @version 0.8.0 2011-03-11
|
||||||
*/
|
*/
|
||||||
class idna_convert
|
class idna_convert
|
||||||
{
|
{
|
||||||
@@ -76,11 +78,14 @@ class idna_convert
|
|||||||
protected $_scount = 11172; // _lcount * _tcount * _vcount
|
protected $_scount = 11172; // _lcount * _tcount * _vcount
|
||||||
protected $_error = false;
|
protected $_error = false;
|
||||||
|
|
||||||
|
protected static $_mb_string_overload = null;
|
||||||
|
|
||||||
// See {@link set_parameter()} for details of how to change the following
|
// See {@link set_parameter()} for details of how to change the following
|
||||||
// settings from within your script / application
|
// settings from within your script / application
|
||||||
protected $_api_encoding = 'utf8'; // Default input charset is UTF-8
|
protected $_api_encoding = 'utf8'; // Default input charset is UTF-8
|
||||||
protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
|
protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
|
||||||
protected $_strict_mode = false; // Behave strict or not
|
protected $_strict_mode = false; // Behave strict or not
|
||||||
|
protected $_idn_version = 2003; // Can be either 2003 (old, default) or 2008
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the constructor
|
* the constructor
|
||||||
@@ -93,7 +98,15 @@ class idna_convert
|
|||||||
{
|
{
|
||||||
$this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
|
$this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
|
||||||
// If parameters are given, pass these to the respective method
|
// If parameters are given, pass these to the respective method
|
||||||
if (is_array($options)) return $this->set_parameter($options);
|
if (is_array($options)) {
|
||||||
|
$this->set_parameter($options);
|
||||||
|
}
|
||||||
|
|
||||||
|
// populate mbstring overloading cache if not set
|
||||||
|
if (self::$_mb_string_overload === null) {
|
||||||
|
self::$_mb_string_overload = (extension_loaded('mbstring')
|
||||||
|
&& (ini_get('mbstring.func_overload') & 0x02) === 0x02);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -136,11 +149,18 @@ class idna_convert
|
|||||||
case 'strict':
|
case 'strict':
|
||||||
$this->_strict_mode = ($v) ? true : false;
|
$this->_strict_mode = ($v) ? true : false;
|
||||||
break;
|
break;
|
||||||
case 'encode_german_sz':
|
case 'idn_version':
|
||||||
if (!$v) {
|
if (in_array($v, array('2003', '2008'))) {
|
||||||
$this->NP['replacemaps'][0xDF] = array(0x73, 0x73);
|
$this->_idn_version = $v;
|
||||||
} else {
|
} else {
|
||||||
unset($this->NP['replacemaps'][0xDF]);
|
$this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'encode_german_sz': // Deprecated
|
||||||
|
if (!$v) {
|
||||||
|
self::$NP['replacemaps'][0xDF] = array(0x73, 0x73);
|
||||||
|
} else {
|
||||||
|
unset(self::$NP['replacemaps'][0xDF]);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -398,13 +418,13 @@ class idna_convert
|
|||||||
}
|
}
|
||||||
// Find last occurence of the delimiter
|
// Find last occurence of the delimiter
|
||||||
$delim_pos = strrpos($encoded, '-');
|
$delim_pos = strrpos($encoded, '-');
|
||||||
if ($delim_pos > strlen($this->_punycode_prefix)) {
|
if ($delim_pos > self::byteLength($this->_punycode_prefix)) {
|
||||||
for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
|
for ($k = self::byteLength($this->_punycode_prefix); $k < $delim_pos; ++$k) {
|
||||||
$decoded[] = ord($encoded{$k});
|
$decoded[] = ord($encoded{$k});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$deco_len = count($decoded);
|
$deco_len = count($decoded);
|
||||||
$enco_len = strlen($encoded);
|
$enco_len = self::byteLength($encoded);
|
||||||
|
|
||||||
// Wandering through the strings; init
|
// Wandering through the strings; init
|
||||||
$is_first = true;
|
$is_first = true;
|
||||||
@@ -442,7 +462,7 @@ class idna_convert
|
|||||||
protected function _encode($decoded)
|
protected function _encode($decoded)
|
||||||
{
|
{
|
||||||
// We cannot encode a domain name containing the Punycode prefix
|
// We cannot encode a domain name containing the Punycode prefix
|
||||||
$extract = strlen($this->_punycode_prefix);
|
$extract = self::byteLength($this->_punycode_prefix);
|
||||||
$check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
|
$check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
|
||||||
$check_deco = array_slice($decoded, 0, $extract);
|
$check_deco = array_slice($decoded, 0, $extract);
|
||||||
|
|
||||||
@@ -589,24 +609,28 @@ class idna_convert
|
|||||||
// While mapping required chars we apply the cannonical ordering
|
// While mapping required chars we apply the cannonical ordering
|
||||||
foreach ($input as $v) {
|
foreach ($input as $v) {
|
||||||
// Map to nothing == skip that code point
|
// Map to nothing == skip that code point
|
||||||
if (in_array($v, $this->NP['map_nothing'])) continue;
|
if (in_array($v, self::$NP['map_nothing'])) continue;
|
||||||
// Try to find prohibited input
|
// Try to find prohibited input
|
||||||
if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) {
|
if (in_array($v, self::$NP['prohibit']) || in_array($v, self::$NP['general_prohibited'])) {
|
||||||
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
|
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
foreach ($this->NP['prohibit_ranges'] as $range) {
|
foreach (self::$NP['prohibit_ranges'] as $range) {
|
||||||
if ($range[0] <= $v && $v <= $range[1]) {
|
if ($range[0] <= $v && $v <= $range[1]) {
|
||||||
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
|
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Hangul syllable decomposition
|
|
||||||
if (0xAC00 <= $v && $v <= 0xD7AF) {
|
if (0xAC00 <= $v && $v <= 0xD7AF) {
|
||||||
foreach ($this->_hangul_decompose($v) as $out) $output[] = (int) $out;
|
// Hangul syllable decomposition
|
||||||
// There's a decomposition mapping for that code point
|
foreach ($this->_hangul_decompose($v) as $out) {
|
||||||
} elseif (isset($this->NP['replacemaps'][$v])) {
|
$output[] = (int) $out;
|
||||||
foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) {
|
}
|
||||||
|
} elseif (($this->_idn_version == '2003') && isset(self::$NP['replacemaps'][$v])) {
|
||||||
|
// There's a decomposition mapping for that code point
|
||||||
|
// Decompositions only in version 2003 (original) of IDNA
|
||||||
|
foreach ($this->_apply_cannonical_ordering(self::$NP['replacemaps'][$v]) as $out) {
|
||||||
$output[] = (int) $out;
|
$output[] = (int) $out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -715,11 +739,11 @@ class idna_convert
|
|||||||
*/
|
*/
|
||||||
protected function _get_combining_class($char)
|
protected function _get_combining_class($char)
|
||||||
{
|
{
|
||||||
return isset($this->NP['norm_combcls'][$char]) ? $this->NP['norm_combcls'][$char] : 0;
|
return isset(self::$NP['norm_combcls'][$char]) ? self::$NP['norm_combcls'][$char] : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Apllies the cannonical ordering of a decomposed UCS4 sequence
|
* Applies the cannonical ordering of a decomposed UCS4 sequence
|
||||||
* @param array Decomposed UCS4 sequence
|
* @param array Decomposed UCS4 sequence
|
||||||
* @return array Ordered UCS4 sequence
|
* @return array Ordered UCS4 sequence
|
||||||
*/
|
*/
|
||||||
@@ -758,7 +782,7 @@ class idna_convert
|
|||||||
protected function _combine($input)
|
protected function _combine($input)
|
||||||
{
|
{
|
||||||
$inp_len = count($input);
|
$inp_len = count($input);
|
||||||
foreach ($this->NP['replacemaps'] as $np_src => $np_target) {
|
foreach (self::$NP['replacemaps'] as $np_src => $np_target) {
|
||||||
if ($np_target[0] != $input[0]) continue;
|
if ($np_target[0] != $input[0]) continue;
|
||||||
if (count($np_target) != $inp_len) continue;
|
if (count($np_target) != $inp_len) continue;
|
||||||
$hit = false;
|
$hit = false;
|
||||||
@@ -797,12 +821,7 @@ class idna_convert
|
|||||||
{
|
{
|
||||||
$output = array();
|
$output = array();
|
||||||
$out_len = 0;
|
$out_len = 0;
|
||||||
// Patch by Daniel Hahler; work around prolbem with mbstring.func_overload
|
$inp_len = self::byteLength($input);
|
||||||
if (function_exists('mb_strlen')) {
|
|
||||||
$inp_len = mb_strlen($input, '8bit');
|
|
||||||
} else {
|
|
||||||
$inp_len = strlen($input);
|
|
||||||
}
|
|
||||||
$mode = 'next';
|
$mode = 'next';
|
||||||
$test = 'none';
|
$test = 'none';
|
||||||
for ($k = 0; $k < $inp_len; ++$k) {
|
for ($k = 0; $k < $inp_len; ++$k) {
|
||||||
@@ -923,7 +942,7 @@ class idna_convert
|
|||||||
protected function _ucs4_string_to_ucs4($input)
|
protected function _ucs4_string_to_ucs4($input)
|
||||||
{
|
{
|
||||||
$output = array();
|
$output = array();
|
||||||
$inp_len = strlen($input);
|
$inp_len = self::byteLength($input);
|
||||||
// Input length must be dividable by 4
|
// Input length must be dividable by 4
|
||||||
if ($inp_len % 4) {
|
if ($inp_len % 4) {
|
||||||
$this->_error('Input UCS4 string is broken');
|
$this->_error('Input UCS4 string is broken');
|
||||||
@@ -942,6 +961,56 @@ class idna_convert
|
|||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Gets the length of a string in bytes even if mbstring function
 * overloading is turned on.
 *
 * The choice between mb_strlen() and strlen() is driven by the cached
 * self::$_mb_string_overload flag. If that flag has not been populated
 * yet (it is still null), it is computed here, so the result is correct
 * regardless of whether an instance has been constructed beforehand.
 *
 * @param string $string the string for which to get the length.
 * @return integer the length of the string in bytes.
 */
protected static function byteLength($string)
{
    // Populate the overload cache on first use
    if (self::$_mb_string_overload === null) {
        self::$_mb_string_overload = (extension_loaded('mbstring')
                && (ini_get('mbstring.func_overload') & 0x02) === 0x02);
    }

    if (self::$_mb_string_overload) {
        // '8bit' makes mb_strlen() count raw bytes instead of characters
        return mb_strlen($string, '8bit');
    }

    // (string) is the canonical spelling of the (binary) cast alias
    return strlen((string) $string);
}
|
||||||
|
|
||||||
|
/**
 * Creates and returns a new IDNA converter instance.
 *
 * Declared static so that it can be invoked without an object, i.e. as
 * idna_convert::getInstance(); invoking it on an existing object keeps
 * working as before.
 *
 * @param array $params Set of parameters, handed to the constructor
 * @return idna_convert
 * @access public
 */
public static function getInstance($params = array())
{
    return new self($params);
}
|
||||||
|
|
||||||
|
/**
 * Returns a shared IDNA converter instance, only creating a new one if
 * no instance with the same parameters currently exists.
 *
 * Instances are cached in a function-local static array keyed by the
 * serialized parameter set, so two calls with equal $params yield the
 * same object. Declared static so that it can be invoked without an
 * object, i.e. as idna_convert::singleton(); invoking it on an existing
 * object keeps working as before.
 *
 * @param array $params Set of parameters, handed to the constructor
 *
 * @return object idna_convert
 * @access public
 */
public static function singleton($params = array())
{
    // One cached converter per distinct (serialized) parameter set
    static $instances = array();

    $signature = serialize($params);
    if (!isset($instances[$signature])) {
        $instances[$signature] = new self($params);
    }

    return $instances[$signature];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds all relevant mapping tables
|
* Holds all relevant mapping tables
|
||||||
* See RFC3454 for details
|
* See RFC3454 for details
|
||||||
@@ -949,7 +1018,7 @@ class idna_convert
|
|||||||
* @private array
|
* @private array
|
||||||
* @since 0.5.2
|
* @since 0.5.2
|
||||||
*/
|
*/
|
||||||
protected $NP = array
|
protected static $NP = array
|
||||||
('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C
|
('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C
|
||||||
,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07
|
,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07
|
||||||
,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF
|
,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF
|
||||||
@@ -984,7 +1053,7 @@ class idna_convert
|
|||||||
,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3)
|
,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3)
|
||||||
,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8)
|
,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8)
|
||||||
,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC)
|
,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC)
|
||||||
,0xDD => array(0xFD), 0xDE => array(0xFE) /* Here was German "ß" -> "ss", is now configurable */
|
,0xDD => array(0xFD), 0xDE => array(0xFE), 0xDF => array(0x73, 0x73)
|
||||||
,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105)
|
,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105)
|
||||||
,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B)
|
,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B)
|
||||||
,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111)
|
,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111)
|
||||||
|
|||||||
Reference in New Issue
Block a user