Files
Pichome/dzz/class/class_encode.php

119 lines
3.2 KiB
PHP
Raw Permalink Normal View History

2022-01-29 10:44:42 +08:00
<?php
/*
* @copyright QiaoQiaoShiDai Internet Technology(Shanghai)Co.,Ltd
* @license https://www.oaooa.com/licenses/
*
* @link https://www.oaooa.com
* @author zyx(zyx@oaooa.com)
*/
define ('UTF32_BIG_ENDIAN_BOM' , chr(0x00) . chr(0x00) . chr(0xFE) . chr(0xFF));
define ('UTF32_LITTLE_ENDIAN_BOM', chr(0xFF) . chr(0xFE) . chr(0x00) . chr(0x00));
define ('UTF16_BIG_ENDIAN_BOM' , chr(0xFE) . chr(0xFF));
define ('UTF16_LITTLE_ENDIAN_BOM', chr(0xFF) . chr(0xFE));
define ('UTF8_BOM' , chr(0xEF) . chr(0xBB) . chr(0xBF));
class Encode_Core {
/**
* 文件分析方法来检查UNICODE文件ANSI文件没有文件头此处不分析
*/
private function detect_utf_encoding($text) {
$first2 = substr($text, 0, 2);
$first3 = substr($text, 0, 3);
$first4 = substr($text, 0, 3);
if ($first3 == UTF8_BOM) return 'UTF-8';
elseif ($first4 == UTF32_BIG_ENDIAN_BOM) return 'UTF-32BE';
elseif ($first4 == UTF32_LITTLE_ENDIAN_BOM) return 'UTF-32LE';
elseif ($first2 == UTF16_BIG_ENDIAN_BOM) return 'UTF-16BE';
elseif ($first2 == UTF16_LITTLE_ENDIAN_BOM) return 'UTF-16LE';
return '';
}
/**
* 检测是否GB2312编码
* @param string $str
* @since 2012-03-20
* @return boolean
*/
private function is_gb2312($str) {
for($i=0; $i<strlen($str); $i++) {
$v = ord( $str[$i] );
if( $v > 127) {
if( ($v >= 228) && ($v <= 233) ){
if( ($i+2) >= (strlen($str) - 1)) return true; // not enough characters
$v1 = ord( $str[$i+1] );
$v2 = ord( $str[$i+2] );
if( ($v1 >= 128) && ($v1 <=191) && ($v2 >=128) && ($v2 <= 191) )
return false;
else
return true; //GB编码
}
}
}
}
private function is_GBK($str){
$s1 = iconv('gbk','utf-8',$str);
$s0 = iconv('utf-8','gbk',$s1);
if($s0 == $str){
return true;
}else{
return false;
}
}
/**
* 取得编码
* @param string $str
* @return string $encoding
*/
public static function get_encoding($str){
$ary = array();
//$ary[] = "ASCII";
$ary[] = "UTF-8";
$ary[] = "GB18030";//简体码
$ary[] = "BIG-5";//繁体码
$ary[] = "EUC-CN";
$ary[] = "JIS";//日文编码
$ary[] = "EUC-JP";//日文编码
$encoding= self::detect_utf_encoding($str);
//if(empty($encoding) && self::is_GBK($str)) return 'GBK';
if(empty($encoding)){
$encoding=mb_detect_encoding($str,$ary);
}
if($encoding=='ASCII') $encoding='UTF-8';
return $encoding;
}
public function utf16_to_utf8($str) {
$len = strlen($str);
$dec = '';
for ($i = 0; $i < $len; $i += 2) {
$c = ($be) ? ord($str[$i]) << 8 | ord($str[$i + 1]) :
ord($str[$i + 1]) << 8 | ord($str[$i]);
if ($c >= 0x0001 && $c <= 0x007F) {
$dec .= chr($c);
} else if ($c > 0x07FF) {
$dec .= chr(0xE0 | (($c >> 12) & 0x0F));
$dec .= chr(0x80 | (($c >> 6) & 0x3F));
$dec .= chr(0x80 | (($c >> 0) & 0x3F));
} else {
$dec .= chr(0xC0 | (($c >> 6) & 0x1F));
$dec .= chr(0x80 | (($c >> 0) & 0x3F));
}
}
return $dec;
}
}
?>