<?php
# converts a string to NFC or NFD and returns the result
# key routines are nfc and nfd
# Copyright (C) 2009  Richard Ishida ishida@w3.org
# Licence http://creativecommons.org/licenses/by-nc-sa/3.0/
# (If you use it, I'd be happy if you let me know.)
// Make UTF-8 the default encoding for any mb_* call that does not pass an explicit encoding argument.
mb_internal_encoding("UTF-8");

// Returns the decimal Unicode code point of a character, as a string of digits.
//   $str    a UTF-8 string, normally a single character; for longer strings the
//           digits of every character are simply run together
//   return  string of decimal digits (e.g. "44032" for U+AC00)
function char2dec ($str) {
	// Cover the whole code space (U+0000..U+10FFFF). The previous map stopped at
	// U+0080..U+FFFF, so ASCII and supplementary-plane characters were returned
	// unconverted, i.e. not as a number at all.
	$convmap = array(0x0, 0x10ffff, 0, 0x1fffff);
	$str = mb_encode_numericentity($str, $convmap, "UTF-8");
	// Every character is now of the form "&#NNN;" - keep only the digits.
	return str_replace(array('&', '#', ';'), '', $str);
	}

// Returns the UTF-8 character for a decimal Unicode code point.
//   $int    the code point, as an integer, an integral float or a string of digits
//   return  the corresponding character as a UTF-8 string
function int2char ($int) {
	// Accept the whole code space (U+0000..U+10FFFF). The previous map only decoded
	// U+0080..U+FFFF; any other value came back as the literal text "&#NNN;".
	$convmap = array(0x0, 0x10ffff, 0, 0x1fffff);
	// Build a decimal numeric character reference and let mbstring decode it.
	return mb_decode_numericentity('&#'.$int.';', $convmap, "UTF-8");
	}

	
// Splits one precomposed Hangul syllable into its jamo sequence.
//   $ch     a single character
//   return  two or three jamo characters, or $ch unchanged when its code point
//           is outside the 11172 syllables starting at U+AC00
function decomposeHangul ($ch) {
	$syllableOffset = char2dec($ch) - 0xAC00;
	if (!($syllableOffset >= 0 && $syllableOffset < 11172)) {
		return $ch;
		}

	// 588 syllables per leading consonant, 28 per vowel within that.
	$lead  = 0x1100 + floor($syllableOffset / 588);
	$vowel = 0x1161 + floor(($syllableOffset % 588) / 28);
	$trail = 0x11a7 + floor($syllableOffset % 28);

	$jamos = int2char($lead).int2char($vowel);
	// 0x11A7 itself means "no trailing consonant".
	return ($trail != 0x11A7) ? $jamos.int2char($trail) : $jamos;
	}


// Converts a string to decomposed form: decompose every character, then put
// each run of combining characters into order.
//   $str    UTF-8 input string
//   return  the decomposed, reordered string
function nfd ($str) {
	return reorder(decompose($str));
	}

// Splits a UTF-8 string into an array holding one character per element.
//   $string  UTF-8 input string
//   return   list of single-character strings; an empty array for empty input
//
// The result is always an array. Previously the result variable was never
// initialised, so an empty string produced NULL and callers then failed on count().
// The earlier version also chose a width of 2 when the character compared between
// two empty string literals; that can never be true for a non-empty character, so
// every element was - and still is - exactly one character.
function toArray ($string) {
	$chars = array();
	$length = mb_strlen($string, "UTF-8");
	// Index into the string instead of repeatedly copying the unread remainder.
	for ($i = 0; $i < $length; $i++) {
		$chars[] = mb_substr($string, $i, 1, "UTF-8");
		}
	return $chars;
	}
	

// Fully decomposes a string.
//   $string  UTF-8 input string
//   return   the string with every character that has an entry in the global
//            $decomposable table replaced by that entry (itself decomposed
//            recursively), and every Hangul syllable replaced by its jamos
function decompose ($string) {
	GLOBAL $decomposable;
	// Nothing to do for empty input; this also avoids handing toArray() an empty
	// string, whose result could not be counted.
	if ($string === null || $string === '') {
		return '';
		}
	$decomposed = '';
	foreach (toArray($string) as $current) {
		if (isset($decomposable[$current])) {
			// the mapping may itself contain decomposable characters
			$decomposed .= decompose($decomposable[$current]);
			}
		else if ($current >= '가' && $current <= '힣') { // hangul syllable
			$decomposed .= decomposeHangul($current);
			}
		else {
			$decomposed .= $current;
			}
		}
	return $decomposed;
	}


// Stable insertion sort of a list of characters, ascending by the value each
// character has in the global $nonzerocc table.
//   $array  list of characters, each expected to be a key of $nonzerocc
//   return  the same characters, sorted; characters with equal values keep
//           their original relative order
function isort ($array) {
	GLOBAL $nonzerocc;
	$total = count($array);
	for ($pos = 1; $pos < $total; $pos++) {
		$itemClass = $nonzerocc[$array[$pos]];
		$item = $array[$pos];
		// shift larger-valued predecessors one place right to open a slot
		for ($slot = $pos; $slot >= 1 && $nonzerocc[$array[$slot-1]] > $itemClass; $slot--) {
			$array[$slot] = $array[$slot-1];
			}
		$array[$slot] = $item;
		}
	return $array;
	}



// Sorts every run of two or more consecutive characters that have an entry in
// the global $nonzerocc table, using isort(); all other characters are copied
// through unchanged.
//   $string  UTF-8 input string
//   return   the reordered string
function reorder ($string) {
	GLOBAL $nonzerocc;
	// A trailing 'X' acts as a sentinel so the look-ahead below always has a
	// next element; it is never copied to the output.
	$chars = toArray($string.'X');
	$out = '';
	$pos = 0;
	while ($pos < count($chars)-1) {
		if (!isset($nonzerocc[$chars[$pos]]) || !isset($nonzerocc[$chars[$pos+1]])) {
			// not the start of a multi-character run: copy as is
			$out .= $chars[$pos];
			$pos++;
			continue;
			}
		// collect the whole run as a plain list of characters
		$run = array();
		for ($k = $pos; isset($nonzerocc[$chars[$k]]); $k++) {
			$run[] = $chars[$k];
			}
		$out .= implode('', isort($run));
		$pos = $k;
		}
	return $out;
	}



// Converts a string to composed form (the "nfc" routine named in the file header).
//   $string  UTF-8 input string
//   return   the composed string
// Walks the string one character at a time and, depending on the character and
// the one after it, either copies it, hands a gathered base+combining sequence to
// reorder()/compose(), or hands a gathered run of jamos to composeHangul().
// Reads the global tables $decomposable, $composable, $nonzerocc, $nfcexclusions.
function nfc ($string) { 
	GLOBAL $decomposable;
	GLOBAL $composable;
	GLOBAL $nonzerocc;
	GLOBAL $nfcexclusions;
	$composed = '';	
	// Sentinel: guarantees that $str[$i+1] and the ++$i scans below always find an
	// element to stop on. The main loop ends before it, so it is never output.
	// Assumes 'X' has no entry in the lookup tables - TODO confirm.
	$string .= 'X';
	// replace non-starter decompositions
	// NOTE(review): the search and replacement literals on the next five lines can
	// look identical in an editor; presumably each maps a character to an equivalent
	// sequence - verify the actual bytes before editing these lines.
	$string = str_replace('́', '́', $string);
	$string = str_replace('̀', '̀', $string);
	$string = str_replace('̓', '̓', $string);
	$string = str_replace('̈́', '̈́', $string);
	$string = str_replace('ཱི', 'ཱི', $string);
	$i=-1; 
	$str = toArray($string);
	//str = str.replace('\u0F73', '\u0F71\u0F72')
	//str = str.replace('\u0F75', '\u0F71\u0F74')
	//str = str.replace('\u0F81', '\u0F71\u0F80')
	// $i starts at -1 because the loop condition pre-increments it.
	while (++$i<count($str)-1) {
		$current = $str[$i];
		$next = $str[$i+1];
		if (isset($decomposable[$current])) { // decomposable char 
			if ((! isset($nfcexclusions[$current])) && (! isset($nonzerocc[$next]))) { // current char is not in exclusions & next of cclass 0
				$composed .= $current;
				} 
			else { // in exclusions or next not in cclass 0
				$temp = decompose($current);
				while (isset($nonzerocc[$str[++$i]])) { $temp.=$str[$i]; } // find combining sequence
				$temp = reorder($temp);
				$composed .= compose($temp);
				// the scan stopped one past the sequence; step back so the loop's ++$i lands on that character
				$i--;
				}
			}
		else if ($current>='ᄀ' && $current<='ᇹ') { // jamo characters
			$temp = $current;
			while ($str[++$i]>='ᄀ' && $str[$i]<='ᇹ') { $temp.=$str[$i]; } // gather jamos
			$composed .= composeHangul($temp);
			// same step-back as above: $i is on the first non-jamo character
			$i--;
			}
		else { // not a composite character... 
			if (isset($nonzerocc[$next])) { // but followed by combining char(s)
				$temp = $current;
				while (isset($nonzerocc[$str[++$i]])) { $temp.=$str[$i]; } // find combining sequence
				$temp = reorder($temp);
				$composed .= compose($temp);
				$i--;
				}
			else { 
				if (! isset($composable[$current.$next])) {
					// nothing to combine with: copy the character through
					$composed .= $current; 
					} 
				else {
					// keep folding following characters into $str[$base] for as long as
					// the accumulated character plus the next one is a key of $composable
					$base=$i; 
					while (isset($composable[$str[$base].$str[++$i]])) { 
						$str[$base] = $composable[$str[$base].$str[$i]]; 
						}
					$composed .= $str[$base]; $i--;
					}
				}
			}
		}
	return $composed;
	}


// Composes a base character with the combining characters that follow it.
//   $str    a base character followed by combining characters, already in order
//   return  the composed base followed by every character that could not be
//           combined into it; '' for empty input
// Reads the global tables $composable (pair => composed character) and $nonzerocc.
function compose ($str) {
	GLOBAL $nonzerocc;
	GLOBAL $composable;
	$strlength = mb_strlen($str, "UTF-8");
	// Empty input has no base character. (An 'X' sentinel used to be appended
	// here and was returned as the result for empty input.)
	if ($strlength == 0) {
		return '';
		}
	$base = mb_substr($str, 0, 1, "UTF-8");
	$store = '';       // characters left uncombined, in order
	$lastcclass = -1;  // $nonzerocc value of the last character put in $store
	for ($ptr = 1; $ptr < $strlength; $ptr++) {
		$next = mb_substr($str, $ptr, 1, "UTF-8");
		// a character is blocked when an uncombined character with the same
		// $nonzerocc value already sits between it and the base
		$blocked = isset($nonzerocc[$next]) && $nonzerocc[$next] == $lastcclass;
		if (isset($composable[$base.$next]) && !$blocked) {
			$base = $composable[$base.$next];
			continue;
			}
		$store .= $next;
		if (isset($nonzerocc[$next])) { $lastcclass = $nonzerocc[$next]; }
		}
	return $base.$store;
	}


// Combines a run of Hangul jamos into precomposed syllables where possible.
//   $str    UTF-8 string, normally a run of jamo characters
//   return  the string with each leading+vowel pair (and an optional trailing
//           consonant) replaced by one syllable; '' for empty input
function composeHangul ($str) {
	$strlength = mb_strlen($str, "UTF-8");
	// Always return a string (this used to fall through to NULL for empty input).
	if ($strlength == 0) { return ''; }
	$first = mb_substr($str, 0, 1, "UTF-8");
	$last = char2dec($first);  // code point of the last element of $result
	$result = array($first);

	for ($i=1; $i<$strlength; ++$i) {
		$ch = char2dec(mb_substr($str, $i, 1, "UTF-8"));

		// leading consonant + vowel -> LV syllable
		$lIndex = $last-0x1100;
		if (0<=$lIndex && $lIndex<19) {
			$vIndex = $ch-0x1161;
			if (0<=$vIndex && $vIndex<21) {
				$last = 0xAC00+($lIndex*21+$vIndex)*28;
				$result[count($result)-1] = int2char($last);
				continue;
				}
			}

		// LV syllable (no trailing consonant yet) + trailing consonant -> LVT syllable
		$sIndex = $last-0xAC00;
		if (0<=$sIndex && $sIndex<11172 && ($sIndex % 28)==0) {
			$tIndex = $ch-0x11A7;
			if (0<$tIndex && $tIndex<28) {
				$last = $last+$tIndex;
				$result[count($result)-1] = int2char($last);
				continue;
				}
			}

		// no combination possible: start a new output element
		$last = $ch;
		$result[] = int2char($ch);
		}
	return implode('', $result);
	}

	
// DATA
// Loads n11ndata.php whenever this file is loaded. Presumably that file defines the
// lookup tables the functions above read as globals ($decomposable, $composable,
// $nonzerocc, $nfcexclusions) - TODO confirm.
include('n11ndata.php');


?>