根据文本语言自动设置 UILabel 的文本对齐方式(关键词:文本、根据、语言、UILabel)

2023-09-11 07:43:21 作者:自巳①個人佷恏

我想把一些文本设置到 UILabel 中,并根据该语言的书写方向(例如,希伯来语——从右到左 [RTL];英语——从左到右 [LTR])来设置 UILabel 的对齐方式。

请注意,使用 iOS 6 的 NSTextAlignmentNatural 并不能解决这个问题:实验表明,它是根据当前的区域设置(locale)来选择对齐方式的。

解决方案

最终采用了这个 SO 回答中给出的建议:写一个简短的脚本,解析此处公开提供的 Unicode 数据,并生成代码来判断某个码点是否具有强 R 或 AL 方向属性。然后在字符串中查找第一个这样的字符。这正是 ICU 库中的 ubidi_getBaseDirection 所做的事情。

由于 NSString 的内部表示是 UTF-16(一种变长编码),因此先将其转换为 UTF-32,以简化扫描代码。另一种做法是在扫描时即时解码字符串,这需要处理 BOM 和 Unicode 代理对。还有一种做法是直接忽略无法用单个 unichar 表示的字符。更多细节请参见维基百科的 UTF-16 条目。

简答

  @interface NSString (TextDirectionality)

/**
 * Reports the base text direction of the receiver.
 *
 * @return 1 if the string is strongly LTR, -1 if strongly RTL, or 0 if neutral.
 *
 * See http://icu-project.org/apiref/icu4c/ubidi_8h.html#aeb1fd15743833278cc11906cd5a48aef
 */
- (int)getBaseDirection;

@end

@implementation的NSString(TextDirectionality)

// Function takes UTF32 character, and not a unichar (=UTF16 character),
// because some Unicode characters need full 32 bits to represent.
BOOL为codePointStrongRTL(UTF32Char C){
  返程((三== 0x5BE)||(C = = 0x5C0)||(C = = 0x5C3)||(C = = 0x5C6)||(C> = 0x5D0和放大器;和C< = 0x5EA) ||(C> = 0x5F0和放大器;和C< = 0x5F4)||(C == 0x608)||(C == 0x60B)||(C == 0x60D)||(C == 0x61B) ||(c取代; = 0x61E&安培;&安培;℃下= 0x64A)||(c取代; = 0x66D&安培;&安培;℃下= 0x66F)||(c取代; = 0x671&安培;&安培;℃下= 0x6D5)||(c取代; = 0x6E5&安培;&安培;℃下= 0x6E6)||(c取代; = 0x6EE&安培;&安培;℃下= 0x6EF)||(c取代; = 0x6FA&安培; &安培;℃下= 0x70D)||(c取代; = 0x70F&安培;&安培;℃下= 0x710)||(c取代; = 0x712&安培;&安培;℃下= 0x72F)||(c取代; = 0x74D&安培;&安培;℃下= 0x7A5)||(三== 0x7B1)||(c取代; = 0x7C0&安培;&安培;℃下= 0x7EA)||(c取代; = 0x7F4&安培;&安培; C< = 0x7F5)||(C == 0x7FA)||(C> =为0x800和放大器;和C< = 0x815)||(C == 0x81A)||(C == 0x824)|| (三== 0x828)||(C> = 0x830和放大器;和C< = 0x83E)||(C> = 0x840和放大器;和C< = 0x858)||(C == 0x85E) ||(C == 0x8A0)||(C> = 0x8A2和放大器;和C< = 0x8AC)||(C = = 0x200F)|| (三== 0xFB1D)|| (c取代; = 0xFB1F&安培;&安培;℃下= 0xFB28)|| (c取代; = 0xFB2A&安培;&安培;℃下= 0xFB36)|| (c取代; = 0xFB38&安培;&安培;℃下= 0xFB3C)|| (三== 0xFB3E)|| (c取代; = 0xFB40&安培;&安培;℃下= 0xFB41)|| (c取代; = 0xFB43&安培;&安培;℃下= 0xFB44)|| (c取代; = 0xFB46&安培;&安培;℃下= 0xFBC1)|| (c取代; = 0xFBD3&安培;&安培;℃下= 0xFD3D)|| (c取代; = 0xFD50&安培;&安培;℃下= 0xFD8F)|| (c取代; = 0xFD92&安培;&安培;℃下= 0xFDC7)|| (c取代; = 0xFDF0&安培;&安培;℃下= 0xFDFC)|| (c取代; = 0xFE70&安培;&安培;℃下= 0xFE74)|| (c取代; = 0xFE76&安培;&安培;℃下= 0xFEFC)|| (c取代; = 0x10800&安培;&安培;℃下= 0x10805)|| (三== 0x10808)|| (c取代; = 0x1080A&安培;&安培;℃下= 0x10835)|| (c取代; = 0x10837&安培;&安培;℃下= 0x10838)|| (三== 0x1083C)|| (c取代; = 0x1083F&安培;&安培;℃下= 0x10855)|| (c取代; = 0x10857&安培;&安培;℃下= 0x1085F)|| (c取代; = 0x10900&安培;&安培;℃下= 0x1091B)|| (c取代; = 0x10920&安培;&安培;℃下= 0x10939)|| (三== 0x1093F)|| (c取代; = 0x10980&安培;&安培;℃下= 0x109B7)|| (c取代; = 0x109BE&安培;&安培;℃下= 0x109BF)|| (三== 0x10A00)|| (c取代; = 0x10A10&安培;&安培;℃下= 0x10A13)|| (c取代; = 0x10A15&安培;&安培;℃下= 0x10A17)|| (c取代; = 0x10A19&安培;&安培;℃下= 0x10A33)|| (c取代; = 0x10A40&安培;&安培;℃下= 0x10A47)|| (c取代; = 0x10A50&安培;&安培;℃下= 0x10A58)|| (c取代; = 0x10A60&安培;&安培;℃下= 0x10A7F)|| (c取代; = 0x10B00&安培;&安培;℃下= 0x10B35)|| (c取代; = 0x10B40&安培;&安培;℃下= 0x10B55)|| (c取代; = 0x10B58&安培;&安培;℃下= 0x10B72)|| (c取代; = 
0x10B78&安培;&安培;℃下= 0x10B7F)|| (c取代; = 0x10C00&安培;&安培;℃下= 0x10C48)|| (c取代; = 0x1EE00&安培;&安培;℃下= 0x1EE03)|| (c取代; = 0x1EE05&安培;&安培;℃下= 0x1EE1F)|| (c取代; = 0x1EE21&安培;&安培;℃下= 0x1EE22)|| (三== 0x1EE24)|| (三== 0x1EE27)|| (c取代; = 0x1EE29&安培;&安培;℃下= 0x1EE32)|| (c取代; = 0x1EE34&安培;&安培;℃下= 0x1EE37)|| (三== 0x1EE39)|| (三== 0x1EE3B)|| (三== 0x1EE42)|| (三== 0x1EE47)|| (三== 0x1EE49)|| (三== 0x1EE4B)|| (c取代; = 0x1EE4D&安培;&安培;℃下= 0x1EE4F)|| (c取代; = 0x1EE51&安培;&安培;℃下= 0x1EE52)|| (三== 0x1EE54)|| (三== 0x1EE57)|| (三== 0x1EE59)|| (三== 0x1EE5B)|| (三== 0x1EE5D)|| (三== 0x1EE5F)|| (c取代; = 0x1EE61&安培;&安培;℃下= 0x1EE62)|| (三== 0x1EE64)|| (c取代; = 0x1EE67&安培;&安培;℃下= 0x1EE6A)|| (c取代; = 0x1EE6C&安培;&安培;℃下= 0x1EE72)|| (c取代; = 0x1EE74&安培;&安培;℃下= 0x1EE77)|| (c取代; = 0x1EE79&安培;&安培;℃下= 0x1EE7C)|| (三== 0x1EE7E)|| (c取代; = 0x1EE80&安培;&安培;℃下= 0x1EE89)|| (c取代; = 0x1EE8B&安培;&安培;℃下= 0x1EE9B)|| (c取代; = 0x1EEA1&安培;&安培;℃下= 0x1EEA3​​)|| (c取代; = 0x1EEA5&安培;&安培;℃下= 0x1EEA9)|| (c取代; = 0x1EEAB&安培;&安培;℃下= 0x1EEBB));
}

BOOL为codePointStrongLTR(UTF32Char C){
  返程(C> = 0×41和放大器;和C< = 5AH即可)|| (c取代; = 0x61&安培;&安培;℃下= 0x7A)|| (三==和0xAA)|| (三== 0xB5执行)|| (三== 0xBA)|| (c取代; =&为0xC0安培;&安培;℃下= 0xD6)|| (c取代; =&为0xD8安培;&安培;℃下= 0xF6)|| (C> =为0xF8的&放大器;和C< = 0x2B8)|| (c取代; = 0x2BB&安培;&安培;℃下= 0x2C1)|| (c取代; = 0x2D0&安培;&安培;℃下= 0x2D1)|| (c取代; = 0x2E0&安培;&安培;℃下= 0x2E4)|| (三== 0x2EE)|| (c取代; = 0x370&安培;&安培;℃下= 0x373)|| (c取代; = 0x376&安培;&安培;℃下= 0x377)|| (c取代; = 0x37A&安培;&安培;℃下= 0x37D)|| (三== 0x386)|| (c取代; = 0x388&安培;&安培;℃下= 0x38A)|| (三== 0x38C)|| (c取代; = 0x38E&安培;&安培;℃下= 0x3A1)|| (c取代; = 0x3A3&安培;&安培;℃下= 0x3F5)|| (c取代; = 0x3F7&安培;&安培;℃下= 0x482)|| (c取代; = 0x48A&安培;&安培;℃下= 0x527)|| (c取代; = 0x531&安培;&安培;℃下= 0x556)|| (c取代; = 0x559&安培;&安培;℃下= 0x55F)|| (c取代; = 0x561&安培;&安培;℃下= 0x587)|| (三== 0x589)|| (c取代; = 0x903&安培;&安培;℃下= 0x939)|| (三== 0x93B)|| (c取代; = 0x93D&安培;&安培;℃下= 0x940)|| (c取代; = 0x949&安培;&安培;℃下= 0x94C)|| (c取代; = 0x94E&安培;&安培;℃下= 0x950)|| (c取代; = 0x958&安培;&安培;℃下= 0x961)|| (c取代; = 0x964&安培;&安培;℃下= 0x977)|| (c取代; = 0x979&安培;&安培;℃下= 0x97F)|| (c取代; = 0x982&安培;&安培;℃下= 0x983)|| (c取代; = 0x985&安培;&安培;℃下= 0x98C)|| (c取代; = 0x98F&安培;&安培;℃下= 0x990)|| (c取代; = 0x993&安培;&安培;℃下= 0x9A8)|| (c取代; = 0x9AA&安培;&安培;℃下= 0x9B0)|| (三== 0x9B2)|| (c取代; = 0x9B6&安培;&安培;℃下= 0x9B9)|| (c取代; = 0x9BD&安培;&安培;℃下= 0x9C0)|| (c取代; = 0x9C7&安培;&安培;℃下= 0x9C8)|| (c取代; = 0x9CB&安培;&安培;℃下= 0x9CC)|| (三== 0x9CE)|| (三== 0x9D7)|| (c取代; = 0x9DC&安培;&安培;℃下= 0x9DD)|| (c取代; = 0x9DF&安培;&安培;℃下= 0x9E1)|| (c取代; = 0x9E6&安培;&安培;℃下= 0x9F1)|| (c取代; = 0x9F4&安培;&安培;℃下= 0x9FA)|| (三== 0xA03)|| (c取代; = 0xA05&安培;&安培;℃下= 0xA0A)|| (c取代; = 0xA0F&安培;&安培;℃下= 0xA10)|| (c取代; = 0xA13&安培;&安培;℃下= 0xA28)|| (c取代; = 0xA2A&安培;&安培;℃下= 0xA30)|| (c取代; = 0xA32&安培;&安培;℃下= 0xA33)|| (c取代; = 0xA35&安培;&安培;℃下= 0xA36)|| (c取代; = 0xA38&安培;&安培;℃下= 0xA39)|| (c取代; = 0xA3E&安培;&安培;℃下= 0xA40)|| (c取代; = 0xA59&安培;&安培;℃下= 0xA5C)|| (三== 0xA5E)|| (c取代; = 0xA66&安培;&安培;℃下= 0xA6F)|| (c取代; = 0xA72&安培;&安培;℃下= 0xA74)|| (三== 0xA83)|| (c取代; = 0xA85&安培;&安培;℃下= 0xA8D)|| (c取代; = 0xA8F&安培;&安培;℃下= 0xA91)|| (c取代; = 0xA93&安培;&安培;℃下= 0xAA8)|| (c取代; = 
0xAAA&安培;&安培;℃下= 0xAB0)|| (c取代; = 0xAB2&安培;&安培;℃下= 0xAB3)|| (c取代; = 0xAB5&安培;&安培;℃下= 0xAB9)|| (c取代; = 0xABD&安培;&安培;℃下= 0xAC0)|| (三== 0xAC9)|| (c取代; = 0xACB&安培;&安培;℃下= 0xACC)|| (三== 0xAD0)|| (c取代; = 0xAE0&安培;&安培;℃下= 0xAE1)|| (c取代; = 0xAE6&安培;&安培;℃下= 0xAF0)|| (c取代; = 0xB02&安培;&安培;℃下= 0xB03)|| (c取代; = 0xB05&安培;&安培;℃下= 0xB0C)|| (c取代; = 0xB0F&安培;&安培;℃下= 0xB10)|| (c取代; = 0xB13&安培;&安培;℃下= 0xB28)|| (c取代; = 0xB2A&安培;&安培;℃下= 0xB30)|| (c取代; = 0xB32&安培;&安培;℃下= 0xB33)|| (c取代; = 0xB35&安培;&安培;℃下= 0xB39)|| (c取代; = 0xB3D&安培;&安培;℃下= 0xB3E)|| (三== 0xB40)|| (c取代; = 0xB47&安培;&安培;℃下= 0xB48)|| (c取代; = 0xB4B&安培;&安培;℃下= 0xB4C)|| (三== 0xB57)|| (c取代; = 0xB5C&安培;&安培;℃下= 0xB5D)|| (c取代; = 0xB5F&安培;&安培;℃下= 0xB61)|| (c取代; = 0xB66&安培;&安培;℃下= 0xB77)|| (三== 0xB83)|| (c取代; = 0xB85&安培;&安培;℃下= 0xB8A)|| (c取代; = 0xB8E&安培;&安培;℃下= 0xB90)|| (c取代; = 0xB92&安培;&安培;℃下= 0xB95)|| (c取代; = 0xB99&安培;&安培;℃下= 0xB9A)|| (三== 0xB9C)|| (c取代; = 0xB9E&安培;&安培;℃下= 0xB9F)|| (c取代; = 0xBA3&安培;&安培;℃下= 0xBA4)|| (c取代; = 0xBA8&安培;&安培;℃下= 0xBAA)|| (c取代; = 0xBAE&安培;&安培;℃下= 0xBB9)|| (c取代; = 0xBBE&安培;&安培;℃下= 0xBBF)|| (c取代; = 0xBC1&安培;&安培;℃下= 0xBC2)|| (c取代; = 0xBC6&安培;&安培;℃下= 0xBC8)|| (c取代; = 0xBCA&安培;&安培;℃下= 0xBCC)|| (三== 0xBD0)|| (三== 0xBD7)|| (c取代; = 0xBE6&安培;&安培;℃下= 0xBF2)|| (c取代; = 0xC01&安培;&安培;℃下= 0xC03)|| (c取代; = 0xC05&安培;&安培;℃下= 0xC0C)|| (c取代; = 0xC0E&安培;&安培;℃下= 0xC10)|| (c取代; = 0xC12&安培;&安培;℃下= 0xC28)|| (c取代; = 0xC2A&安培;&安培;℃下= 0xC33)|| (c取代; = 0xC35&安培;&安培;℃下= 0xC39)|| (三== 0xC3D)|| (c取代; = 0xC41&安培;&安培;℃下= 0xC44)|| (c取代; = 0xC58&安培;&安培;℃下= 0xC59)|| (c取代; = 0xC60&安培;&安培;℃下= 0xC61)|| (c取代; = 0xC66&安培;&安培;℃下= 0xC6F)|| (三== 0xC7F)|| (c取代; = 0xC82&安培;&安培;℃下= 0xC83)|| (c取代; = 0xC85&安培;&安培;℃下= 0xC8C)|| (c取代; = 0xC8E&安培;&安培;℃下= 0xC90)|| (c取代; = 0xC92&安培;&安培;℃下= 0xCA8)|| (c取代; = 0xCAA&安培;&安培;℃下= 0xCB3)|| (c取代; = 0xCB5&安培;&安培;℃下= 0xCB9)|| (c取代; = 0xCBD&安培;&安培;℃下= 0xCC4)|| (c取代; = 0xCC6&安培;&安培;℃下= 0xCC8)|| (c取代; = 0xCCA&安培;&安培;℃下= 0xCCB)|| (c取代; = 0xCD5&安培;&安培;℃下= 0xCD6)|| (三== 0xCDE)|| (c取代; = 0xCE0&安培;&安培;℃下= 0xCE1)|| (c取代; = 
0xCE6&安培;&安培;℃下= 0xCEF)|| (c取代; = 0xCF1&安培;&安培;℃下= 0xCF2)|| (c取代; = 0xD02&安培;&安培;℃下= 0xD03)|| (c取代; = 0xD05&安培;&安培;℃下=量0xD0C)|| (c取代; = 0xD0E&安培;&安培;℃下=量0xD10)|| (c取代; = 0xD12&安培;&安培;℃下= 0xD3A)|| (c取代; = 0xD3D&安培;&安培;℃下= 0xD40)|| (c取代; = 0xD46&安培;&安培;℃下= 0xD48)|| (c取代; = 0xD4A&安培;&安培;℃下= 0xD4C)|| (三== 0xD4E)|| (三== 0xD57)|| (c取代; = 0xD60&安培;&安培;℃下= 0xD61)|| (c取代; = 0xD66&安培;&安培;℃下= 0xD75)|| (c取代; = 0xD79&安培;&安培;℃下= 0xD7F)|| (c取代; = 0xD82&安培;&安培;℃下= 0xD83)|| (c取代; = 0xD85&安培;&安培;℃下= 0xD96)|| (c取代; = 0xD9A&安培;&安培;℃下= 0xDB1)|| (c取代; = 0xDB3&安培;&安培;℃下= 0xDBB)|| (三== 0xDBD)|| (c取代; = 0xDC0&安培;&安培;℃下= 0xDC6)|| (c取代; = 0xDCF&安培;&安培;℃下= 0xDD1)|| (c取代; = 0xDD8&安培;&安培;℃下= 0xDDF)|| (c取代; = 0xDF2&安培;&安培;℃下= 0xDF4)|| (c取代; = 0xE01&安培;&安培;℃下= 0xE30)|| (c取代; = 0xE32&安培;&安培;℃下= 0xE33)|| (c取代; = 0xE40&安培;&安培;℃下= 0xE46)|| (c取代; = 0xE4F&安培;&安培;℃下= 0xE5B)|| (c取代; = 0xE81&安培;&安培;℃下= 0xE82)|| (三== 0xE84)|| (c取代; = 0xE87&安培;&安培;℃下= 0xE88)|| (三== 0xE8A)|| (三== 0xE8D)|| (c取代; = 0xE94&安培;&安培;℃下= 0xE97)|| (c取代; = 0xE99&安培;&安培;℃下= 0xE9F)|| (c取代; = 0xEA1&安培;&安培;℃下= 0xEA3)|| (三== 0xEA5)|| (三== 0xEA7)|| (c取代; = 0xEAA&安培;&安培;℃下= 0xEAB)|| (c取代; = 0xEAD&安培;&安培;℃下= 0xEB0)|| (c取代; = 0xEB2&安培;&安培;℃下= 0xEB3)|| (三== 0xEBD)|| (c取代; = 0xEC0&安培;&安培;℃下= 0xEC4)|| (三== 0xEC6)|| (c取代; = 0xED0&安培;&安培;℃下= 0xED9)|| (c取代; = 0xEDC&安培;&安培;℃下= 0xEDF)|| (c取代; = 0xF00&安培;&安培;℃下= 0xF17)|| (c取代; = 0xF1A&安培;&安培;℃下= 0xF34)|| (三== 0xF36)|| (三== 0xF38)|| (c取代; = 0xF3E&安培;&安培;℃下= 0xF47)|| (c取代; = 0xF49&安培;&安培;℃下= 0xF6C)|| (三== 0xF7F)|| (三== 0xF85)|| (c取代; = 0xF88&安培;&安培;℃下= 0xF8C)|| (c取代; = 0xFBE&安培;&安培;℃下= 0xFC5)|| (c取代; = 0xFC7&安培;&安培;℃下= 0xFCC)|| (c取代; = 0xFCE&安培;&安培;℃下= 0xFDA)|| (C> = 0×1000和放大器;和C< = 0x102C)|| (三== 0x1031)|| (三== 0x1038)|| (c取代; = 0x103B&安培;&安培;℃下= 0x103C)|| (c取代; = 0x103F&安培;&安培;℃下= 0x1057)|| (c取代; = 0x105A&安培;&安培;℃下= 0x105D)|| (c取代; = 0x1061&安培;&安培;℃下= 0x1070)|| (c取代; = 0x1075&安培;&安培;℃下= 0x1081)|| (c取代; = 0x1083&安培;&安培;℃下= 0x1084)|| (c取代; = 0x1087&安培;&安培;℃下= 0x108C)|| (c取代; = 0x108E&安培;&安培;℃下= 
0x109C)|| (c取代; = 0x109E&安培;&安培;℃下= 0x10C5)|| (三== 0x10C7)|| (三== 0x10CD)|| (c取代; = 0x10D0&安培;&安培;℃下= 0x1248)|| (c取代; = 0x124A&安培;&安培;℃下= 0x124D)|| (c取代; = 0x1250&安培;&安培;℃下= 0x1256)|| (三== 0x1258)|| (c取代; = 0x125A&安培;&安培;℃下= 0x125D)|| (c取代; = 0x1260&安培;&安培;℃下= 0x1288)|| (c取代; = 0x128A&安培;&安培;℃下= 0x128D)|| (c取代; = 0x1290&安培;&安培;℃下= 0x12B0)|| (c取代; = 0x12B2&安培;&安培;℃下= 0x12B5)|| (c取代; = 0x12B8&安培;&安培;℃下= 0x12BE)|| (三== 0x12C0)|| (c取代; = 0x12C2&安培;&安培;℃下= 0x12C5)|| (c取代; = 0x12C8&安培;&安培;℃下= 0x12D6)|| (c取代; = 0x12D8&安培;&安培;℃下= 0x1310)|| (c取代; = 0x1312&安培;&安培;℃下= 0x1315)|| (c取代; = 0x1318&安培;&安培;℃下= 0x135A)|| (c取代; = 0x1360&安培;&安培;℃下= 0x137C)|| (c取代; = 0x1380&安培;&安培;℃下= 0x138F)|| (c取代; = 0x13A0&安培;&安培;℃下= 0x13F4)|| (c取代; = 0x1401&安培;&安培;℃下= 0x167F)|| (c取代; = 0x1681&安培;&安培;℃下= 0x169A)|| (c取代; = 0x16A0&安培;&安培;℃下= 0x16F0)|| (c取代; = 0x1700&安培;&安培;℃下= 0x170C)|| (c取代; = 0x170E&安培;&安培;℃下= 0x1711)|| (c取代; = 0x1720&安培;&安培;℃下= 0x1731)|| (c取代; = 0x1735&安培;&安培;℃下= 0x1736)|| (c取代; = 0x1740&安培;&安培;℃下= 0x1751)|| (c取代; = 0x1760&安培;&安培;℃下= 0x176C)|| (c取代; = 0x176E&安培;&安培;℃下= 0x1770)|| (c取代; = 0x1780&安培;&安培;℃下= 0x17B3)|| (三== 0x17B6)|| (c取代; = 0x17BE&安培;&安培;℃下= 0x17C5)|| (c取代; = 0x17C7&安培;&安培;℃下= 0x17C8)|| (c取代; = 0x17D4&安培;&安培;℃下= 0x17DA)|| (三== 0x17DC)|| (c取代; = 0x17E0&安培;&安培;℃下= 0x17E9)|| (c取代; = 0x1810&安培;&安培;℃下= 0x1819)|| (c取代; = 0x1820&安培;&安培;℃下= 0x1877)|| (c取代; = 0x1880&安培;&安培;℃下= 0x18A8)|| (三== 0x18AA)|| (c取代; = 0x18B0&安培;&安培;℃下= 0x18F5)|| (c取代; = 0x1900&安培;&安培;℃下= 0x191C)|| (c取代; = 0x1923&安培;&安培;℃下= 0x1926)|| (c取代; = 0x1929&安培;&安培;℃下= 0x192B)|| (c取代; = 0x1930&安培;&安培;℃下= 0x1931)|| (c取代; = 0x1933&安培;&安培;℃下= 0x1938)|| (c取代; = 0x1946&安培;&安培;℃下= 0x196D)|| (c取代; = 0x1970&安培;&安培;℃下= 0x1974)|| (c取代; = 0x1980&安培;&安培;℃下= 0x19AB)|| (c取代; = 0x19B0&安培;&安培;℃下= 0x19C9)|| (c取代; = 0x19D0&安培;&安培;℃下= 0x19DA)|| (c取代; = 0x1A00&安培;&安培;℃下= 0x1A16)|| (c取代; = 0x1A19&安培;&安培;℃下= 0x1A1B)|| (c取代; = 0x1A1E&安培;&安培;℃下= 0x1A55)|| (三== 0x1A57)|| (三== 0x1A61)|| (c取代; = 0x1A63&安培;&安培;℃下= 0x1A64)|| (c取代; = 
0x1A6D&安培;&安培;℃下= 0x1A72)|| (c取代; = 0x1A80&安培;&安培;℃下= 0x1A89)|| (c取代; = 0x1A90&安培;&安培;℃下= 0x1A99)|| (c取代; = 0x1AA0&安培;&安培;℃下= 0x1AAD)|| (c取代; = 0x1B04&安培;&安培;℃下= 0x1B33)|| (三== 0x1B35)|| (三== 0x1B3B)|| (c取代; = 0x1B3D&安培;&安培;℃下= 0x1B41)|| (c取代; = 0x1B43&安培;&安培;℃下= 0x1B4B)|| (c取代; = 0x1B50&安培;&安培;℃下= 0x1B6A)|| (c取代; = 0x1B74&安培;&安培;℃下= 0x1B7C)|| (c取代; = 0x1B82&安培;&安培;℃下= 0x1BA1)|| (c取代; = 0x1BA6&安培;&安培;℃下= 0x1BA7)|| (三== 0x1BAA)|| (c取代; = 0x1BAC&安培;&安培;℃下= 0x1BE5)|| (三== 0x1BE7)|| (c取代; = 0x1BEA&安培;&安培;℃下= 0x1BEC)|| (三== 0x1BEE)|| (c取代; = 0x1BF2&安培;&安培;℃下= 0x1BF3)|| (c取代; = 0x1BFC&安培;&安培;℃下= 0x1C2B)|| (c取代; = 0x1C34&安培;&安培;℃下= 0x1C35)|| (c取代; = 0x1C3B&安培;&安培;℃下= 0x1C49)|| (c取代; = 0x1C4D&安培;&安培;℃下= 0x1C7F)|| (c取代; = 0x1CC0&安培;&安培;℃下= 0x1CC7)|| (三== 0x1CD3)|| (三== 0x1CE1)|| (c取代; = 0x1CE9&安培;&安培;℃下= 0x1CEC)|| (c取代; = 0x1CEE&安培;&安培;℃下= 0x1CF3)|| (c取代; = 0x1CF5&安培;&安培;℃下= 0x1CF6)|| (c取代; = 0x1D00&安培;&安培;℃下= 0x1DBF)|| (c取代; = 0x1E00&安培;&安培;℃下= 0x1F15)|| (c取代; = 0x1F18&安培;&安培;℃下= 0x1F1D)|| (c取代; = 0x1F20&安培;&安培;℃下= 0x1F45)|| (c取代; = 0x1F48&安培;&安培;℃下= 0x1F4D)|| (c取代; = 0x1F50&安培;&安培;℃下= 0x1F57)|| (三== 0x1F59)|| (三== 0x1F5B)|| (三== 0x1F5D)|| (c取代; = 0x1F5F&安培;&安培;℃下= 0x1F7D)|| (c取代; = 0x1F80&安培;&安培;℃下= 0x1FB4)|| (c取代; = 0x1FB6&安培;&安培;℃下= 0x1FBC)|| (三== 0x1FBE)|| (c取代; = 0x1FC2&安培;&安培;℃下= 0x1FC4)|| (c取代; = 0x1FC6&安培;&安培;℃下= 0x1FCC)|| (c取代; = 0x1FD0&安培;&安培;℃下= 0x1FD3)|| (c取代; = 0x1FD6&安培;&安培;℃下= 0x1FDB)|| (c取代; = 0x1FE0&安培;&安培;℃下= 0x1FEC)|| (c取代; = 0x1FF2&安培;&安培;℃下= 0x1FF4)|| (c取代; = 0x1FF6&安培;&安培;℃下= 0x1FFC)|| (三== 0x200E)|| (三== 0x2071)|| (三== 0x207F)|| (c取代; = 0x2090&安培;&安培;℃下= 0x209C)|| (三==为0x2102)|| (三== 0x2107)|| (c取代; = 0x210A&安培;&安培;℃下= 0x2113)|| (三== 0x2115)|| (c取代; = 0x2119&安培;&安培;℃下= 0x211D)|| (三== 0x2124)|| (三== 0x2126)|| (三== 0x2128)|| (c取代; = 0x212A&安培;&安培;℃下= 0x212D)|| (c取代; = 0x212F&安培;&安培;℃下= 0x2139)|| (c取代; = 0x213C&安培;&安培;℃下= 0x213F)|| (c取代; = 0x2145&安培;&安培;℃下= 0x2149)|| (c取代; = 0x214E&安培;&安培;℃下= 0x214F)|| (c取代; = 0x2160&安培;&安培;℃下= 0x2188)|| (c取代; = 
0x2336&安培;&安培;℃下= 0x237A)|| (三== 0x2395)|| (c取代; = 0x249C&安培;&安培;℃下= 0x24E9)|| (三== 0x26AC)|| (c取代; = 0x2800&安培;&安培;℃下= 0x28FF)|| (c取代; = 0x2C00&安培;&安培;℃下= 0x2C2E)|| (c取代; = 0x2C30&安培;&安培;℃下= 0x2C5E)|| (c取代; = 0x2C60&安培;&安培;℃下= 0x2CE4)|| (c取代; = 0x2CEB&安培;&安培;℃下= 0x2CEE)|| (c取代; = 0x2CF2&安培;&安培;℃下= 0x2CF3)|| (c取代; = 0x2D00&安培;&安培;℃下= 0x2D25)|| (三== 0x2D27)|| (三== 0x2D2D)|| (c取代; = 0x2D30&安培;&安培;℃下= 0x2D67)|| (c取代; = 0x2D6F&安培;&安培;℃下= 0x2D70)|| (c取代; = 0x2D80&安培;&安培;℃下= 0x2D96)|| (c取代; = 0x2DA0&安培;&安培;℃下= 0x2DA6)|| (c取代; = 0x2DA8&安培;&安培;℃下= 0x2DAE)|| (c取代; = 0x2DB0&安培;&安培;℃下= 0x2DB6)|| (c取代; = 0x2DB8&安培;&安培;℃下= 0x2DBE)|| (c取代; = 0x2DC0&安培;&安培;℃下= 0x2DC6)|| (c取代; = 0x2DC8&安培;&安培;℃下= 0x2DCE)|| (c取代; = 0x2DD0&安培;&安培;℃下= 0x2DD6)|| (c取代; = 0x2DD8&安培;&安培;℃下= 0x2DDE)|| (c取代; = 0x3005&安培;&安培;℃下= 0x3007)|| (c取代; = 0x3021&安培;&安培;℃下= 0x3029)|| (c取代; = 0x302E&安培;&安培;℃下= 0x302F)|| (c取代; = 0x3031&安培;&安培;℃下= 0x3035)|| (c取代; = 0x3038&安培;&安培;℃下= 0x303C)|| (c取代; = 0x3041&安培;&安培;℃下= 0x3096)|| (c取代; = 0x309D&安培;&安培;℃下= 0x309F)|| (c取代; = 0x30A1&安培;&安培;℃下= 0x30FA)|| (c取代; = 0x30FC&安培;&安培;℃下= 0x30FF)|| (c取代; = 0x3105&安培;&安培;℃下= 0x312D)|| (c取代; = 0x3131&安培;&安培;℃下= 0x318E)|| (c取代; = 0x3190&安培;&安培;℃下= 0x31BA)|| (c取代; = 0x31F0&安培;&安培;℃下= 0x321C)|| (c取代; = 0x3220&安培;&安培;℃下= 0x324F)|| (c取代; = 0x3260&安培;&安培;℃下= 0x327B)|| (c取代; = 0x327F&安培;&安培;℃下= 0x32B0)|| (c取代; = 0x32C0&安培;&安培;℃下= 0x32CB)|| (c取代; = 0x32D0&安培;&安培;℃下= 0x32FE)|| (c取代; = 0x3300&安培;&安培;℃下= 0x3376)|| (c取代; = 0x337B&安培;&安培;℃下= 0x33DD)|| (c取代; = 0x33E0&安培;&安培;℃下= 0x33FE)|| (三== 0x3400)|| (三== 0x4DB5)|| (三== 0x4E00)|| (三== 0x9FCC)|| (c取代; = 0xA000&安培;&安培;℃下= 0xA48C)|| (c取代; = 0xA4D0&安培;&安培;℃下= 0xA60C)|| (c取代; = 0xA610&安培;&安培;℃下= 0xA62B)|| (c取代; = 0xA640&安培;&安培;℃下= 0xA66E)|| (c取代; = 0xA680&安培;&安培;℃下= 0xA697)|| (c取代; = 0xA6A0&安培;&安培;℃下= 0xA6EF)|| (c取代; = 0xA6F2&安培;&安培;℃下= 0xA6F7)|| (c取代; = 0xA722&安培;&安培;℃下= 0xA787)|| (c取代; = 0xA789&安培;&安培;℃下= 0xA78E)|| (c取代; = 0xA790&安培;&安培;℃下= 0xA793)|| (c取代; = 0xA7A0&安培;&安培;℃下= 0xA7AA)|| (c取代; = 
0xA7F8&安培;&安培;℃下= 0xA801)|| (c取代; = 0xA803&安培;&安培;℃下= 0xA805)|| (c取代; = 0xA807&安培;&安培;℃下= 0xA80A)|| (c取代; = 0xA80C&安培;&安培;℃下= 0xA824)|| (三== 0xA827)|| (c取代; = 0xA830&安培;&安培;℃下= 0xA837)|| (c取代; = 0xA840&安培;&安培;℃下= 0xA873)|| (c取代; = 0xA880&安培;&安培;℃下= 0xA8C3)|| (c取代; = 0xA8CE&安培;&安培;℃下= 0xA8D9)|| (c取代; = 0xA8F2&安培;&安培;℃下= 0xA8FB)|| (c取代; = 0xA900&安培;&安培;℃下= 0xA925)|| (c取代; = 0xA92E&安培;&安培;℃下= 0xA946)|| (c取代; = 0xA952&安培;&安培;℃下= 0xA953)|| (c取代; = 0xA95F&安培;&安培;℃下= 0xA97C)|| (c取代; = 0xA983&安培;&安培;℃下= 0xA9B2)|| (c取代; = 0xA9B4&安培;&安培;℃下= 0xA9B5)|| (c取代; = 0xA9BA&安培;&安培;℃下= 0xA9BB)|| (c取代; = 0xA9BD&安培;&安培;℃下= 0xA9CD)|| (c取代; = 0xA9CF&安培;&安培;℃下= 0xA9D9)|| (c取代; = 0xA9DE&安培;&安培;℃下= 0xA9DF)|| (c取代; = 0xAA00&安培;&安培;℃下= 0xAA28)|| (c取代; = 0xAA2F&安培;&安培;℃下= 0xAA30)|| (c取代; = 0xAA33&安培;&安培;℃下= 0xAA34)|| (c取代; = 0xAA40&安培;&安培;℃下= 0xAA42)|| (c取代; = 0xAA44&安培;&安培;℃下= 0xAA4B)|| (三== 0xAA4D)|| (c取代; = 0xAA50&安培;&安培;℃下= 0xAA59)|| (c取代; = 0xAA5C&安培;&安培;℃下= 0xAA7B)|| (c取代; = 0xAA80&安培;&安培;℃下= 0xAAAF)|| (三== 0xAAB1)|| (c取代; = 0xAAB5&安培;&安培;℃下= 0xAAB6)|| (c取代; = 0xAAB9&安培;&安培;℃下= 0xAABD)|| (三== 0xAAC0)|| (三== 0xAAC2)|| (c取代; = 0xAADB&安培;&安培;℃下= 0xAAEB)|| (c取代; = 0xAAEE&安培;&安培;℃下= 0xAAF5)|| (c取代; = 0xAB01&安培;&安培;℃下= 0xAB06)|| (c取代; = 0xAB09&安培;&安培;℃下= 0xAB0E)|| (c取代; = 0xAB11&安培;&安培;℃下= 0xAB16)|| (c取代; = 0xAB20&安培;&安培;℃下= 0xAB26)|| (c取代; = 0xAB28&安培;&安培;℃下= 0xAB2E)|| (c取代; = 0xABC0&安培;&安培;℃下= 0xABE4)|| (c取代; = 0xABE6&安培;&安培;℃下= 0xABE7)|| (c取代; = 0xABE9&安培;&安培;℃下= 0xABEC)|| (c取代; = 0xABF0&安培;&安培;℃下= 0xABF9)|| (三== 0xAC00)|| (三== 0xD7A3)|| (c取代; = 0xD7B0&安培;&安培;℃下= 0xD7C6)|| (c取代; = 0xD7CB&安培;&安培;℃下= 0xD7FB)|| (三== 0xD800)|| (c取代; = 0xDB7F&安培;&安培;℃下= 0xDB80)|| (c取代; = 0xDBFF&安培;&安培;℃下= 0xDC00)|| (c取代; = 0xDFFF&安培;&安培;℃下= 0xE000)|| (c取代; = 0xF8FF&安培;&安培;℃下= 0xFA6D)|| (c取代; = 0xFA70&安培;&安培;℃下= 0xFAD9)|| (c取代; = 0xFB00&安培;&安培;℃下= 0xFB06)|| (c取代; = 0xFB13&安培;&安培;℃下= 0xFB17)|| (c取代; = 0xFF21&安培;&安培;℃下= 0xFF3A)|| (c取代; = 0xFF41&安培;&安培;℃下= 0xFF5A)|| (c取代; = 0xFF66&安培;&安培;℃下= 0xFFBE)|| (c取代; = 
0xFFC2&安培;&安培;℃下= 0xFFC7)|| (c取代; = 0xFFCA&安培;&安培;℃下= 0xFFCF)|| (c取代; = 0xFFD2&安培;&安培;℃下= 0xFFD7)|| (c取代; = 0xFFDA&安培;&安培;℃下= 0xFFDC)|| (C> = 0x10000的&放大器;和C< = 0x1000B)|| (c取代; = 0x1000D&安培;&安培;℃下= 0x10026)|| (c取代; = 0x10028&安培;&安培;℃下= 0x1003A)|| (c取代; = 0x1003C&安培;&安培;℃下= 0x1003D)|| (c取代; = 0x1003F&安培;&安培;℃下= 0x1004D)|| (c取代; = 0x10050&安培;&安培;℃下= 0x1005D)|| (c取代; = 0x10080&安培;&安培;℃下= 0x100FA)|| (三== 0x10100)|| (三== 0x10102)|| (c取代; = 0x10107&安培;&安培;℃下= 0x10133)|| (c取代; = 0x10137&安培;&安培;℃下= 0x1013F)|| (c取代; = 0x101D0&安培;&安培;℃下= 0x101FC)|| (c取代; = 0x10280&安培;&安培;℃下= 0x1029C)|| (c取代; = 0x102A0&安培;&安培;℃下= 0x102D0)|| (c取代; = 0x10300&安培;&安培;℃下= 0x1031E)|| (c取代; = 0x10320&安培;&安培;℃下= 0x10323)|| (c取代; = 0x10330&安培;&安培;℃下= 0x1034A)|| (c取代; = 0x10380&安培;&安培;℃下= 0x1039D)|| (c取代; = 0x1039F&安培;&安培;℃下= 0x103C3)|| (c取代; = 0x103C8&安培;&安培;℃下= 0x103D5)|| (c取代; = 0x10400&安培;&安培;℃下= 0x1049D)|| (c取代; = 0x104A0&安培;&安培;℃下= 0x104A9)|| (C == 0×11000)|| (c取代; = 0x11002&安培;&安培;℃下= 0x11037)|| (c取代; = 0x11047&安培;&安培;℃下= 0x1104D)|| (c取代; = 0x11066&安培;&安培;℃下= 0x1106F)|| (c取代; = 0x11082&安培;&安培;℃下= 0x110B2)|| (c取代; = 0x110B7&安培;&安培;℃下= 0x110B8)|| (c取代; = 0x110BB&安培;&安培;℃下= 0x110C1)|| (c取代; = 0x110D0&安培;&安培;℃下= 0x110E8)|| (c取代; = 0x110F0&安培;&安培;℃下= 0x110F9)|| (c取代; = 0x11103&安培;&安培;℃下= 0x11126)|| (三== 0x1112C)|| (c取代; = 0x11136&安培;&安培;℃下= 0x11143)|| (c取代; = 0x11182&安培;&安培;℃下= 0x111B5)|| (c取代; = 0x111BF&安培;&安培;℃下= 0x111C8)|| (c取代; = 0x111D0&安培;&安培;℃下= 0x111D9)|| (c取代; = 0x11680&安培;&安培;℃下= 0x116AA)|| (三== 0x116AC)|| (c取代; = 0x116AE&安培;&安培;℃下= 0x116AF)|| (三== 0x116B6)|| (c取代; = 0x116C0&安培;&安培;℃下= 0x116C9)|| (c取代; = 0x12000&安培;&安培;℃下= 0x1236E)|| (c取代; = 0x12400&安培;&安培;℃下= 0x12462)|| (c取代; = 0x12470&安培;&安培;℃下= 0x12473)|| (c取代; = 0x13000&安培;&安培;℃下= 0x1342E)|| (c取代; = 0x16800&安培;&安培;℃下= 0x16A38)|| (c取代; = 0x16F00&安培;&安培;℃下= 0x16F44)|| (c取代; = 0x16F50&安培;&安培;℃下= 0x16F7E)|| (c取代; = 0x16F93&安培;&安培;℃下= 0x16F9F)|| (c取代; = 0x1B000&安培;&安培;℃下= 0x1B001)|| (c取代; = 0x1D000&安培;&安培;℃下= 0x1D0F5)|| (c取代; = 
0x1D100&安培;&安培;℃下= 0x1D126)|| (c取代; = 0x1D129&安培;&安培;℃下= 0x1D166)|| (c取代; = 0x1D16A&安培;&安培;℃下= 0x1D172)|| (c取代; = 0x1D183&安培;&安培;℃下= 0x1D184)|| (c取代; = 0x1D18C&安培;&安培;℃下= 0x1D1A9)|| (c取代; = 0x1D1AE&安培;&安培;℃下= 0x1D1DD)|| (c取代; = 0x1D360&安培;&安培;℃下= 0x1D371)|| (c取代; = 0x1D400&安培;&安培;℃下= 0x1D454)|| (c取代; = 0x1D456&安培;&安培;℃下= 0x1D49C)|| (c取代; = 0x1D49E&安培;&安培;℃下= 0x1D49F)|| (三== 0x1D4A2)|| (c取代; = 0x1D4A5&安培;&安培;℃下= 0x1D4A6)|| (c取代; = 0x1D4A9&安培;&安培;℃下= 0x1D4AC)|| (c取代; = 0x1D4AE&安培;&安培;℃下= 0x1D4B9)|| (三== 0x1D4BB)|| (c取代; = 0x1D4BD&安培;&安培;℃下= 0x1D4C3)|| (c取代; = 0x1D4C5&安培;&安培;℃下= 0x1D505)|| (c取代; = 0x1D507&安培;&安培;℃下= 0x1D50A)|| (c取代; = 0x1D50D&安培;&安培;℃下= 0x1D514)|| (c取代; = 0x1D516&安培;&安培;℃下= 0x1D51C)|| (c取代; = 0x1D51E&安培;&安培;℃下= 0x1D539)|| (c取代; = 0x1D53B&安培;&安培;℃下= 0x1D53E)|| (c取代; = 0x1D540&安培;&安培;℃下= 0x1D544)|| (三== 0x1D546)|| (c取代; = 0x1D54A&安培;&安培;℃下= 0x1D550)|| (c取代; = 0x1D552&安培;&安培;℃下= 0x1D6A5)|| (c取代; = 0x1D6A8&安培;&安培;℃下= 0x1D6DA)|| (c取代; = 0x1D6DC&安培;&安培;℃下= 0x1D714)|| (c取代; = 0x1D716&安培;&安培;℃下= 0x1D74E)|| (c取代; = 0x1D750&安培;&安培;℃下= 0x1D788)|| (c取代; = 0x1D78A&安培;&安培;℃下= 0x1D7C2)|| (c取代; = 0x1D7C4&安培;&安培;℃下= 0x1D7CB)|| (c取代; = 0x1F110&安培;&安培;℃下= 0x1F12E)|| (c取代; = 0x1F130&安培;&安培;℃下= 0x1F169)|| (c取代; = 0x1F170&安培;&安培;℃下= 0x1F19A)|| (c取代; = 0x1F1E6&安培;&安培;℃下= 0x1F202)|| (c取代; = 0x1F210&安培;&安培;℃下= 0x1F23A)|| (c取代; = 0x1F240&安培;&安培;℃下= 0x1F248)|| (c取代; = 0x1F250&安培;&安培;℃下= 0x1F251)|| (三== 0x20000的)|| (三== 0x2A6D6)|| (三== 0x2A700)|| (三== 0x2B734)|| (三== 0x2B740)|| (三== 0x2B81D)|| (c取代; = 0x2F800&安培;&安培;℃下= 0x2FA1D)|| (三== 0xF0000)|| (三== 0xFFFFD)|| (三==的0x100000)|| (三== 0x10FFFD);
}

 - (int)getBaseDirection {
  // Decode the string into UTF-32 so that every array element is exactly one
  // code point (UTF-32 is a fixed-length encoding).
  NSData *utf32data = [self dataUsingEncoding:NSUTF32StringEncoding];
  // NSUTF32StringEncoding has the platform's byte order, which should be the
  // same as UTF32Char's.
  const UTF32Char *utf32chars = (const UTF32Char *)[utf32data bytes];

  // Bound the scan by the number of UTF-32 elements actually present in the
  // buffer. self.length counts UTF-16 code units, which is not the same thing:
  // a surrogate pair is two code units but one UTF-32 element, and the encoded
  // data may start with a byte-order mark. Using self.length can therefore
  // skip the last character or read past the end of the buffer.
  // A leading BOM (U+FEFF) is harmless here: it is in neither strong table.
  NSUInteger codePointCount = [utf32data length] / sizeof(UTF32Char);

  for (NSUInteger i = 0; i < codePointCount; i++) {
    // The first strongly-directional character decides the result.
    if (isCodePointStrongRTL(utf32chars[i]))
      return -1;
    if (isCodePointStrongLTR(utf32chars[i]))
      return 1;
  }
  // No strongly-directional character found (also covers the empty string
  // and a failed conversion, where the data length is 0).
  return 0;
}

@结束
 

详细回答:生成函数 isCodePointStrong{RTL,LTR}

创建一个脚本 hex_numbers_to_dec_ranges.py

"""Turn a sorted list of hex code points (one per line on stdin) into a
C boolean expression over `c`, collapsing consecutive values into ranges.

Example: the lines 5BE, 5D0, 5D1, 5D2 produce
    (c == 0x5BE) || (c >= 0x5D0 && c <= 0x5D2)
"""
import sys
from itertools import count, groupby


def as_range(iterable):
    """Format one run of consecutive integers as a C comparison on `c`.

    A run of several values becomes an inclusive range test; a run of one
    value becomes an equality test.
    """
    run = list(iterable)
    if len(run) > 1:
        return '(c >= 0x{0:X} && c <= 0x{1:X})'.format(run[0], run[-1])
    return '(c == 0x{0:X})'.format(run[0])


def format_ranges(numbers):
    """Join the runs of consecutive integers in `numbers` with ' || '.

    `numbers` is expected to be in ascending order. Consecutive values share
    the same (value - position) difference, which is what groupby keys on.
    """
    position = count()
    runs = groupby(numbers, lambda n: n - next(position))
    return ' || '.join(as_range(run) for _, run in runs)


def main():
    """Read hex numbers from stdin and print the combined expression."""
    # Skip blank lines so a trailing newline in the input does not crash int().
    numbers = [int(line, 16) for line in sys.stdin if line.strip()]
    sys.stdout.write(format_ranges(numbers) + '\n')


if __name__ == '__main__':
    main()

(代码“无耻地”借用自 StackExchange Code Review 上的这个优秀回答。)

从终端上运行:

curl http://www.unicode.org/Public/UNIDATA/UnicodeData.txt > /tmp/UnicodeData.txt
cat /tmp/UnicodeData.txt | awk -F";" '$5 == "R" || $5 == "AL"' | cut -d";" -f1 | python hex_numbers_to_dec_ranges.py > rtl.m
cat /tmp/UnicodeData.txt | awk -F";" '$5 == "L"' | cut -d";" -f1 | python hex_numbers_to_dec_ranges.py > ltr.m
rm /tmp/UnicodeData.txt

教你把握PPT文字的 间隙 松紧 对齐

编辑:正如 @masmor 正确指出的,getBaseDirection 中的循环扫描的是字符,而不是字节。因此,循环应当在迭代了“字符数”次之后终止,而不是“字节数”次。换句话说,是 self.length 次,而不是 utf32data.length 次。代码现已更正。

I'm interested in setting some text into a UILabel, and depending on the directionality of the language (e.g., Hebrew - right-to-left [RTL], English - left-to-right [LTR]) set the alignment of the UILabel.

Note that using iOS 6's NSTextAlignmentNatural does not solve the problem, as it chooses alignment according to the current locale, experiments show.

解决方案

Ended up following the advice given in this SO answer: write a short script that will parse the Unicode data publicly available here, and generate code to identify whether a code-point has a strong R or AL directionality attribute. Then, the string is searched for the first such character. This is exactly what ubidi_getBaseDirection from the ICU package does.

Since the internal representation of NSString is UTF16 (which is a variable-length encoding), it is first converted to UTF32 in order to simplify the scanning code. An alternative approach would be to decode the string on the fly, which requires dealing with BOM and Unicode surrogates. Yet another approach is simply ignoring characters not representable by one unichar. For more details, see Wikipedia's UTF16 article.

Short Answer

@interface NSString (TextDirectionality)

/**
 * Reports the base text direction of the receiver.
 *
 * @return 1 if the string is strongly LTR, -1 if strongly RTL, or 0 if neutral.
 *
 * NOTE(review): described as doing what ICU's ubidi_getBaseDirection does;
 * confirm against the implementation.
 * See http://icu-project.org/apiref/icu4c/ubidi_8h.html#aeb1fd15743833278cc11906cd5a48aef
 */
-(int)getBaseDirection;

@end

@implementation NSString (TextDirectionality)

// Returns YES when code point c is one of the strongly right-to-left code
// points enumerated below (generated from UnicodeData.txt entries whose bidi
// class is R or AL), NO otherwise.
// The parameter is a UTF32 character, and not a unichar (=UTF16 character),
// because some Unicode characters need full 32 bits to represent.
// The expression is machine-generated; regenerate it rather than editing by hand.
BOOL isCodePointStrongRTL(UTF32Char c) {
  return ((c == 0x5BE) || (c == 0x5C0) || (c == 0x5C3) || (c == 0x5C6) || (c >= 0x5D0 && c <= 0x5EA) || (c >= 0x5F0 && c <= 0x5F4) || (c == 0x608) || (c == 0x60B) || (c == 0x60D) || (c == 0x61B) || (c >= 0x61E && c <= 0x64A) || (c >= 0x66D && c <= 0x66F) || (c >= 0x671 && c <= 0x6D5) || (c >= 0x6E5 && c <= 0x6E6) || (c >= 0x6EE && c <= 0x6EF) || (c >= 0x6FA && c <= 0x70D) || (c >= 0x70F && c <= 0x710) || (c >= 0x712 && c <= 0x72F) || (c >= 0x74D && c <= 0x7A5) || (c == 0x7B1) || (c >= 0x7C0 && c <= 0x7EA) || (c >= 0x7F4 && c <= 0x7F5) || (c == 0x7FA) || (c >= 0x800 && c <= 0x815) || (c == 0x81A) || (c == 0x824) || (c == 0x828) || (c >= 0x830 && c <= 0x83E) || (c >= 0x840 && c <= 0x858) || (c == 0x85E) || (c == 0x8A0) || (c >= 0x8A2 && c <= 0x8AC) || (c == 0x200F) || (c == 0xFB1D) || (c >= 0xFB1F && c <= 0xFB28) || (c >= 0xFB2A && c <= 0xFB36) || (c >= 0xFB38 && c <= 0xFB3C) || (c == 0xFB3E) || (c >= 0xFB40 && c <= 0xFB41) || (c >= 0xFB43 && c <= 0xFB44) || (c >= 0xFB46 && c <= 0xFBC1) || (c >= 0xFBD3 && c <= 0xFD3D) || (c >= 0xFD50 && c <= 0xFD8F) || (c >= 0xFD92 && c <= 0xFDC7) || (c >= 0xFDF0 && c <= 0xFDFC) || (c >= 0xFE70 && c <= 0xFE74) || (c >= 0xFE76 && c <= 0xFEFC) || (c >= 0x10800 && c <= 0x10805) || (c == 0x10808) || (c >= 0x1080A && c <= 0x10835) || (c >= 0x10837 && c <= 0x10838) || (c == 0x1083C) || (c >= 0x1083F && c <= 0x10855) || (c >= 0x10857 && c <= 0x1085F) || (c >= 0x10900 && c <= 0x1091B) || (c >= 0x10920 && c <= 0x10939) || (c == 0x1093F) || (c >= 0x10980 && c <= 0x109B7) || (c >= 0x109BE && c <= 0x109BF) || (c == 0x10A00) || (c >= 0x10A10 && c <= 0x10A13) || (c >= 0x10A15 && c <= 0x10A17) || (c >= 0x10A19 && c <= 0x10A33) || (c >= 0x10A40 && c <= 0x10A47) || (c >= 0x10A50 && c <= 0x10A58) || (c >= 0x10A60 && c <= 0x10A7F) || (c >= 0x10B00 && c <= 0x10B35) || (c >= 0x10B40 && c <= 0x10B55) || (c >= 0x10B58 && c <= 0x10B72) || (c >= 0x10B78 && c <= 0x10B7F) || (c >= 0x10C00 && c <= 0x10C48) || (c >= 0x1EE00 && c <= 0x1EE03) || (c >= 0x1EE05 && c 
<= 0x1EE1F) || (c >= 0x1EE21 && c <= 0x1EE22) || (c == 0x1EE24) || (c == 0x1EE27) || (c >= 0x1EE29 && c <= 0x1EE32) || (c >= 0x1EE34 && c <= 0x1EE37) || (c == 0x1EE39) || (c == 0x1EE3B) || (c == 0x1EE42) || (c == 0x1EE47) || (c == 0x1EE49) || (c == 0x1EE4B) || (c >= 0x1EE4D && c <= 0x1EE4F) || (c >= 0x1EE51 && c <= 0x1EE52) || (c == 0x1EE54) || (c == 0x1EE57) || (c == 0x1EE59) || (c == 0x1EE5B) || (c == 0x1EE5D) || (c == 0x1EE5F) || (c >= 0x1EE61 && c <= 0x1EE62) || (c == 0x1EE64) || (c >= 0x1EE67 && c <= 0x1EE6A) || (c >= 0x1EE6C && c <= 0x1EE72) || (c >= 0x1EE74 && c <= 0x1EE77) || (c >= 0x1EE79 && c <= 0x1EE7C) || (c == 0x1EE7E) || (c >= 0x1EE80 && c <= 0x1EE89) || (c >= 0x1EE8B && c <= 0x1EE9B) || (c >= 0x1EEA1 && c <= 0x1EEA3) || (c >= 0x1EEA5 && c <= 0x1EEA9) || (c >= 0x1EEAB && c <= 0x1EEBB));
}

// Returns YES when code point c has a strong left-to-right bidi class (L),
// NO otherwise. The parameter is a full UTF32 code point because many of the
// ranges below lie outside the Basic Multilingual Plane.
//
// The list was generated from UnicodeData.txt. That file describes some large
// blocks (CJK ideographs, Hangul syllables, surrogates, private use areas) only
// by a "<..., First>" / "<..., Last>" pair of lines; every code point between
// the two endpoints shares the listed properties. The generated expression
// used to match only those endpoints, so e.g. Chinese and Korean text was
// reported as neutral. The affected ranges are written out in full below and
// marked "First..Last".
BOOL isCodePointStrongLTR(UTF32Char c) {
  return (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A) || (c == 0xAA) || (c == 0xB5) || (c == 0xBA) || (c >= 0xC0 && c <= 0xD6) || (c >= 0xD8 && c <= 0xF6) || (c >= 0xF8 && c <= 0x2B8) || (c >= 0x2BB && c <= 0x2C1) || (c >= 0x2D0 && c <= 0x2D1) || (c >= 0x2E0 && c <= 0x2E4) || (c == 0x2EE) || (c >= 0x370 && c <= 0x373) || (c >= 0x376 && c <= 0x377) || (c >= 0x37A && c <= 0x37D) || (c == 0x386) || (c >= 0x388 && c <= 0x38A) || (c == 0x38C) || (c >= 0x38E && c <= 0x3A1) || (c >= 0x3A3 && c <= 0x3F5) || (c >= 0x3F7 && c <= 0x482) || (c >= 0x48A && c <= 0x527) || (c >= 0x531 && c <= 0x556) || (c >= 0x559 && c <= 0x55F) || (c >= 0x561 && c <= 0x587) || (c == 0x589) || (c >= 0x903 && c <= 0x939) || (c == 0x93B) || (c >= 0x93D && c <= 0x940) || (c >= 0x949 && c <= 0x94C) || (c >= 0x94E && c <= 0x950) || (c >= 0x958 && c <= 0x961) || (c >= 0x964 && c <= 0x977) || (c >= 0x979 && c <= 0x97F) || (c >= 0x982 && c <= 0x983) || (c >= 0x985 && c <= 0x98C) || (c >= 0x98F && c <= 0x990) || (c >= 0x993 && c <= 0x9A8) || (c >= 0x9AA && c <= 0x9B0) || (c == 0x9B2) || (c >= 0x9B6 && c <= 0x9B9) || (c >= 0x9BD && c <= 0x9C0) || (c >= 0x9C7 && c <= 0x9C8) || (c >= 0x9CB && c <= 0x9CC) || (c == 0x9CE) || (c == 0x9D7) || (c >= 0x9DC && c <= 0x9DD) || (c >= 0x9DF && c <= 0x9E1) || (c >= 0x9E6 && c <= 0x9F1) || (c >= 0x9F4 && c <= 0x9FA) || (c == 0xA03) || (c >= 0xA05 && c <= 0xA0A) || (c >= 0xA0F && c <= 0xA10) || (c >= 0xA13 && c <= 0xA28) || (c >= 0xA2A && c <= 0xA30) || (c >= 0xA32 && c <= 0xA33) || (c >= 0xA35 && c <= 0xA36) || (c >= 0xA38 && c <= 0xA39) || (c >= 0xA3E && c <= 0xA40) || (c >= 0xA59 && c <= 0xA5C) || (c == 0xA5E) || (c >= 0xA66 && c <= 0xA6F) || (c >= 0xA72 && c <= 0xA74) || (c == 0xA83) || (c >= 0xA85 && c <= 0xA8D) || (c >= 0xA8F && c <= 0xA91) || (c >= 0xA93 && c <= 0xAA8) || (c >= 0xAAA && c <= 0xAB0) || (c >= 0xAB2 && c <= 0xAB3) || (c >= 0xAB5 && c <= 0xAB9) || (c >= 0xABD && c <= 0xAC0) || (c == 0xAC9) || (c >= 0xACB && c <= 0xACC) || (c == 0xAD0) ||
      (c >= 0xAE0 && c <= 0xAE1) || (c >= 0xAE6 && c <= 0xAF0) || (c >= 0xB02 && c <= 0xB03) || (c >= 0xB05 && c <= 0xB0C) || (c >= 0xB0F && c <= 0xB10) || (c >= 0xB13 && c <= 0xB28) || (c >= 0xB2A && c <= 0xB30) || (c >= 0xB32 && c <= 0xB33) || (c >= 0xB35 && c <= 0xB39) || (c >= 0xB3D && c <= 0xB3E) || (c == 0xB40) || (c >= 0xB47 && c <= 0xB48) || (c >= 0xB4B && c <= 0xB4C) || (c == 0xB57) || (c >= 0xB5C && c <= 0xB5D) || (c >= 0xB5F && c <= 0xB61) || (c >= 0xB66 && c <= 0xB77) || (c == 0xB83) || (c >= 0xB85 && c <= 0xB8A) || (c >= 0xB8E && c <= 0xB90) || (c >= 0xB92 && c <= 0xB95) || (c >= 0xB99 && c <= 0xB9A) || (c == 0xB9C) || (c >= 0xB9E && c <= 0xB9F) || (c >= 0xBA3 && c <= 0xBA4) || (c >= 0xBA8 && c <= 0xBAA) || (c >= 0xBAE && c <= 0xBB9) || (c >= 0xBBE && c <= 0xBBF) || (c >= 0xBC1 && c <= 0xBC2) || (c >= 0xBC6 && c <= 0xBC8) || (c >= 0xBCA && c <= 0xBCC) || (c == 0xBD0) || (c == 0xBD7) || (c >= 0xBE6 && c <= 0xBF2) || (c >= 0xC01 && c <= 0xC03) || (c >= 0xC05 && c <= 0xC0C) || (c >= 0xC0E && c <= 0xC10) || (c >= 0xC12 && c <= 0xC28) || (c >= 0xC2A && c <= 0xC33) || (c >= 0xC35 && c <= 0xC39) || (c == 0xC3D) || (c >= 0xC41 && c <= 0xC44) || (c >= 0xC58 && c <= 0xC59) || (c >= 0xC60 && c <= 0xC61) || (c >= 0xC66 && c <= 0xC6F) || (c == 0xC7F) || (c >= 0xC82 && c <= 0xC83) || (c >= 0xC85 && c <= 0xC8C) || (c >= 0xC8E && c <= 0xC90) || (c >= 0xC92 && c <= 0xCA8) || (c >= 0xCAA && c <= 0xCB3) || (c >= 0xCB5 && c <= 0xCB9) || (c >= 0xCBD && c <= 0xCC4) || (c >= 0xCC6 && c <= 0xCC8) || (c >= 0xCCA && c <= 0xCCB) || (c >= 0xCD5 && c <= 0xCD6) || (c == 0xCDE) || (c >= 0xCE0 && c <= 0xCE1) || (c >= 0xCE6 && c <= 0xCEF) || (c >= 0xCF1 && c <= 0xCF2) || (c >= 0xD02 && c <= 0xD03) || (c >= 0xD05 && c <= 0xD0C) || (c >= 0xD0E && c <= 0xD10) || (c >= 0xD12 && c <= 0xD3A) || (c >= 0xD3D && c <= 0xD40) || (c >= 0xD46 && c <= 0xD48) || (c >= 0xD4A && c <= 0xD4C) || (c == 0xD4E) || (c == 0xD57) || (c >= 0xD60 && c <= 0xD61) || (c >= 0xD66 && c <= 0xD75) || (c >= 0xD79 && c <= 0xD7F) ||
      (c >= 0xD82 && c <= 0xD83) || (c >= 0xD85 && c <= 0xD96) || (c >= 0xD9A && c <= 0xDB1) || (c >= 0xDB3 && c <= 0xDBB) || (c == 0xDBD) || (c >= 0xDC0 && c <= 0xDC6) || (c >= 0xDCF && c <= 0xDD1) || (c >= 0xDD8 && c <= 0xDDF) || (c >= 0xDF2 && c <= 0xDF4) || (c >= 0xE01 && c <= 0xE30) || (c >= 0xE32 && c <= 0xE33) || (c >= 0xE40 && c <= 0xE46) || (c >= 0xE4F && c <= 0xE5B) || (c >= 0xE81 && c <= 0xE82) || (c == 0xE84) || (c >= 0xE87 && c <= 0xE88) || (c == 0xE8A) || (c == 0xE8D) || (c >= 0xE94 && c <= 0xE97) || (c >= 0xE99 && c <= 0xE9F) || (c >= 0xEA1 && c <= 0xEA3) || (c == 0xEA5) || (c == 0xEA7) || (c >= 0xEAA && c <= 0xEAB) || (c >= 0xEAD && c <= 0xEB0) || (c >= 0xEB2 && c <= 0xEB3) || (c == 0xEBD) || (c >= 0xEC0 && c <= 0xEC4) || (c == 0xEC6) || (c >= 0xED0 && c <= 0xED9) || (c >= 0xEDC && c <= 0xEDF) || (c >= 0xF00 && c <= 0xF17) || (c >= 0xF1A && c <= 0xF34) || (c == 0xF36) || (c == 0xF38) || (c >= 0xF3E && c <= 0xF47) || (c >= 0xF49 && c <= 0xF6C) || (c == 0xF7F) || (c == 0xF85) || (c >= 0xF88 && c <= 0xF8C) || (c >= 0xFBE && c <= 0xFC5) || (c >= 0xFC7 && c <= 0xFCC) || (c >= 0xFCE && c <= 0xFDA) || (c >= 0x1000 && c <= 0x102C) || (c == 0x1031) || (c == 0x1038) || (c >= 0x103B && c <= 0x103C) || (c >= 0x103F && c <= 0x1057) || (c >= 0x105A && c <= 0x105D) || (c >= 0x1061 && c <= 0x1070) || (c >= 0x1075 && c <= 0x1081) || (c >= 0x1083 && c <= 0x1084) || (c >= 0x1087 && c <= 0x108C) || (c >= 0x108E && c <= 0x109C) || (c >= 0x109E && c <= 0x10C5) || (c == 0x10C7) || (c == 0x10CD) || (c >= 0x10D0 && c <= 0x1248) || (c >= 0x124A && c <= 0x124D) || (c >= 0x1250 && c <= 0x1256) || (c == 0x1258) || (c >= 0x125A && c <= 0x125D) || (c >= 0x1260 && c <= 0x1288) || (c >= 0x128A && c <= 0x128D) || (c >= 0x1290 && c <= 0x12B0) || (c >= 0x12B2 && c <= 0x12B5) || (c >= 0x12B8 && c <= 0x12BE) || (c == 0x12C0) || (c >= 0x12C2 && c <= 0x12C5) || (c >= 0x12C8 && c <= 0x12D6) || (c >= 0x12D8 && c <= 0x1310) || (c >= 0x1312 && c <= 0x1315) || (c >= 0x1318 && c <= 0x135A) ||
      (c >= 0x1360 && c <= 0x137C) || (c >= 0x1380 && c <= 0x138F) || (c >= 0x13A0 && c <= 0x13F4) || (c >= 0x1401 && c <= 0x167F) || (c >= 0x1681 && c <= 0x169A) || (c >= 0x16A0 && c <= 0x16F0) || (c >= 0x1700 && c <= 0x170C) || (c >= 0x170E && c <= 0x1711) || (c >= 0x1720 && c <= 0x1731) || (c >= 0x1735 && c <= 0x1736) || (c >= 0x1740 && c <= 0x1751) || (c >= 0x1760 && c <= 0x176C) || (c >= 0x176E && c <= 0x1770) || (c >= 0x1780 && c <= 0x17B3) || (c == 0x17B6) || (c >= 0x17BE && c <= 0x17C5) || (c >= 0x17C7 && c <= 0x17C8) || (c >= 0x17D4 && c <= 0x17DA) || (c == 0x17DC) || (c >= 0x17E0 && c <= 0x17E9) || (c >= 0x1810 && c <= 0x1819) || (c >= 0x1820 && c <= 0x1877) || (c >= 0x1880 && c <= 0x18A8) || (c == 0x18AA) || (c >= 0x18B0 && c <= 0x18F5) || (c >= 0x1900 && c <= 0x191C) || (c >= 0x1923 && c <= 0x1926) || (c >= 0x1929 && c <= 0x192B) || (c >= 0x1930 && c <= 0x1931) || (c >= 0x1933 && c <= 0x1938) || (c >= 0x1946 && c <= 0x196D) || (c >= 0x1970 && c <= 0x1974) || (c >= 0x1980 && c <= 0x19AB) || (c >= 0x19B0 && c <= 0x19C9) || (c >= 0x19D0 && c <= 0x19DA) || (c >= 0x1A00 && c <= 0x1A16) || (c >= 0x1A19 && c <= 0x1A1B) || (c >= 0x1A1E && c <= 0x1A55) || (c == 0x1A57) || (c == 0x1A61) || (c >= 0x1A63 && c <= 0x1A64) || (c >= 0x1A6D && c <= 0x1A72) || (c >= 0x1A80 && c <= 0x1A89) || (c >= 0x1A90 && c <= 0x1A99) || (c >= 0x1AA0 && c <= 0x1AAD) || (c >= 0x1B04 && c <= 0x1B33) || (c == 0x1B35) || (c == 0x1B3B) || (c >= 0x1B3D && c <= 0x1B41) || (c >= 0x1B43 && c <= 0x1B4B) || (c >= 0x1B50 && c <= 0x1B6A) || (c >= 0x1B74 && c <= 0x1B7C) || (c >= 0x1B82 && c <= 0x1BA1) || (c >= 0x1BA6 && c <= 0x1BA7) || (c == 0x1BAA) || (c >= 0x1BAC && c <= 0x1BE5) || (c == 0x1BE7) || (c >= 0x1BEA && c <= 0x1BEC) || (c == 0x1BEE) || (c >= 0x1BF2 && c <= 0x1BF3) || (c >= 0x1BFC && c <= 0x1C2B) || (c >= 0x1C34 && c <= 0x1C35) || (c >= 0x1C3B && c <= 0x1C49) || (c >= 0x1C4D && c <= 0x1C7F) || (c >= 0x1CC0 && c <= 0x1CC7) || (c == 0x1CD3) || (c == 0x1CE1) || (c >= 0x1CE9 && c <= 0x1CEC) ||
      (c >= 0x1CEE && c <= 0x1CF3) || (c >= 0x1CF5 && c <= 0x1CF6) || (c >= 0x1D00 && c <= 0x1DBF) || (c >= 0x1E00 && c <= 0x1F15) || (c >= 0x1F18 && c <= 0x1F1D) || (c >= 0x1F20 && c <= 0x1F45) || (c >= 0x1F48 && c <= 0x1F4D) || (c >= 0x1F50 && c <= 0x1F57) || (c == 0x1F59) || (c == 0x1F5B) || (c == 0x1F5D) || (c >= 0x1F5F && c <= 0x1F7D) || (c >= 0x1F80 && c <= 0x1FB4) || (c >= 0x1FB6 && c <= 0x1FBC) || (c == 0x1FBE) || (c >= 0x1FC2 && c <= 0x1FC4) || (c >= 0x1FC6 && c <= 0x1FCC) || (c >= 0x1FD0 && c <= 0x1FD3) || (c >= 0x1FD6 && c <= 0x1FDB) || (c >= 0x1FE0 && c <= 0x1FEC) || (c >= 0x1FF2 && c <= 0x1FF4) || (c >= 0x1FF6 && c <= 0x1FFC) || (c == 0x200E) || (c == 0x2071) || (c == 0x207F) || (c >= 0x2090 && c <= 0x209C) || (c == 0x2102) || (c == 0x2107) || (c >= 0x210A && c <= 0x2113) || (c == 0x2115) || (c >= 0x2119 && c <= 0x211D) || (c == 0x2124) || (c == 0x2126) || (c == 0x2128) || (c >= 0x212A && c <= 0x212D) || (c >= 0x212F && c <= 0x2139) || (c >= 0x213C && c <= 0x213F) || (c >= 0x2145 && c <= 0x2149) || (c >= 0x214E && c <= 0x214F) || (c >= 0x2160 && c <= 0x2188) || (c >= 0x2336 && c <= 0x237A) || (c == 0x2395) || (c >= 0x249C && c <= 0x24E9) || (c == 0x26AC) || (c >= 0x2800 && c <= 0x28FF) || (c >= 0x2C00 && c <= 0x2C2E) || (c >= 0x2C30 && c <= 0x2C5E) || (c >= 0x2C60 && c <= 0x2CE4) || (c >= 0x2CEB && c <= 0x2CEE) || (c >= 0x2CF2 && c <= 0x2CF3) || (c >= 0x2D00 && c <= 0x2D25) || (c == 0x2D27) || (c == 0x2D2D) || (c >= 0x2D30 && c <= 0x2D67) || (c >= 0x2D6F && c <= 0x2D70) || (c >= 0x2D80 && c <= 0x2D96) || (c >= 0x2DA0 && c <= 0x2DA6) || (c >= 0x2DA8 && c <= 0x2DAE) || (c >= 0x2DB0 && c <= 0x2DB6) || (c >= 0x2DB8 && c <= 0x2DBE) || (c >= 0x2DC0 && c <= 0x2DC6) || (c >= 0x2DC8 && c <= 0x2DCE) || (c >= 0x2DD0 && c <= 0x2DD6) || (c >= 0x2DD8 && c <= 0x2DDE) || (c >= 0x3005 && c <= 0x3007) || (c >= 0x3021 && c <= 0x3029) || (c >= 0x302E && c <= 0x302F) || (c >= 0x3031 && c <= 0x3035) || (c >= 0x3038 && c <= 0x303C) || (c >= 0x3041 && c <= 0x3096) ||
      (c >= 0x309D && c <= 0x309F) || (c >= 0x30A1 && c <= 0x30FA) || (c >= 0x30FC && c <= 0x30FF) || (c >= 0x3105 && c <= 0x312D) || (c >= 0x3131 && c <= 0x318E) || (c >= 0x3190 && c <= 0x31BA) || (c >= 0x31F0 && c <= 0x321C) || (c >= 0x3220 && c <= 0x324F) || (c >= 0x3260 && c <= 0x327B) || (c >= 0x327F && c <= 0x32B0) || (c >= 0x32C0 && c <= 0x32CB) || (c >= 0x32D0 && c <= 0x32FE) || (c >= 0x3300 && c <= 0x3376) || (c >= 0x337B && c <= 0x33DD) || (c >= 0x33E0 && c <= 0x33FE) ||
      // First..Last: CJK Ideograph Extension A and CJK Unified Ideographs.
      (c >= 0x3400 && c <= 0x4DB5) || (c >= 0x4E00 && c <= 0x9FCC) ||
      (c >= 0xA000 && c <= 0xA48C) || (c >= 0xA4D0 && c <= 0xA60C) || (c >= 0xA610 && c <= 0xA62B) || (c >= 0xA640 && c <= 0xA66E) || (c >= 0xA680 && c <= 0xA697) || (c >= 0xA6A0 && c <= 0xA6EF) || (c >= 0xA6F2 && c <= 0xA6F7) || (c >= 0xA722 && c <= 0xA787) || (c >= 0xA789 && c <= 0xA78E) || (c >= 0xA790 && c <= 0xA793) || (c >= 0xA7A0 && c <= 0xA7AA) || (c >= 0xA7F8 && c <= 0xA801) || (c >= 0xA803 && c <= 0xA805) || (c >= 0xA807 && c <= 0xA80A) || (c >= 0xA80C && c <= 0xA824) || (c == 0xA827) || (c >= 0xA830 && c <= 0xA837) || (c >= 0xA840 && c <= 0xA873) || (c >= 0xA880 && c <= 0xA8C3) || (c >= 0xA8CE && c <= 0xA8D9) || (c >= 0xA8F2 && c <= 0xA8FB) || (c >= 0xA900 && c <= 0xA925) || (c >= 0xA92E && c <= 0xA946) || (c >= 0xA952 && c <= 0xA953) || (c >= 0xA95F && c <= 0xA97C) || (c >= 0xA983 && c <= 0xA9B2) || (c >= 0xA9B4 && c <= 0xA9B5) || (c >= 0xA9BA && c <= 0xA9BB) || (c >= 0xA9BD && c <= 0xA9CD) || (c >= 0xA9CF && c <= 0xA9D9) || (c >= 0xA9DE && c <= 0xA9DF) || (c >= 0xAA00 && c <= 0xAA28) || (c >= 0xAA2F && c <= 0xAA30) || (c >= 0xAA33 && c <= 0xAA34) || (c >= 0xAA40 && c <= 0xAA42) || (c >= 0xAA44 && c <= 0xAA4B) || (c == 0xAA4D) || (c >= 0xAA50 && c <= 0xAA59) || (c >= 0xAA5C && c <= 0xAA7B) || (c >= 0xAA80 && c <= 0xAAAF) || (c == 0xAAB1) || (c >= 0xAAB5 && c <= 0xAAB6) || (c >= 0xAAB9 && c <= 0xAABD) || (c == 0xAAC0) || (c == 0xAAC2) || (c >= 0xAADB && c <= 0xAAEB) || (c >= 0xAAEE && c <= 0xAAF5) || (c >= 0xAB01 && c <= 0xAB06) ||
      (c >= 0xAB09 && c <= 0xAB0E) || (c >= 0xAB11 && c <= 0xAB16) || (c >= 0xAB20 && c <= 0xAB26) || (c >= 0xAB28 && c <= 0xAB2E) || (c >= 0xABC0 && c <= 0xABE4) || (c >= 0xABE6 && c <= 0xABE7) || (c >= 0xABE9 && c <= 0xABEC) || (c >= 0xABF0 && c <= 0xABF9) ||
      // First..Last: Hangul syllables.
      (c >= 0xAC00 && c <= 0xD7A3) ||
      (c >= 0xD7B0 && c <= 0xD7C6) || (c >= 0xD7CB && c <= 0xD7FB) ||
      // First..Last: surrogates (0xD800-0xDFFF) and the private use area
      // (0xE000-0xF8FF), which run straight into 0xF900-0xFA6D.
      (c >= 0xD800 && c <= 0xFA6D) ||
      (c >= 0xFA70 && c <= 0xFAD9) || (c >= 0xFB00 && c <= 0xFB06) || (c >= 0xFB13 && c <= 0xFB17) || (c >= 0xFF21 && c <= 0xFF3A) || (c >= 0xFF41 && c <= 0xFF5A) || (c >= 0xFF66 && c <= 0xFFBE) || (c >= 0xFFC2 && c <= 0xFFC7) || (c >= 0xFFCA && c <= 0xFFCF) || (c >= 0xFFD2 && c <= 0xFFD7) || (c >= 0xFFDA && c <= 0xFFDC) || (c >= 0x10000 && c <= 0x1000B) || (c >= 0x1000D && c <= 0x10026) || (c >= 0x10028 && c <= 0x1003A) || (c >= 0x1003C && c <= 0x1003D) || (c >= 0x1003F && c <= 0x1004D) || (c >= 0x10050 && c <= 0x1005D) || (c >= 0x10080 && c <= 0x100FA) || (c == 0x10100) || (c == 0x10102) || (c >= 0x10107 && c <= 0x10133) || (c >= 0x10137 && c <= 0x1013F) || (c >= 0x101D0 && c <= 0x101FC) || (c >= 0x10280 && c <= 0x1029C) || (c >= 0x102A0 && c <= 0x102D0) || (c >= 0x10300 && c <= 0x1031E) || (c >= 0x10320 && c <= 0x10323) || (c >= 0x10330 && c <= 0x1034A) || (c >= 0x10380 && c <= 0x1039D) || (c >= 0x1039F && c <= 0x103C3) || (c >= 0x103C8 && c <= 0x103D5) || (c >= 0x10400 && c <= 0x1049D) || (c >= 0x104A0 && c <= 0x104A9) || (c == 0x11000) || (c >= 0x11002 && c <= 0x11037) || (c >= 0x11047 && c <= 0x1104D) || (c >= 0x11066 && c <= 0x1106F) || (c >= 0x11082 && c <= 0x110B2) || (c >= 0x110B7 && c <= 0x110B8) || (c >= 0x110BB && c <= 0x110C1) || (c >= 0x110D0 && c <= 0x110E8) || (c >= 0x110F0 && c <= 0x110F9) || (c >= 0x11103 && c <= 0x11126) || (c == 0x1112C) || (c >= 0x11136 && c <= 0x11143) || (c >= 0x11182 && c <= 0x111B5) || (c >= 0x111BF && c <= 0x111C8) ||
      (c >= 0x111D0 && c <= 0x111D9) || (c >= 0x11680 && c <= 0x116AA) || (c == 0x116AC) || (c >= 0x116AE && c <= 0x116AF) || (c == 0x116B6) || (c >= 0x116C0 && c <= 0x116C9) || (c >= 0x12000 && c <= 0x1236E) || (c >= 0x12400 && c <= 0x12462) || (c >= 0x12470 && c <= 0x12473) || (c >= 0x13000 && c <= 0x1342E) || (c >= 0x16800 && c <= 0x16A38) || (c >= 0x16F00 && c <= 0x16F44) || (c >= 0x16F50 && c <= 0x16F7E) || (c >= 0x16F93 && c <= 0x16F9F) || (c >= 0x1B000 && c <= 0x1B001) || (c >= 0x1D000 && c <= 0x1D0F5) || (c >= 0x1D100 && c <= 0x1D126) || (c >= 0x1D129 && c <= 0x1D166) || (c >= 0x1D16A && c <= 0x1D172) || (c >= 0x1D183 && c <= 0x1D184) || (c >= 0x1D18C && c <= 0x1D1A9) || (c >= 0x1D1AE && c <= 0x1D1DD) || (c >= 0x1D360 && c <= 0x1D371) || (c >= 0x1D400 && c <= 0x1D454) || (c >= 0x1D456 && c <= 0x1D49C) || (c >= 0x1D49E && c <= 0x1D49F) || (c == 0x1D4A2) || (c >= 0x1D4A5 && c <= 0x1D4A6) || (c >= 0x1D4A9 && c <= 0x1D4AC) || (c >= 0x1D4AE && c <= 0x1D4B9) || (c == 0x1D4BB) || (c >= 0x1D4BD && c <= 0x1D4C3) || (c >= 0x1D4C5 && c <= 0x1D505) || (c >= 0x1D507 && c <= 0x1D50A) || (c >= 0x1D50D && c <= 0x1D514) || (c >= 0x1D516 && c <= 0x1D51C) || (c >= 0x1D51E && c <= 0x1D539) || (c >= 0x1D53B && c <= 0x1D53E) || (c >= 0x1D540 && c <= 0x1D544) || (c == 0x1D546) || (c >= 0x1D54A && c <= 0x1D550) || (c >= 0x1D552 && c <= 0x1D6A5) || (c >= 0x1D6A8 && c <= 0x1D6DA) || (c >= 0x1D6DC && c <= 0x1D714) || (c >= 0x1D716 && c <= 0x1D74E) || (c >= 0x1D750 && c <= 0x1D788) || (c >= 0x1D78A && c <= 0x1D7C2) || (c >= 0x1D7C4 && c <= 0x1D7CB) || (c >= 0x1F110 && c <= 0x1F12E) || (c >= 0x1F130 && c <= 0x1F169) || (c >= 0x1F170 && c <= 0x1F19A) || (c >= 0x1F1E6 && c <= 0x1F202) || (c >= 0x1F210 && c <= 0x1F23A) || (c >= 0x1F240 && c <= 0x1F248) || (c >= 0x1F250 && c <= 0x1F251) ||
      // First..Last: CJK Ideograph Extensions B, C and D.
      (c >= 0x20000 && c <= 0x2A6D6) || (c >= 0x2A700 && c <= 0x2B734) || (c >= 0x2B740 && c <= 0x2B81D) ||
      (c >= 0x2F800 && c <= 0x2FA1D) ||
      // First..Last: plane 15 and plane 16 private use areas.
      (c >= 0xF0000 && c <= 0xFFFFD) || (c >= 0x100000 && c <= 0x10FFFD);
}

/*
 * Returns the base direction of the receiver: -1 if the first strongly
 * directional character is RTL, 1 if it is LTR, and 0 if the string contains
 * no strongly directional character (or cannot be converted to UTF32).
 */
-(int)getBaseDirection {
  // Decode the string into UTF32 with an explicit byte order. The generic
  // NSUTF32StringEncoding may prepend a byte-order mark, which would shift
  // every code point by one slot.
  NSData *utf32data = [self dataUsingEncoding:NSUTF32LittleEndianStringEncoding];
  const UTF32Char *utf32chars = (const UTF32Char *)[utf32data bytes];

  // Take the number of code points from the converted buffer. self.length
  // counts UTF16 code units, which exceeds the number of UTF32 code points
  // whenever the string contains surrogate pairs, so using it as the bound
  // would read past the end of the buffer. A nil conversion result yields a
  // count of 0 and therefore a neutral answer.
  NSUInteger codePointCount = [utf32data length] / sizeof(UTF32Char);

  for (NSUInteger i = 0; i < codePointCount; i++) {
    // UTF32 is a fixed-length encoding, so utf32chars[i] is always the i'th
    // code point; convert it from little-endian to the host's byte order.
    UTF32Char c = CFSwapInt32LittleToHost(utf32chars[i]);
    if (isCodePointStrongRTL(c))
      return -1;
    if (isCodePointStrongLTR(c))
      return 1;
  }
  return 0;
}

@end

Longer answer: Generating the functions isCodePointStrong{RTL,LTR}

Create a script named hex_numbers_to_dec_ranges.py:

import sys
from itertools import groupby, count


def as_range(iterable):
    """Format one run of consecutive code points as a C condition on `c`.

    A run of several values becomes a closed-interval test; a single value
    becomes an equality test. Values are printed in upper-case hex.
    """
    run = list(iterable)
    if len(run) > 1:
        return '(c >= 0x{0:X} && c <= 0x{1:X})'.format(run[0], run[-1])
    return '(c == 0x{0:X})'.format(run[0])


def to_condition(numbers):
    """Join ascending integers into a ' || '-separated C boolean expression.

    Consecutive values are collapsed into one range: within a consecutive run
    the difference between a value and its position is constant, so that
    difference serves as the grouping key.
    """
    position = count()
    runs = groupby(numbers, lambda n: n - next(position))
    return ' || '.join(as_range(run) for _, run in runs)


def main():
    # One hexadecimal code point per input line; blank lines are skipped so a
    # trailing newline in the input does not abort the conversion.
    numbers = [int(line, 16) for line in sys.stdin if line.strip()]
    # print() with a single argument behaves the same on Python 2 and 3.
    print(to_condition(numbers))


if __name__ == '__main__':
    main()

(code shamelessly stolen from this excellent answer at StackExchange's Code Review).

Run from a terminal:

# Download the Unicode Character Database. -f makes curl exit with an error on
# an HTTP failure instead of saving the error page as data; -L follows redirects.
curl -fsSL https://www.unicode.org/Public/UNIDATA/UnicodeData.txt -o /tmp/UnicodeData.txt
# Keep the code point (field 1) of every entry whose field 5 is R or AL (strong RTL).
awk -F';' '$5 == "R" || $5 == "AL"' /tmp/UnicodeData.txt | cut -d';' -f1 | python hex_numbers_to_dec_ranges.py > rtl.m
# Same for field 5 equal to L (strong LTR).
# NOTE: blocks listed in UnicodeData.txt only as a "<..., First>" / "<..., Last>"
# pair of lines contribute just their two endpoints to this pipeline, so the
# generated output must have those endpoints joined into full ranges by hand.
awk -F';' '$5 == "L"' /tmp/UnicodeData.txt | cut -d';' -f1 | python hex_numbers_to_dec_ranges.py > ltr.m
rm /tmp/UnicodeData.txt

EDIT: As @masmor correctly noted, the for loop in getBaseDirection scans characters, and not bytes. Therefore, it should terminate after "character" number of iterations, and not "bytes" number of iterations. In other words, self.length times and not utf32data.length times. The code is now corrected.

 
精彩推荐
图片推荐