First off, let me say that I agree with what everyone else is saying. Would you consider the character é
to be a valid US English character? It gets used pretty often, but it's not in the normal "a-z" range.
That said, here's some code (VB2010). This code calls into the unmanaged function GetLocaleInfoW and asks for a LOCALESIGNATURE structure, which contains Unicode code point ranges. This information is used to determine what ranges are needed for a given font.
The Char structure doesn't support all of the Unicode code points, so the function returns String values instead. Look for "Surrogate pair" at the bottom of that link for more info.
This code doesn't do everything that you want, unfortunately. For example, the oft-cited Finnish language doesn't have the letter W, but in Windows the character exists in the valid code-point range. I don't know a way of getting down to the nitty-gritty on that.
Option Strict On
Option Explicit On
Imports System.Runtime.InteropServices
Public Class Form1
    ''//Demo driver: when the form loads, look up the Unicode ranges for two
    ''//locales and write the resulting character counts to the trace output.
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        ''//One helper instance serves every lookup below
        Dim rangeHelper As New UnicodeCharRanges()

        ''//--- ja-JP ---
        Dim japaneseRanges = rangeHelper.GetUnicodeRanges("ja-JP")
        If japaneseRanges IsNot Nothing Then
            ''//Characters are handed back as strings rather than Chars
            Dim japaneseChars = rangeHelper.GetCharactersForUnicodeRanges(japaneseRanges)
            Trace.WriteLine(japaneseChars.Count) ''//28351

            ''//Second pass treats surrogate pairs as letters, because .Net cannot
            ''//tell us whether they should be considered letters
            Dim japaneseCharsWithSurrogates = rangeHelper.GetCharactersForUnicodeRanges(japaneseRanges, True)
            Trace.WriteLine(japaneseCharsWithSurrogates.Count) ''//71615
        End If

        ''//--- en-US ---
        Dim englishRanges = rangeHelper.GetUnicodeRanges("en-US")
        If englishRanges IsNot Nothing Then
            ''//Characters are handed back as strings rather than Chars
            Dim englishChars = rangeHelper.GetCharactersForUnicodeRanges(englishRanges)
            Trace.WriteLine(englishChars.Count) ''//117
        End If
    End Sub
End Class
Public Class UnicodeCharRanges
#Region " Unmanaged "
''//P/Invoke declaration for GetLocaleInfoW in kernel32.dll.
''//This overload receives its result in a LOCALESIGNATURE structure (passed ByRef)
''//rather than in a character buffer.
''//  locale   - locale identifier to query
''//  LCType   - kind of locale information being requested
''//  lpLCData - structure that receives the data
''//  cchData  - size of the output buffer
''//             NOTE(review): the Win32 docs express this size in characters, not bytes - confirm callers pass the right unit
''//Returns the Win32 result code unchanged (per the Win32 docs, 0 means failure).
''//SetLastError:=True is set, so Marshal.GetLastWin32Error() can be consulted after a call.
<DllImport("kernel32.dll", SetLastError:=True, CharSet:=CharSet.Unicode)> _
Private Shared Function GetLocaleInfoW(ByVal locale As Integer, ByVal LCType As Integer, ByRef lpLCData As LOCALESIGNATURE, ByVal cchData As Integer) As Integer
End Function
''//LCType value that asks GetLocaleInfoW for a locale's font signature
''//(LOCALE_FONTSIGNATURE, 0x00000058 in the Windows headers).
''//Declared Const because it is a fixed Win32 value: it must never be reassigned,
''//needs no per-instance storage, and can be used from Shared members as well.
Private Const LOCALE_FONTSIGNATURE As Integer = &H58
''//Managed layout used as the output buffer of GetLocaleInfoW.
''//Three fixed-size Integer arrays, marshalled inline in declaration order.
<StructLayout(LayoutKind.Sequential)>
Private Structure LOCALESIGNATURE
    ''//4 Integers
    <MarshalAs(UnmanagedType.ByValArray, SizeConst:=4)> Public lsUsb As Integer()
    ''//2 Integers
    <MarshalAs(UnmanagedType.ByValArray, SizeConst:=2)> Public lsCsbDefault As Integer()
    ''//2 Integers
    <MarshalAs(UnmanagedType.ByValArray, SizeConst:=2)> Public lsCsbSupported As Integer()

    ''//Allocates zero-filled arrays whose lengths match the SizeConst values above.
    ''//Any array previously held by a field is replaced, not preserved.
    Public Sub Initialize()
        lsUsb = New Integer(3) {}
        lsCsbDefault = New Integer(1) {}
        lsCsbSupported = New Integer(1) {}
    End Sub
End Structure
#End Region
#Region " Locals "
''//List of Unicode range descriptors (UnicodeRangeInfo).
''//Has no initializer here; LoadRanges assigns a fresh list and fills it.
Private AllRanges As List(Of UnicodeRangeInfo)
#End Region
Private Sub LoadRanges()
''//Ranges from http://msdn.microsoft.com/en-us/library/dd374090%28VS.85%29.aspx
AllRanges = New List(Of UnicodeRangeInfo)
AllRanges.Add(New UnicodeRangeInfo(0, &H0, &H7F, "Basic Latin"))
AllRanges.Add(New UnicodeRangeInfo(1, &H80, &HFF, "Latin-1 Supplement"))
AllRanges.Add(New UnicodeRangeInfo(2, &H100, &H17F, "Latin Extended-A"))
AllRanges.Add(New UnicodeRangeInfo(3, &H180, &H24F, "Latin Extended-B"))
AllRanges.Add(New UnicodeRangeInfo(4, &H250, &H2AF, "IPA Extensions"))
AllRanges.Add(New UnicodeRangeInfo(4, &H1D00, &H1D7F, "Phonetic Extensions"))
AllRanges.Add(New UnicodeRangeInfo(4, &H1D80, &H1DBF, "Phonetic Extensions Supplement"))
AllRanges.Add(New UnicodeRangeInfo(5, &H2B0, &H2FF, "Spacing Modifier Letters"))
AllRanges.Add(New UnicodeRangeInfo(5, &HA700, &HA71F, "Modifier Tone Letters"))
AllRanges.Add(New UnicodeRangeInfo(6, &H300, &H36F, "Combining Diacritical Marks"))
AllRanges.Add(New UnicodeRangeInfo(6, &H1DC0, &H1DFF, "Combining Diacritical Marks Supplement"))
AllRanges.Add(New UnicodeRangeInfo(7, &H370, &H3FF, "Greek and Coptic"))
AllRanges.Add(New UnicodeRangeInfo(8, &H2C80, &H2CFF, "Coptic"))
AllRanges.Add(New UnicodeRangeInfo(9, &H400, &H4FF, "Cyrillic"))
AllRanges.Add(New UnicodeRangeInfo(9, &H500, &H52F, "Cyrillic Supplement"))
AllRanges.Add(New UnicodeRangeInfo(9, &H2DE0, &H2DFF, "Cyrillic Extended-A"))
AllRanges.Add(New UnicodeRangeInfo(9, &HA640, &HA69F, "Cyrillic Extended-B"))
AllRanges.Add(New UnicodeRangeInfo(10, &H530, &H58F, "Armenian"))
AllRanges.Add(New UnicodeRangeInfo(11, &H590, &H5FF, "Hebrew"))
AllRanges.Add(New UnicodeRangeInfo(12, &HA500, &HA63F, "&hVai"))
AllRanges.Add(New UnicodeRangeInfo(13, &H600, &H6FF, "Arabic"))
AllRanges.Add(New UnicodeRangeInfo(13, &H750, &H77F, "Arabic Supplement"))
AllRanges.Add(New UnicodeRangeInfo(14, &H7C0, &H7FF, "NKo"))
AllRanges.Add(New UnicodeRangeInfo(15, &H900, &H97F, "Devanagari"))
AllRanges.Add(New UnicodeRangeInfo(16, &H980, &H9FF, "Bengali"))
AllRanges.Add(New UnicodeRangeInfo(17, &HA00, &HA7F, "Gurmukhi"))
AllRanges.Add(New UnicodeRangeInfo(18, &HA80, &HAFF, "Gujarati"))
AllRanges.Add(New UnicodeRangeInfo(19, &HB00, &HB7F, "Oriya"))
AllRanges.Add(New UnicodeRangeInfo(20, &HB80, &HBFF, "Tamil"))
AllRanges.Add(New UnicodeRangeInfo(21, &HC00, &HC7F, "Telugu"))
AllRanges.Add(New UnicodeRangeInfo(22, &HC80, &HCFF, "Kannada"))
AllRanges.Add(New UnicodeRangeInfo(23, &HD00, &HD7F, "Malayalam"))
AllRanges.Add(New UnicodeRangeInfo(24, &HE00, &HE7F, "Thai"))
AllRanges.Add(New UnicodeRangeInfo(25, &HE80, &HEFF, "Lao"))
AllRanges.Add(New UnicodeRangeInfo(26, &H10A0, &H10FF, "Georgian"))
AllRanges.Add(New UnicodeRangeInfo(26, &H2D00, &H2D2F, "Georgian Supplement"))
AllRanges.Add(New UnicodeRangeInfo(27, &H1B00, &H1B7F, "Balinese"))
AllRanges.Add(New UnicodeRangeInfo(28, &H1100, &H11FF, "Hangul Jamo"))
AllRanges.Add(New UnicodeRangeInfo(29, &H1E00, &H1EFF, "Latin Extended Additional"))
AllRanges.Add(New UnicodeRangeInfo(29, &H2C60, &H2C7F, "Latin Extended-C"))
AllRanges.Add(New UnicodeRangeInfo(29, &HA720, &HA7FF, "Latin Extended-D"))
AllRanges.Add(New UnicodeRangeInfo(30, &H1F00, &H1FFF, "Greek Extended"))
AllRanges.Add(New UnicodeRangeInfo(31, &H2000, &H206F, "General Punctuation"))
AllRanges.Add(New UnicodeRangeInfo(31, &H2E00, &H2E7F, "Supplemental Punctuation"))
AllRanges.Add(New UnicodeRangeInfo(32, &H2070, &H209F, "Superscripts And Subscripts"))
AllRanges.Add(New UnicodeRangeInfo(33, &H20A0, &H20CF, "Currency Symbols"))
AllRanges.Add(New UnicodeRangeInfo(34, &H20D0, &H20FF, "Combining Diacritical Marks For Symbols"))
AllRanges.Add(New UnicodeRangeInfo(35, &H2100, &H214F, "Letterlike Symbols"))
AllRanges.Add(New UnicodeRangeInfo(36, &H2150, &H218F, "Number Forms"))
AllRanges.Add(New UnicodeRangeInfo(37, &H2190, &H21FF, "Arrows"))
AllRanges.Add(New UnicodeRangeInfo(37, &H27F0, &H27FF, "Supplemental Arrows-A"))
AllRanges.Add(New UnicodeRangeInfo(37, &H2900, &H297F, "Supplemental Arrows-B"))
AllRanges.Add(New UnicodeRangeInfo(37, &H2B00, &H2BFF, "Miscellaneous Symbols and Arrows"))
AllRanges.Add(New UnicodeRangeInfo(38, &H2200, &H22FF, "Mathematical Operators"))
AllRanges.Add(New UnicodeRangeInfo(38, &H27C0, &H27EF, "Miscellaneous Mathematical Symbols-A"))
AllRanges.Add(New UnicodeRangeInfo(38, &H2980, &H29FF, "Miscellaneous Mathematical Symbols-B"))
AllRanges.Add(New UnicodeRangeInfo(38, &H2A00, &H2AFF, "Supplemental Mathematical Operators"))
AllRanges.Add(New UnicodeRangeInfo(39, &H2300, &H23FF, "Miscellaneous Technical"))
AllRanges.Add(New UnicodeRangeInfo(40, &H2400, &H243F, "Control Pictures"))
AllRanges.Add(New UnicodeRangeInfo(41, &H2440, &H245F, "Optical Character Recognition"))
AllRanges.Add(New UnicodeRangeInfo(42, &H2460, &H24FF, "Enclosed Alphanumerics"))
AllRanges.Add(New UnicodeRangeInfo(43, &H2500, &H257F, "Box Drawing"))
AllRanges.Add(New UnicodeRangeInfo(44, &H2580, &H259F, "Block Elements"))
AllRanges.Add(New UnicodeRangeInfo(45, &H25A0, &H25FF, "Geometric Shapes"))
AllRanges.Add(New UnicodeRangeInfo(46, &H2600, &H26FF, "Miscellaneous Symbols"))
AllRanges.Add(New UnicodeRangeInfo(47, &H2700, &H27BF, "Dingbats"))
AllRanges.Add(New UnicodeRangeInfo(48, &H3000, &H303F, "CJK Symbols And Punctuation"))
AllRanges.Add(New UnicodeRangeInfo(49, &H3040, &H309F, "Hiragana"))
AllRanges.Add(New UnicodeRangeInfo(50, &H30A0, &H30FF, "Katakana"))
AllRanges.Add(New UnicodeRangeInfo(50, &H31F0, &H31FF, "Katakana Phonetic Extensions"))
AllRanges.Add(New UnicodeRangeInfo(51, &H3100, &H312F, "Bopomofo"))
AllRanges.Add(New UnicodeRangeInfo(51, &H31A0, &H31BF, "Bopomofo Extended"))
AllRanges.Add(New UnicodeRangeInfo(52, &H3130, &H318F, "Hangul Compatibility Jamo"))
AllRanges.Add(New UnicodeRangeInfo(53, &HA840, &HA87F, "Phags-pa"))
AllRanges.Add(New UnicodeRangeInfo(54, &H3200, &H32FF, "Enclosed CJK Letters And Months"))
AllRanges.Add(New UnicodeRangeInfo(55, &H3300, &H33FF, "CJK Compatibility"))
AllRanges.Add(New UnicodeRangeInfo(56, &HAC00, &HD7AF, "Hangul Syllables"))
AllRanges.Add(New UnicodeRangeInfo(57, &HD800, &HDFFF, "Non-Plane 0. Note that setting this bit implies that there is at least one supplementary code point beyond the Basic Multilingual Plane (BMP) that is supported by this font. See Surrogates and Supplementary Characters."))
AllRanges.Add(New UnicodeRangeInfo(58, &H10900, &H1091F, "Phoenician"))
AllRanges.Add(New UnicodeRangeInfo(59, &H2E80, &H2EFF, "CJK Radicals Supplement"))
AllRanges.Add(New UnicodeRangeInfo(59, &H2F00, &H2FDF, "Kangxi Radicals"))
AllRanges.Add(New UnicodeRangeInfo(59, &H2FF0, &H2FFF, "Ideographic Description Characters"))
AllRanges.Add(New UnicodeRangeInfo(59, &H3190, &H319F, "Kanbun"))
AllRanges.Add(New UnicodeRangeInfo(59, &H3400, &H4DBF, "CJK Unified Ideographs Extension A"))
AllRanges.Add(New UnicodeRangeInfo(59, &H4E00, &H9FFF, "CJK Unified Ideographs"))
AllRanges.Add(New UnicodeRangeInfo(59, &H20000, &H2A6DF, "CJK Unified Ideographs Extension B"))
AllRanges.Add(New UnicodeRangeInfo(60, &HE000, &HF8FF, "Private Use Area"))
AllRanges.Add(New UnicodeRangeInfo(61, &H31C0, &H31EF, "CJK St