forked from starwing/luagbk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_bestfit.lua
222 lines (197 loc) · 5.42 KB
/
parse_bestfit.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
-- Generates a C header with code page <-> Unicode lookup tables from a
-- Microsoft "WindowsBestFit" mapping file.
--
-- Usage: lua <this script> [codepage [name [invalid_code]]]
--   codepage      input is read from "bestfit<codepage>.txt" (default "936")
--   name          output is written to "<name>.h"             (default "gbk")
--   invalid_code  value emitted as UNI_INVALID_CODE and used as the default
--                 filler when writing tables                  (default 0xFFFE)
--
-- download bestfit936.txt from:
-- http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit936.txt
local CP = arg[1] or "936"
local NAME = arg[2] or "gbk"
local INVALID = tonumber(arg[3] or 0xFFFE)
-- Reject a bad third argument right away: io.output() below truncates an
-- existing header, so failing later would leave a clobbered output file.
if not INVALID or INVALID % 1 ~= 0 then
  error(("invalid code %q: expected an integer such as 0xFFFE")
    :format(tostring(arg[3])), 0)
end
io.input("bestfit"..CP..".txt")
io.output(NAME..".h")

local info = {}       -- header fields: cp, dbcs, def_cp, def_uni
local cp_codes = {}   -- code page codes from MBTABLE/DBCSTABLE, in file order
local uni_codes = {}  -- Unicode code points from WCTABLE, in file order
local to_cp = {}      -- Unicode code point -> code page code
local from_cp = {}    -- code page code -> Unicode code point
local linum = 0       -- number of the input line being parsed

-- Parser state; nil means "not inside that section".
local mb_count        -- MBTABLE rows still expected
local dbcs_count      -- DBCS lead byte ranges still to be completed
local dbcs_range      -- { first, last, current } lead bytes of the open range
local dbcs_table      -- DBCSTABLE rows still expected
local wc_count        -- WCTABLE rows still expected
-- Finish the DBCSTABLE section that is currently open.  When it belonged to
-- the last lead byte of the open range, that range is finished as well.
local function end_dbcs_table()
  dbcs_table = nil
  if dbcs_range.current == dbcs_range.last then
    dbcs_count = dbcs_count - 1
    if dbcs_count == 0 then dbcs_count = nil end
    dbcs_range = nil
  end
end

-- Parse the mapping file from the default input, one line at a time.
--
-- Data rows ("0x.. 0x..") are attributed to whichever section counter is
-- currently set (mb_count, dbcs_table or wc_count); every other line must be
-- one of the section keywords handled below, otherwise parsing stops with
-- an error naming the line.  Reading ends at ENDCODEPAGE or end of input.
for line in io.lines() do
  linum = linum + 1
  -- Strip the trailing ';' comment BEFORE the blank test so that lines
  -- holding nothing but a comment are skipped instead of rejected.
  line = line:gsub("%s*;.*$", "")
  if line:match "^%s*$" then goto next end

  -- Row of an open MBTABLE section: single-byte code -> Unicode.
  if mb_count then
    local cp_code, uni_code = line:match "0x(%x+)%s+0x(%x+)"
    if cp_code then
      cp_code = tonumber(cp_code, 16)
      uni_code = tonumber(uni_code, 16)
      from_cp[cp_code] = uni_code
      cp_codes[#cp_codes + 1] = cp_code
      mb_count = mb_count - 1
      if mb_count == 0 then mb_count = nil end
      goto next
    end
  end

  -- Row of an open DBCSTABLE section: trail byte -> Unicode.  The full
  -- code is the current lead byte in the high byte plus the trail byte.
  if dbcs_table then
    local cp_code, uni_code = line:match "0x(%x+)%s+0x(%x+)"
    if cp_code then
      cp_code = dbcs_range.current * 256 + tonumber(cp_code, 16)
      uni_code = tonumber(uni_code, 16)
      from_cp[cp_code] = uni_code
      cp_codes[#cp_codes + 1] = cp_code
      dbcs_table = dbcs_table - 1
      if dbcs_table == 0 then end_dbcs_table() end
      goto next
    end
  end

  -- Row of an open WCTABLE section: Unicode -> code page code.
  if wc_count then
    local uni_code, cp_code = line:match "0x(%x+)%s+0x(%x+)"
    if uni_code then
      uni_code = tonumber(uni_code, 16)
      cp_code = tonumber(cp_code, 16)
      to_cp[uni_code] = cp_code
      uni_codes[#uni_codes + 1] = uni_code
      wc_count = wc_count - 1
      if wc_count == 0 then wc_count = nil end
      goto next
    end
  end

  local cp = line:match "CODEPAGE%s+(%d+)"
  if cp then
    info.cp = tonumber(cp)
    goto next
  end

  local endcodepage = line:match "ENDCODEPAGE"
  if endcodepage then
    break
  end

  local dbcs, def_cp, def_uni =
    line:match "CPINFO%s+(%d+)%s+0x(%x+)%s+0x(%x+)"
  if dbcs then
    info.dbcs, info.def_cp, info.def_uni =
      tonumber(dbcs),
      tonumber(def_cp, 16),
      tonumber(def_uni, 16)
    goto next
  end

  local mbtable = line:match "MBTABLE%s+(%d+)"
  if mbtable then
    mb_count = tonumber(mbtable)
    -- A zero count opens no section; a lingering 0 is truthy and would
    -- make the parser swallow data rows that belong to later sections.
    if mb_count == 0 then mb_count = nil end
    --print("mbtable: ", mb_count)
    goto next
  end

  local dbcsrange = line:match "DBCSRANGE%s+(%d+)"
  if dbcsrange then
    dbcs_count = tonumber(dbcsrange)
    if dbcs_count == 0 then dbcs_count = nil end
    --print("dbcs_count: ", dbcs_count)
    goto next
  end

  -- Ranges are pending but none is open: this line must name the next one.
  if dbcs_count and not dbcs_range then
    local first, last = line:match "0x(%x+)%s+0x(%x+)"
    if not first then
      error(linum..': expected DBCS lead byte range: '..line)
    end
    first = tonumber(first, 16)
    last = tonumber(last, 16)
    dbcs_range = {
      first = first,
      last = last,
      -- Incremented by each DBCSTABLE line, so the first one lands on
      -- `first`.
      current = first - 1
    }
    --print("dbcs_range: ", first, last)
    goto next
  end

  if dbcs_range then
    local dbcstable = line:match "DBCSTABLE%s+(%d+)"
    if dbcstable then
      dbcs_table = tonumber(dbcstable)
      dbcs_range.current = dbcs_range.current + 1
      -- An empty table is complete as soon as it is announced.
      if dbcs_table == 0 then end_dbcs_table() end
      --print("dbcs_table: ", dbcs_table, dbcs_range.current)
      goto next
    end
  end

  local wctable = line:match "WCTABLE%s+(%d+)"
  if wctable then
    wc_count = tonumber(wctable)
    if wc_count == 0 then wc_count = nil end
    --print("wc_count:", wc_count)
    goto next
  end

  error(linum..': '..line)
  -- Must stay the last statement of the loop body: the gotos above jump
  -- past local declarations, which Lua only permits for a trailing label.
  ::next::
end
--- Write one array element, followed by ", ", to the default output file.
-- Elements are grouped eight per row: a leading " " is written before the
-- element when `idx % 8 == 0` and a newline after it when `idx % 8 == 7`.
-- @param idx  position of the element; only used for the row layout
-- @param cp   a number is written as 0x%04X, any other value via tostring
local function output_code(idx, cp)
  local column = idx % 8
  if column == 0 then
    io.write " "
  end
  local is_number = type(cp) == 'number'
  if not is_number then
    io.write(tostring(cp), ", ")
  else
    io.write(string.format("0x%04X, ", cp))
  end
  if column == 7 then
    io.write "\n"
  end
end
--- Write the C lookup tables for one conversion direction.
--
-- For every distinct high byte ("leader") occurring in `codes` an array
-- `static const unsigned short <prefix>_<XX>[256]` is written, indexed by
-- the low byte; slots without a mapping hold `def`.  Afterwards a 256-entry
-- pointer table `<prefix>` is written whose slot for a leader names the
-- matching array, or NULL when that leader has no array.
--
-- @param prefix  name prefix used for the generated C identifiers
-- @param codes   array of source codes; sorted in place, may be empty
-- @param maps    table mapping every code in `codes` to its target value
-- @param def     filler for unmapped slots; defaults to INVALID
local function write_tables(prefix, codes, maps, def)
  def = def or INVALID
  table.sort(codes)

  local leaders = {}   -- set of leaders for which an array was written
  local leader         -- leader of the array currently being written
  local last_cp = 0    -- last code already written into that array

  -- Pad the open array up to its final slot and terminate it.
  local function close_table()
    for i = last_cp + 1, (leader + 1) * 256 - 1 do
      output_code(i, def)
    end
    io.write "};\n\n"
  end

  for _, cp in ipairs(codes) do
    local cl = math.floor(cp / 256)
    if cl ~= leader then
      if leader ~= nil then close_table() end
      leaders[cl] = true
      leader = cl
      io.write(("static const unsigned short %s_%02X[256] = {\n")
        :format(prefix, leader))
      -- One before the first slot, so the padding loop starts at slot 0.
      last_cp = leader * 256 - 1
    end
    -- `codes` is sorted, so a repeated code is adjacent to its twin;
    -- writing it again would overflow the 256-element array.
    if cp ~= last_cp then
      for i = last_cp + 1, cp - 1 do
        output_code(i, def)
      end
      output_code(cp, assert(maps[cp], "code without a mapping"))
      last_cp = cp
    end
  end
  -- With no codes at all there is no open array to close.
  if leader ~= nil then close_table() end

  io.write(("const unsigned short *%s[256] = {\n"):format(prefix))
  for i = 0, 255 do
    if leaders[i] then
      output_code(i, ("%s_%02X"):format(prefix, i))
    else
      output_code(i, "NULL")
    end
  end
  io.write "};\n\n\n"
end
-- Write the generated header: include guard, default-code macros, both
-- table sets, and the closing #endif.

-- Both default codes come from the CPINFO line; fail with a clear message
-- when the input never supplied one.
assert(info.def_cp and info.def_uni,
  "no CPINFO line found in bestfit"..CP..".txt")

-- NAME is passed as a format argument rather than spliced into the
-- template, so a '%' in the name cannot be read as a format directive.
io.write(([[
#ifndef %s_h
#define %s_h
#include <stddef.h>
#define DBCS_DEFAULT_CODE %#X
#define UNI_DEFAULT_CODE %#X
#define UNI_INVALID_CODE %#X
]]):format(NAME, NAME, info.def_cp, info.def_uni, INVALID))

-- Code page -> Unicode tables use the default filler (INVALID); Unicode ->
-- code page tables are filled with the code page's default character.
write_tables("to_uni", cp_codes, from_cp)
write_tables("from_uni", uni_codes, to_cp, info.def_cp)

io.write("#endif /* "..NAME.."_h */\n")
-- Close explicitly so that a failed flush is reported instead of lost.
assert(io.output():close())