4 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
8 proc preamble_trie {ofilehandle ifiles ofile} {
11 set totype {unsigned }
13 puts $f "/** \\file $ofile"
14 puts $f " \\brief Character conversion, generated from [
lindex $ifiles 0]"
16 puts $f " Generated automatically by charconv.tcl"
18 puts $f "\#include <string.h>"
20 puts $f "\#if HAVE_CONFIG_H"
21 puts $f "\#include <config.h>"
25 struct yaz_iconv_trie_flat {
27 unsigned combining : 1;
30 struct yaz_iconv_trie_dir {
32 unsigned combining : 1;
36 struct yaz_iconv_trie {
37 struct yaz_iconv_trie_flat *flat;
38 struct yaz_iconv_trie_dir *dir;
42 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
43 size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, unsigned int boffset)
45 struct yaz_iconv_trie *t = ptrs[ptr-1];
50 size_t ch = (inp[0] & mask) + boffset;
54 code = lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining, mask, boffset);
64 *combining = t->dir[ch].combining;
71 struct yaz_iconv_trie_flat *flat = t->flat;
74 size_t len = strlen(flat->from);
75 if (len <= inbytesleft)
78 for (i = 0; i < len; i++)
80 if (((unsigned char *) flat->from)[i] != (inp[i] & mask) + boffset)
86 *combining = flat->combining;
101 foreach x [
array names trie] {
112 proc ins_trie {from to combining codename} {
114 if {![
info exists trie(no)]} {
119 if {$trie(max) < $to} {
123 ins_trie_r [
split $from] $to $combining $codename 0
126 proc split_trie {this} {
128 set trie($this,type) d
129 foreach e $trie($this,content) {
130 set from [
lindex $e 0]
132 set combining [
lindex $e 2]
133 set codename [
lindex $e 3]
135 set ch [
lindex $from 0]
136 set rest [
lrange $from 1 end]
138 if {[
llength $rest]} {
139 if {![
info exist trie($this,ptr,$ch)]} {
140 set trie($this,ptr,$ch) $trie(no)
143 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
145 set trie($this,to,$ch) $to
146 set trie($this,combining,$ch) $combining
147 set trie($this,codename,$ch) $codename
150 set trie($this,content) missing
153 proc ins_trie_r {from to combining codename this} {
156 if {![
info exist trie($this,type)]} {
157 set trie($this,type) f
159 if {$trie($this,type) == "f"} {
161 if {[
info exists trie($this,content)]} {
162 foreach e $trie($this,content) {
163 set efrom [
lindex $e 0]
164 if { $efrom == $from } {
170 lappend trie($this,content) [list $from $to $combining $codename]
174 if {[
llength $trie($this,content)] > $trie(split)} {
176 return [
ins_trie_r $from $to $combining $codename $this]
179 set ch [
lindex $from 0]
180 set rest [
lrange $from 1 end]
182 if {[
llength $rest]} {
183 if {![
info exist trie($this,ptr,$ch)]} {
184 set trie($this,ptr,$ch) $trie(no)
187 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
189 if {![
info exist trie($this,to,$ch)]} {
190 set trie($this,to,$ch) $to
191 set trie($this,combining,$ch) $combining
192 set trie($this,codename,$ch) $codename
198 proc dump_trie {ofilehandle} {
203 puts $f "/* TRIE: size $trie(size) */"
206 while { [
incr this -1] >= 0 } {
207 puts $f "/* PAGE $this */"
208 if {$trie($this,type) == "f"} {
209 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
210 foreach m $trie($this,content) {
211 puts -nonewline $f " \{\""
212 foreach d [
lindex $m 0] {
213 puts -nonewline $f "\\x$d"
215 puts -nonewline $f "\", [
lindex $m 2], 0x[
lindex $m 1]"
217 puts $f "\}, /* $v */"
219 puts $f " \{\"\", 0, 0\}"
221 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
222 puts $f " $trie(prefix)page${this}_flat, 0"
225 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
226 for {
set i 0} {$i < 256} {
incr i} {
227 puts -nonewline $f " \{"
228 set ch [
format %02X $i]
230 if {[
info exist trie($this,ptr,$ch)]} {
231 puts -nonewline $f "[
expr $trie($this,ptr,$ch)+1], "
234 puts -nonewline $f "0, "
236 if {[
info exist trie($this,combining,$ch)]} {
237 puts -nonewline $f "$trie($this,combining,$ch), "
239 puts -nonewline $f "0, "
241 if {[
info exist trie($this,to,$ch)]} {
242 puts -nonewline $f "0x$trie($this,to,$ch)\}"
245 puts -nonewline $f "0\}"
247 if {[
info exist trie($this,codename,$ch)]} {
248 set v $trie($this,codename,$ch)
249 puts -nonewline $f " /* $v */"
258 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
259 puts $f " 0, $trie(prefix)page${this}_dir"
264 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
265 for {
set this 0} {$this < $trie(no)} {
incr this} {
266 puts $f " &$trie(prefix)page$this,"
271 puts $f "unsigned long yaz_$trie(prefix)_conv
272 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, unsigned int boffset)
276 code = lookup($trie(prefix)ptrs, 1, inp, inbytesleft, no_read, combining, mask, boffset);
286 proc readfile {fname ofilehandle prefix omits reverse} {
295 set f [open $fname r]
302 set cnt [
gets $f line]
306 if {[regexp {</characterSet>} $line s]} {
308 }
elseif {[regexp {<characterSet .*ISOcode="([0-9A-Fa-f]+)"} $line s tablenumber]} {
310 set trie(prefix) "${prefix}_$tablenumber"
312 }
elseif {[regexp {</code>} $line s]} {
313 if {[
string length $ucs]} {
315 for {
set i 0} {$i < [
string length $utf]} {
incr i 2} {
316 lappend hex [
string range $utf $i [
expr $i+1]]
319 ins_trie $hex $marc $combining $codename
323 for {
set i 0} {$i < [
string length $marc]} {
incr i 2} {
324 lappend hex [
string range $marc $i [
expr $i+1]]
327 ins_trie $hex $ucs $combining $codename
331 if {$reverse && [
string length $marc]} {
332 for {
set i 0} {$i < [
string length $altutf]} {
incr i 2} {
333 lappend hex [
string range $altutf $i [
expr $i+1]]
335 if {[
info exists hex]} {
336 ins_trie $hex $marc $combining $codename
345 }
elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
347 }
elseif {[regexp {<name>(.*)</name>} $line s codename]} {
349 }
elseif {[regexp {<name>(.*)} $line s codename]} {
352 set cnt [
gets $f line]
356 if {[regexp {(.*)</name>} $line s codename_ex]} {
357 set codename "${codename} ${codename_ex}"
359 }
elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
361 }
elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
363 }
elseif {[regexp {<utf-8>([0-9A-Fa-f]*)</utf-8>} $line s utf]} {
365 }
elseif {[regexp {<altutf-8>([0-9A-Fa-f]*)</altutf-8>} $line s altutf]} {
378 set l [
llength $argv]
382 set arg [
lindex $argv $i]
383 switch -glob -- $arg {
388 if {[
string length $arg]} {
389 set arg [
lindex $argv [
incr i]]
394 if {[
string length $arg]} {
395 set arg [
lindex $argv [
incr i]]
400 if {[
string length $arg]} {
401 set arg [
lindex $argv [
incr i]]
406 if {[
string length $arg]} {
407 set arg [
lindex $argv [
incr i]]
420 if {![
info exists ifiles]} {
421 puts "charconv.tcl: missing input file(s)"
425 set ofilehandle [open ${ofile}.tmp w]
428 foreach ifile $ifiles {
429 readfile $ifile $ofilehandle $prefix $omits $reverse_map
433 file rename -force ${ofile}.tmp ${ofile}