YAZ 5.35.1
tokenizer.c
Go to the documentation of this file.
/* This file is part of the YAZ toolkit.
 * Copyright (C) Index Data
 * See the file LICENSE for details.
 */
9#if HAVE_CONFIG_H
10#include <config.h>
11#endif
12
13#include <assert.h>
14#include <stdio.h>
15#include <string.h>
16#include <yaz/log.h>
17#include <yaz/wrbuf.h>
18#include <yaz/tokenizer.h>
19
29
38
39void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
40{
42 t->single_tokens = xstrdup(simple);
43}
44
46{
47 yaz_tok_cfg_t t = (yaz_tok_cfg_t) xmalloc(sizeof(*t));
48 t->white_space = xstrdup(" \t\r\n");
49 t->single_tokens = xstrdup("");
50 t->quote_tokens_begin = xstrdup("\"");
51 t->quote_tokens_end = xstrdup("\"");
52 t->comment = xstrdup("#");
53 t->ref_count = 1;
54 return t;
55}
56
58{
59 t->ref_count--;
60 if (t->ref_count == 0)
61 {
66 xfree(t->comment);
67 xfree(t);
68 }
69}
70
/**
 * \brief Byte reader over a NUL-terminated string.
 *
 * \param vp address of the cursor; the cursor points at the next byte of
 *           the string and is advanced by one for every non-NUL byte read
 * \return the byte under the cursor, or 0 at the end of the string (the
 *         cursor then stays on the terminator, so further calls keep
 *         returning 0)
 */
static int read_buf(void **vp)
{
    /* Read and write the cursor as the 'void *' it is declared as, rather
       than through a 'const char **' alias of the same object. */
    const char *cp = (const char *) *vp;
    int ch = *cp;

    if (ch)
        *vp = (void *) (cp + 1);
    return ch;
}
82
84{
85 return yaz_tok_parse_create(t, read_buf, (void *) buf);
86}
87
89{
90 int ch = tp->unget_byte;
91 assert(tp->get_byte_func);
92 if (ch)
93 tp->unget_byte = 0;
94 else
95 ch = tp->get_byte_func(&tp->get_byte_data);
96 return ch;
97}
98
99static void unget_byte(yaz_tok_parse_t tp, int ch)
100{
101 tp->unget_byte = ch;
102}
103
106 void *vp)
107{
108 yaz_tok_parse_t tp = (yaz_tok_parse_t) xmalloc(sizeof(*tp));
109
110 tp->cfg = t;
111 tp->cfg->ref_count++;
112 tp->get_byte_func = h;
113 tp->get_byte_data = vp;
114
115 tp->look = YAZ_TOK_ERROR;
116 tp->unget_byte = 0;
117
118 tp->wr_string = wrbuf_alloc();
119 return tp;
120}
121
122
129
131{
132 yaz_tok_cfg_t t = tp->cfg;
133 const char *cp;
134 int ch = get_byte(tp);
135
136 /* skip white space */
137 while (ch && strchr(t->white_space, ch))
138 ch = get_byte(tp);
139 if (!ch)
140 ch = YAZ_TOK_EOF;
141 else if (strchr(t->comment, ch))
142 ch = YAZ_TOK_EOF;
143 else if ((cp = strchr(t->single_tokens, ch)))
144 ch = *cp; /* single token match */
145 else if ((cp = strchr(t->quote_tokens_begin, ch)))
146 { /* quoted string */
147 int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
148 ch = get_byte(tp);
150 while (ch && ch != end_ch)
151 wrbuf_putc(tp->wr_string, ch);
152 if (!ch)
153 ch = YAZ_TOK_ERROR;
154 else
155 ch = YAZ_TOK_QSTRING;
156 }
157 else
158 { /* unquoted string */
160 while (ch && !strchr(t->white_space, ch)
161 && !strchr(t->single_tokens, ch)
162 && !strchr(t->comment, ch))
163 {
164 wrbuf_putc(tp->wr_string, ch);
165 ch = get_byte(tp);
166 }
167 unget_byte(tp, ch);
168 ch = YAZ_TOK_STRING;
169 }
170 tp->look = ch;
171 return ch;
172}
173
175{
176 return wrbuf_cstr(tp->wr_string);
177}
178
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
187
Logging utility.
string buffer
Definition wrbuf.h:43
char * quote_tokens_end
Definition tokenizer.c:36
char * white_space
Definition tokenizer.c:33
char * quote_tokens_begin
Definition tokenizer.c:35
char * single_tokens
Definition tokenizer.c:34
int ref_count
Definition tokenizer.c:31
char * comment
Definition tokenizer.c:32
yaz_tok_get_byte_t get_byte_func
Definition tokenizer.c:26
WRBUF wr_string
Definition tokenizer.c:22
void * get_byte_data
Definition tokenizer.c:27
yaz_tok_cfg_t cfg
Definition tokenizer.c:25
yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
Definition tokenizer.c:83
yaz_tok_cfg_t yaz_tok_cfg_create(void)
Definition tokenizer.c:45
static void unget_byte(yaz_tok_parse_t tp, int ch)
Definition tokenizer.c:99
static int get_byte(yaz_tok_parse_t tp)
Definition tokenizer.c:88
static int read_buf(void **vp)
Definition tokenizer.c:71
void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
Definition tokenizer.c:123
void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
Definition tokenizer.c:39
void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
Definition tokenizer.c:57
const char * yaz_tok_parse_string(yaz_tok_parse_t tp)
Definition tokenizer.c:174
int yaz_tok_move(yaz_tok_parse_t tp)
Definition tokenizer.c:130
yaz_tok_parse_t yaz_tok_parse_create(yaz_tok_cfg_t t, yaz_tok_get_byte_t h, void *vp)
Definition tokenizer.c:104
Header with public definitions about YAZ' tokenizer.
struct yaz_tok_parse * yaz_tok_parse_t
Definition tokenizer.h:44
#define YAZ_TOK_QSTRING
Definition tokenizer.h:41
#define YAZ_TOK_EOF
Definition tokenizer.h:38
#define YAZ_TOK_STRING
Definition tokenizer.h:40
struct yaz_tok_cfg * yaz_tok_cfg_t
Definition tokenizer.h:43
int(* yaz_tok_get_byte_t)(void **vp)
Definition tokenizer.h:46
#define YAZ_TOK_ERROR
Definition tokenizer.h:39
void wrbuf_destroy(WRBUF b)
destroy WRBUF and its buffer
Definition wrbuf.c:38
const char * wrbuf_cstr(WRBUF b)
returns WRBUF content as C-string
Definition wrbuf.c:299
void wrbuf_rewind(WRBUF b)
empty WRBUF content (length of buffer set to 0)
Definition wrbuf.c:47
WRBUF wrbuf_alloc(void)
construct WRBUF
Definition wrbuf.c:25
Header for WRBUF (growing buffer)
#define wrbuf_putc(b, c)
Definition wrbuf.h:287
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition xmalloc.h:55
#define xfree(x)
utility macro which calls xfree_f
Definition xmalloc.h:53
#define xmalloc(x)
utility macro which calls malloc_f
Definition xmalloc.h:49