metaproxy 1.22.1
Public Member Functions | Public Attributes | Friends | List of all members
metaproxy_1::HTMLParser::Rep Class Reference
Collaboration diagram for metaproxy_1::HTMLParser::Rep:
Collaboration graph

Public Member Functions

void parse_str (HTMLParserEvent &event, const char *cp)
 
void tagText (HTMLParserEvent &event, const char *text_start, const char *text_end)
 
int tagEnd (HTMLParserEvent &event, const char *tag, int tag_len, const char *cp)
 
int tagAttrs (HTMLParserEvent &event, const char *name, int len, const char *cp)
 
int skipAttribute (HTMLParserEvent &event, const char *cp, int *attr_len, const char **value, int *val_len, int *tr)
 
 Rep ()
 
 ~Rep ()
 

Public Attributes

int m_verbose
 
bool nest
 

Friends

class HTMLParser
 

Detailed Description

Definition at line 34 of file html_parser.cpp.

Constructor & Destructor Documentation

◆ Rep()

metaproxy_1::HTMLParser::Rep::Rep ( )

Definition at line 57 of file html_parser.cpp.

◆ ~Rep()

metaproxy_1::HTMLParser::Rep::~Rep ( )

Definition at line 63 of file html_parser.cpp.

64{
65}

Member Function Documentation

◆ parse_str()

void metaproxy_1::HTMLParser::Rep::parse_str ( HTMLParserEvent &  event,
const char *  cp 
)

Definition at line 221 of file html_parser.cpp.

222{
223 const char *text_start = cp;
224 while (*cp)
225 {
226 if (*cp++ != '<')
227 continue;
228
229 if (nest && *cp == '!')
230 {
231 int i;
232 tagText(event, text_start, cp - 1);
233 if (cp[1] == '-' && cp[2] == '-')
234 {
235 for (i = 3; cp[i]; i++)
236 if (cp[i] == '-' && cp[i+1] == '-' && cp[i+2] == '>')
237 {
238 i+= 2;
239 event.openTagStart(cp, i);
240 break;
241 }
242 }
243 else
244 {
245 for (i = 1; cp[i] && cp[i] != '>'; i++)
246 ;
247 event.openTagStart(cp, i);
248 }
249 if (m_verbose)
250 printf("------ dtd %.*s\n", i, cp);
251 i += tagEnd(event, cp, i, cp + i);
252 cp += i;
253 text_start = cp;
254 }
255 else if (nest && *cp == '?')
256 {
257 int i;
258 tagText(event, text_start, cp - 1);
259 for (i = 1; cp[i] && cp[i] != '>'; i++)
260 ;
261 event.openTagStart(cp, i);
262 if (m_verbose)
263 printf("------ pi %.*s\n", i, cp);
264 i += tagEnd(event, cp, i, cp + i);
265 cp += i;
266 text_start = cp;
267 }
268 else if (*cp == '/' && isAlpha(cp[1]))
269 {
270 int i;
271
272 i = skipName(++cp);
273
274 if (!nest)
275 {
276 if (i == 6 && !yaz_strncasecmp(cp, "script", i))
277 {
278 int ws = skipSpace(cp + 6);
279 if (cp[ws + 6] == '>')
280 nest = true; /* really terminated */
281 }
282 if (!nest)
283 continue;
284 }
285 tagText(event, text_start, cp - 2);
286 event.closeTag(cp, i);
287 if (m_verbose)
288 printf("------ tag close %.*s\n", i, cp);
289 i += tagEnd(event, cp, i, cp + i);
290 cp += i;
291 text_start = cp;
292 }
293 else if (nest && isAlpha(*cp))
294 {
295 int i, j;
296 tagText(event, text_start, cp - 1);
297 i = skipName(cp);
298 event.openTagStart(cp, i);
299 if (m_verbose)
300 printf("------ tag open %.*s\n", i, cp);
301 j = tagAttrs(event, cp, i, cp + i);
302 j += tagEnd(event, cp, i, cp + i + j);
303
304 if (i == 6 && !yaz_strncasecmp(cp, "script", i))
305 nest = false;
306
307 cp += i + j;
308 text_start = cp;
309 }
310 }
311 tagText(event, text_start, cp);
312}
int tagEnd(HTMLParserEvent &event, const char *tag, int tag_len, const char *cp)
void tagText(HTMLParserEvent &event, const char *text_start, const char *text_end)
int tagAttrs(HTMLParserEvent &event, const char *name, int len, const char *cp)
static int skipName(const char *cp)
static int isAlpha(int c)
static int skipSpace(const char *cp)

References isAlpha(), skipName(), and skipSpace().

Here is the call graph for this function:

◆ skipAttribute()

int metaproxy_1::HTMLParser::Rep::skipAttribute ( HTMLParserEvent &  event,
const char *  cp,
int *  attr_len,
const char **  value,
int *  val_len,
int *  tr 
)

Definition at line 107 of file html_parser.cpp.

111{
112 int v0, v1;
113 int i = skipName(cp);
114 *attr_len = i;
115 *value = NULL;
116 if (!i)
117 return skipSpace(cp);
118 i += skipSpace(cp + i);
119 if (cp[i] == '=')
120 {
121 i++;
122 i += skipSpace(cp + i);
123 if (cp[i] == '\"' || cp[i] == '\'')
124 {
125 *tr = cp[i];
126 v0 = ++i;
127 while (cp[i] != *tr && cp[i])
128 i++;
129 v1 = i;
130 if (cp[i])
131 i++;
132 }
133 else
134 {
135 *tr = 0;
136 v0 = i;
137 while (cp[i] && !strchr(SPACECHR ">", cp[i]))
138 i++;
139 v1 = i;
140 }
141 *value = cp + v0;
142 *val_len = v1 - v0;
143 i += skipSpace(cp + i);
144 }
145 return i;
146}
#define SPACECHR

References skipName(), skipSpace(), and SPACECHR.

Here is the call graph for this function:

◆ tagAttrs()

int metaproxy_1::HTMLParser::Rep::tagAttrs ( HTMLParserEvent &  event,
const char *  name,
int  len,
const char *  cp 
)

Definition at line 148 of file html_parser.cpp.

151{
152 int i = skipSpace(cp);
153 while (cp[i] && !strchr("/><", cp[i]))
154 {
155 const char *attr_name = cp + i;
156 int attr_len;
157 const char *value;
158 int val_len;
159 int tr;
160 char x[2];
161 int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr);
162 if (!nor)
163 break;
164 i += nor;
165
166 x[0] = tr;
167 x[1] = 0;
168 if (m_verbose)
169 {
170 printf("------ attr %.*s", attr_len, attr_name);
171 if (value)
172 printf("=%.*s", val_len, value);
173 printf("\n");
174 }
175 event.attribute(name, len, attr_name, attr_len, value, val_len, x);
176 }
177 return i;
178}
int skipAttribute(HTMLParserEvent &event, const char *cp, int *attr_len, const char **value, int *val_len, int *tr)

References skipSpace().

Here is the call graph for this function:

◆ tagEnd()

int metaproxy_1::HTMLParser::Rep::tagEnd ( HTMLParserEvent &  event,
const char *  tag,
int  tag_len,
const char *  cp 
)

Definition at line 180 of file html_parser.cpp.

182{
183 int i = 0;
184 int close_it = 0;
185 for (; cp[i] && !strchr("/><", cp[i]); i++)
186 ;
187 if (i > 0)
188 {
189 if (m_verbose)
190 printf("------ text %.*s\n", i, cp);
191 event.text(cp, i);
192 }
193 if (cp[i] == '/')
194 {
195 close_it = 1;
196 i++;
197 }
198 if (cp[i] == '>')
199 {
200 if (m_verbose)
201 printf("------ any tag %s %.*s\n",
202 close_it ? "close" : "end", tag_len, tag);
203 event.anyTagEnd(tag, tag_len, close_it);
204 i++;
205 }
206 return i;
207}

◆ tagText()

void metaproxy_1::HTMLParser::Rep::tagText ( HTMLParserEvent &  event,
const char *  text_start,
const char *  text_end 
)

Definition at line 209 of file html_parser.cpp.

211{
212 if (text_end - text_start) //got text to flush
213 {
214 if (m_verbose)
215 printf("------ text %.*s\n",
216 (int) (text_end - text_start), text_start);
217 event.text(text_start, text_end-text_start);
218 }
219}

Friends And Related Symbol Documentation

◆ HTMLParser

friend class HTMLParser
friend

Definition at line 35 of file html_parser.cpp.

Member Data Documentation

◆ m_verbose

int metaproxy_1::HTMLParser::Rep::m_verbose

Definition at line 50 of file html_parser.cpp.

◆ nest

bool metaproxy_1::HTMLParser::Rep::nest

Definition at line 51 of file html_parser.cpp.


The documentation for this class was generated from the following file: