metaproxy  1.21.0
Public Member Functions | Public Attributes | Friends | List of all members
metaproxy_1::HTMLParser::Rep Class Reference
Collaboration diagram for metaproxy_1::HTMLParser::Rep:
Collaboration graph

Public Member Functions

void parse_str (HTMLParserEvent &event, const char *cp)
 
void tagText (HTMLParserEvent &event, const char *text_start, const char *text_end)
 
int tagEnd (HTMLParserEvent &event, const char *tag, int tag_len, const char *cp)
 
int tagAttrs (HTMLParserEvent &event, const char *name, int len, const char *cp)
 
int skipAttribute (HTMLParserEvent &event, const char *cp, int *attr_len, const char **value, int *val_len, int *tr)
 
 Rep ()
 
 ~Rep ()
 

Public Attributes

int m_verbose
 
bool nest
 

Friends

class HTMLParser
 

Detailed Description

Definition at line 34 of file html_parser.cpp.

Constructor & Destructor Documentation

◆ Rep()

metaproxy_1::HTMLParser::Rep::Rep ( )

Definition at line 57 of file html_parser.cpp.

58 { // Initial parser state: tracing off, tag recognition on.
59  m_verbose = 0; // when non-zero the member functions printf() a "------ ..." trace line per event
60  nest = true; // parse_str() clears this after a <script> open tag and sets it again at </script>
61 }

◆ ~Rep()

metaproxy_1::HTMLParser::Rep::~Rep ( )

Definition at line 63 of file html_parser.cpp.

64 { // Empty body: this destructor performs no explicit cleanup.
65 }

Member Function Documentation

◆ parse_str()

void metaproxy_1::HTMLParser::Rep::parse_str ( HTMLParserEvent event,
const char *  cp 
)

Definition at line 221 of file html_parser.cpp.

222 { // Scan the NUL-terminated buffer cp; report markup, and the text between markup, to event.
223  const char *text_start = cp; // start of the text run that has not yet been handed to tagText()
224  while (*cp)
225  {
226  if (*cp++ != '<') // always advances; on a match cp is left on the character after '<'
227  continue;
228 
229  if (nest && *cp == '!') // "<!...": comment or other declaration (ignored while nest is false)
230  {
231  int i;
232  tagText(event, text_start, cp - 1); // flush pending text; cp - 1 is the '<'
233  if (cp[1] == '-' && cp[2] == '-') // "<!--": comment
234  {
235  for (i = 3; cp[i]; i++) // search for the terminating "-->"
236  if (cp[i] == '-' && cp[i+1] == '-' && cp[i+2] == '>')
237  {
238  i+= 2; // leave i on the '>' so that tagEnd() below consumes it
239  event.openTagStart(cp, i);
240  break;
241  }
242  } // NOTE: when no "-->" exists the loop ends at the NUL and openTagStart() is never called
243  else
244  {
245  for (i = 1; cp[i] && cp[i] != '>'; i++) // declaration runs up to the next '>' or end of input
246  ; // empty body: the loop only advances i
247  event.openTagStart(cp, i);
248  }
249  if (m_verbose)
250  printf("------ dtd %.*s\n", i, cp);
251  i += tagEnd(event, cp, i, cp + i); // add whatever tagEnd() consumed (the closing '>')
252  cp += i;
253  text_start = cp; // the next text run starts right after this construct
254  }
255  else if (nest && *cp == '?') // "<?...": processing instruction
256  {
257  int i;
258  tagText(event, text_start, cp - 1); // flush pending text; cp - 1 is the '<'
259  for (i = 1; cp[i] && cp[i] != '>'; i++) // runs up to the next '>' or end of input
260  ; // empty body: the loop only advances i
261  event.openTagStart(cp, i);
262  if (m_verbose)
263  printf("------ pi %.*s\n", i, cp);
264  i += tagEnd(event, cp, i, cp + i);
265  cp += i;
266  text_start = cp;
267  }
268  else if (*cp == '/' && isAlpha(cp[1])) // "</name": closing tag (checked even while nest is false)
269  {
270  int i;
271 
272  i = skipName(++cp); // cp now points at the tag name; i is used as its length
273 
274  if (!nest) // inside <script>: only a real "</script>" ends the script content
275  {
276  if (i == 6 && !yaz_strncasecmp(cp, "script", i)) // presumably a case-insensitive compare (YAZ helper, not shown here)
277  {
278  int ws = skipSpace(cp + 6); // offset past the name, applied before looking for '>'
279  if (cp[ws + 6] == '>')
280  nest = true; /* really terminated */
281  }
282  if (!nest)
283  continue; // still in script content: text_start is untouched, so this stays part of the text run
284  }
285  tagText(event, text_start, cp - 2); // cp - 2 is the '<' (cp - 1 is the '/')
286  event.closeTag(cp, i);
287  if (m_verbose)
288  printf("------ tag close %.*s\n", i, cp);
289  i += tagEnd(event, cp, i, cp + i);
290  cp += i;
291  text_start = cp;
292  }
293  else if (nest && isAlpha(*cp)) // "<name": opening tag
294  {
295  int i, j; // i: length of the tag name; j: characters consumed after the name
296  tagText(event, text_start, cp - 1); // flush pending text; cp - 1 is the '<'
297  i = skipName(cp);
298  event.openTagStart(cp, i);
299  if (m_verbose)
300  printf("------ tag open %.*s\n", i, cp);
301  j = tagAttrs(event, cp, i, cp + i); // attributes start right after the name
302  j += tagEnd(event, cp, i, cp + i + j); // then the "/>" or ">" that ends the tag
303 
304  if (i == 6 && !yaz_strncasecmp(cp, "script", i))
305  nest = false; // from here on only "</script>" is recognized as markup
306 
307  cp += i + j;
308  text_start = cp;
309  }
310  } // a '<' matching none of the branches is left in the pending text run
311  tagText(event, text_start, cp); // flush whatever text remains at end of input
312 }
int tagEnd(HTMLParserEvent &event, const char *tag, int tag_len, const char *cp)
void tagText(HTMLParserEvent &event, const char *text_start, const char *text_end)
int tagAttrs(HTMLParserEvent &event, const char *name, int len, const char *cp)
static int skipName(const char *cp)
Definition: html_parser.cpp:99
static int isAlpha(int c)
Definition: html_parser.cpp:86
static int skipSpace(const char *cp)
Definition: html_parser.cpp:91

References isAlpha(), skipName(), and skipSpace().

Here is the call graph for this function:

◆ skipAttribute()

int metaproxy_1::HTMLParser::Rep::skipAttribute ( HTMLParserEvent event,
const char *  cp,
int *  attr_len,
const char **  value,
int *  val_len,
int *  tr 
)

Definition at line 107 of file html_parser.cpp.

111 { // Measure one attribute at cp (name plus optional =value); returns the number of characters consumed.
112  int v0, v1; // start and end offsets of the value within cp
113  int i = skipName(cp);
114  *attr_len = i; // length of the attribute name as reported by skipName()
115  *value = NULL; // stays NULL when the attribute has no "=value" part
116  if (!i) // no name here: consume only what skipSpace() reports; *val_len and *tr are not written
117  return skipSpace(cp);
118  i += skipSpace(cp + i);
119  if (cp[i] == '=') // NOTE: *val_len and *tr are written only inside this branch
120  {
121  i++;
122  i += skipSpace(cp + i);
123  if (cp[i] == '\"' || cp[i] == '\'')
124  {
125  *tr = cp[i]; // remember which quote character opened the value
126  v0 = ++i;
127  while (cp[i] != *tr && cp[i]) // value runs to the matching quote or to end of input
128  i++;
129  v1 = i;
130  if (cp[i]) // consume the closing quote only if it is present
131  i++;
132  }
133  else
134  {
135  *tr = 0; // unquoted value
136  v0 = i;
137  while (cp[i] && !strchr(SPACECHR ">", cp[i])) // stop at any character of SPACECHR (macro defined elsewhere), at '>', or at end of input
138  i++;
139  v1 = i;
140  }
141  *value = cp + v0; // points into cp; nothing is copied
142  *val_len = v1 - v0;
143  i += skipSpace(cp + i);
144  }
145  return i; // the event parameter is not used in this body
146 }
#define SPACECHR
Definition: html_parser.cpp:29

References skipName(), skipSpace(), and SPACECHR.

Here is the call graph for this function:

◆ tagAttrs()

int metaproxy_1::HTMLParser::Rep::tagAttrs ( HTMLParserEvent event,
const char *  name,
int  len,
const char *  cp 
)

Definition at line 148 of file html_parser.cpp.

151 { // Report each attribute found at cp to event.attribute(); name/len identify the owning tag. Returns the number of characters consumed from cp.
152  int i = skipSpace(cp);
153  while (cp[i] && !strchr("/><", cp[i])) // stop at '/', '>', '<' or end of input
154  {
155  const char *attr_name = cp + i;
156  int attr_len;
157  const char *value;
158  int val_len = 0; // skipAttribute() leaves this unwritten for an attribute without "=value"
159  int tr = 0; // likewise unwritten in that case; 0 is also what skipAttribute() stores for an unquoted value
160  char x[2]; // quote character as a NUL-terminated string (empty when there is no quote)
161  int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr);
162  if (!nor) // nothing consumed: stop here so the loop cannot spin on the same character
163  break;
164  i += nor;
165 
166  x[0] = tr;
167  x[1] = 0;
168  if (m_verbose)
169  {
170  printf("------ attr %.*s", attr_len, attr_name);
171  if (value) // value is NULL for an attribute without "=value"
172  printf("=%.*s", val_len, value);
173  printf("\n");
174  }
175  event.attribute(name, len, attr_name, attr_len, value, val_len, x);
176  }
177  return i;
178 }
int skipAttribute(HTMLParserEvent &event, const char *cp, int *attr_len, const char **value, int *val_len, int *tr)

References skipSpace().

Here is the call graph for this function:

◆ tagEnd()

int metaproxy_1::HTMLParser::Rep::tagEnd ( HTMLParserEvent event,
const char *  tag,
int  tag_len,
const char *  cp 
)

Definition at line 180 of file html_parser.cpp.

182 { // Consume the tail of a tag starting at cp and report it to event; returns the number of characters consumed.
183  int i = 0;
184  int close_it = 0; // set to 1 when a '/' precedes the end of the tag
185  for (; cp[i] && !strchr("/><", cp[i]); i++) // skip to the first '/', '>', '<' or end of input
186  ; // empty body: the loop only advances i
187  if (i > 0) // anything skipped above is reported as text
188  {
189  if (m_verbose)
190  printf("------ text %.*s\n", i, cp);
191  event.text(cp, i);
192  }
193  if (cp[i] == '/') // the '/' is consumed even when no '>' follows it
194  {
195  close_it = 1;
196  i++;
197  }
198  if (cp[i] == '>') // anyTagEnd() is reported only when the '>' is actually present
199  {
200  if (m_verbose)
201  printf("------ any tag %s %.*s\n",
202  close_it ? "close" : "end", tag_len, tag);
203  event.anyTagEnd(tag, tag_len, close_it);
204  i++;
205  }
206  return i;
207 }

◆ tagText()

void metaproxy_1::HTMLParser::Rep::tagText ( HTMLParserEvent event,
const char *  text_start,
const char *  text_end 
)

Definition at line 209 of file html_parser.cpp.

211 { // Report the range [text_start, text_end) to event.text(); a zero-length range is skipped.
212  if (text_end - text_start) //got text to flush
213  {
214  if (m_verbose)
215  printf("------ text %.*s\n",
216  (int) (text_end - text_start), text_start);
217  event.text(text_start, text_end-text_start); // passes a pointer and a length; nothing is copied here
218  }
219 }

Friends And Related Function Documentation

◆ HTMLParser

friend class HTMLParser
friend

Definition at line 35 of file html_parser.cpp.

Member Data Documentation

◆ m_verbose

int metaproxy_1::HTMLParser::Rep::m_verbose

Definition at line 50 of file html_parser.cpp.

◆ nest

bool metaproxy_1::HTMLParser::Rep::nest

Definition at line 51 of file html_parser.cpp.


The documentation for this class was generated from the following file: