Write a class HeadingParser that is a subclass of the HTMLParser class. It will find and collect the contents of all the headings in an HTML file fed to it.
ID: 3676189 • Letter: W
Question
Write a class HeadingParser that is a subclass of the HTMLParser class. It will find and collect the contents of all the headings in an HTML file fed to it. The parser works by identifying when a heading tag has been encountered and setting a boolean variable in the class to indicate that. When the data handler for the class is called and the boolean in the class indicates that a heading is currently open, the data inside the heading is added to a list. Finally, when a closing heading tag is encountered, the boolean variable is unset. To implement this parser you will need to write the following methods (some override HTMLParser methods):
- __init__: calls the HTMLParser __init__, initializes an empty list, and sets the boolean variable to False.
- handle_starttag: if the tag that resulted in this method being called is a heading, the heading indicator should be set.
- handle_endtag: if the tag that resulted in this method being called is a heading, the heading indicator should be unset.
- handle_data: if the parser is currently inside a heading, then the data should be added to the list of heading contents. Make sure that you strip any extra spaces or newlines off the contents of the heading before adding it to the list.
- getheadings(): returns the list of headings.
Explanation / Answer
String tagName = tq.consumeWord();
tq.chompTo(">");
- if (!tagName.isEmpty()) {
+ if (tagName.length() != 0) {
Tag tag = Tag.valueOf(tagName);
popStackToClose(tag);
}
@@ -117,7 +117,7 @@ private void parseStartTag() {
tq.consume("<");
String tagName = tq.consumeWord();
- if (tagName.isEmpty()) { // doesn't look like a start tag after all; put < back on stack and handle as text
+ if (tagName.length() == 0) { // doesn't look like a start tag after all; put < back on stack and handle as text
tq.addFirst("<");
parseTextNode();
return;
@@ -156,7 +156,7 @@ private void parseStartTag() {
// <base href>: update the base uri
if (child.tagName().equals("base")) {
String href = child.absUrl("href");
- if (!href.isEmpty()) { // ignore <base target> etc
+ if (href.length() != 0) { // ignore <base target> etc
baseUri = href;
doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base
}
@@ -187,7 +187,7 @@ private Attribute parseAttribute() {
}
tq.consumeWhitespace();
}
- if (!key.isEmpty())
+ if (key.length() != 0)
return Attribute.createFromEncoded(key, value);
else {
tq.consume(); // unknown char, keep popping so not get stuck
public boolean consumeWhitespace() {
boolean seen = false;
- while (!queue.isEmpty() && Character.isWhitespace(queue.peekFirst())) {
+ while (!queue.isEmpty() && Character.isWhitespace(queue.peek())) {
consume();
seen = true;
}
@@ -206,7 +206,7 @@ public boolean consumeWhitespace() {
*/
public String consumeWord() {
StringBuilder wordAccum = new StringBuilder();
- while (!queue.isEmpty() && Character.isLetterOrDigit(queue.peekFirst())) {
+ while (!queue.isEmpty() && Character.isLetterOrDigit(queue.peek())) {
wordAccum.append(queue.removeFirst());
}
return wordAccum.toString();
Selector test
assertEquals("div", els.get(2).tagName());
assertEquals("bar", els.get(2).attr("title"));
assertEquals("div", els.get(3).tagName());
- assertTrue(els.get(3).attr("title").isEmpty()); // missing attributes come back as empty string
+ assertTrue(els.get(3).attr("title").length() == 0); // missing attributes come back as empty string
assertFalse(els.get(3).hasAttr("title"));
assertEquals("span", els.get(4).tagName());
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.