Retrieving Html Encoded Text From Xml Using Saxparser
Solution 1:
Wonderful. This solution confused me a little, and I couldn't obtain a value for localName like you have, but I was still able to get the StringBuilder approach to work.
I didn't replace the existing assignment in the method:
public void characters(char[] ch, int start, int length) throws SAXException {
tempVal = new String(ch,start,length);
But instead added the following line to the method:
tempSB = tempSB.append(new String(ch, start, length));
Where tempSB is a StringBuilder object. That meant I didn't need to alter my entire parser, and could simply switch to reading the SB when it was necessary. When I came to an element that contained html, in startElement, I used:
tempSB.delete(0, tempSB.length());
And in endElement I used:
tempText.setText(tempSB.toString()) ;
Simple as that. No complex boolean system required in my case, and no need to access localName, which is a concept that eludes me. I seem to do just fine accessing qName.
Thanks very much kcoppock for posting the solution you found. I've been looking for hours and this is the only article I could find concise and clear enough to help. The task I'm working on is really urgent, and I would have failed without your help.
Solution 2:
In case it helps anyone, I was able to solve this issue by using a boolean for every field in which I'm interested in the data. Then I just continued to append to a StringBuilder until I reached a closing tag, after which I took the StringBuilder value, then emptied it, and set my boolean to false.
@Overridepublic void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
sb.delete(0, sb.length());
if (localName.equalsIgnoreCase("channel")) {
inChannel = true;
newFeed = new Feed();
itemList = new ArrayList<Item>();
}
if (inChannel) {
if (localName.equalsIgnoreCase("image")) {
feedImage = new Image();
inImage = true;
return;
}
elseif (localName.equalsIgnoreCase("item")) {
newItem = new Item();
inItem = true;
return;
}
if(inImage) { //set booleans for image elementsif (localName.equalsIgnoreCase("title")) imgTitle = true;
elseif (localName.equalsIgnoreCase("link")) imgLink = true;
elseif (localName.equalsIgnoreCase("url")) imgURL = true;
return;
}
elseif(inItem) { //set booleans for item elementsif (localName.equalsIgnoreCase("title")) iTitle = true;
elseif (localName.equalsIgnoreCase("link")) iLink = true;
elseif (localName.equalsIgnoreCase("description")) iDescription = true;
elseif (localName.equalsIgnoreCase("author")) iAuthor = true;
elseif (localName.equalsIgnoreCase("category")) iCategory = true;
elseif (localName.equalsIgnoreCase("comments")) iComments = true;
elseif (localName.equalsIgnoreCase("guid")) iGuid = true;
elseif (localName.equalsIgnoreCase("pubdate")) iPubDate= true;
elseif (localName.equalsIgnoreCase("source")) iSource = true;
return;
} else { //set booleans for channel elementsif (localName.equalsIgnoreCase("title")) fTitle = true;
elseif (localName.equalsIgnoreCase("link")) fLink = true;
elseif (localName.equalsIgnoreCase("description")) fDescription = true;
elseif (localName.equalsIgnoreCase("language")) fLanguage= true;
elseif (localName.equalsIgnoreCase("copyright")) fCopyright = true;
elseif (localName.equalsIgnoreCase("category")) fCategory = true;
return;
}
}
}
@Overridepublic void endElement(String uri, String localName, String qName) throws SAXException {
if(inChannel) {
if(inImage) {
if (localName.equalsIgnoreCase("title")) {
feedImage.setTitle(sb.toString());
sb.delete(0, sb.length());
imgTitle = false;
return;
}
elseif (localName.equalsIgnoreCase("link")) {
feedImage.setLink(sb.toString());
sb.delete(0, sb.length());
imgLink = false;
return;
}
elseif (localName.equalsIgnoreCase("url")) {
feedImage.setUrl(sb.toString());
sb.delete(0, sb.length());
imgURL = false;
return;
}
elsereturn;
}
elseif(inItem) {
if (localName.equalsIgnoreCase("item")) {
itemList.add(newItem);
newItem = null;
inItem = false;
return;
} elseif (localName.equalsIgnoreCase("title")) {
newItem.setTitle(sb.toString());
sb.delete(0, sb.length());
iTitle = false;
return;
} elseif (localName.equalsIgnoreCase("link")) {
newItem.setLink(sb.toString());
sb.delete(0, sb.length());
iLink = false;
return;
} elseif (localName.equalsIgnoreCase("description")) {
newItem.setDescription(sb.toString());
sb.delete(0, sb.length());
iDescription = false;
return;
} elseif (localName.equalsIgnoreCase("author")) {
newItem.setAuthor(sb.toString());
sb.delete(0, sb.length());
iAuthor = false;
return;
} elseif (localName.equalsIgnoreCase("category")) {
newItem.addCategory(sb.toString());
sb.delete(0, sb.length());
iCategory = false;
return;
} elseif (localName.equalsIgnoreCase("comments")) {
newItem.setComments(sb.toString());
sb.delete(0, sb.length());
iComments = false;
return;
} /*else if (localName.equalsIgnoreCase("enclosure")) {
To be implemented later
}*/elseif (localName.equalsIgnoreCase("guid")) {
newItem.setGuid(sb.toString());
sb.delete(0, sb.length());
iGuid = false;
return;
} elseif (localName.equalsIgnoreCase("pubDate")) {
newItem.setPubDate(sb.toString());
sb.delete(0, sb.length());
iPubDate = false;
return;
}
}
else {
if(localName.equalsIgnoreCase("channel")) {
newFeed.setItems((ArrayList<Item>)itemList);
finalFeed = newFeed;
newFeed = null;
inChannel = false;
return;
} elseif(localName.equalsIgnoreCase("title")) {
newFeed.setTitle(currentValue);
sb.delete(0, sb.length());
fTitle = false;
return;
} elseif(localName.equalsIgnoreCase("link")) {
newFeed.setLink(currentValue);
sb.delete(0, sb.length());
fLink = false;
return;
} elseif(localName.equalsIgnoreCase("description")) {
newFeed.setDescription(sb.toString());
sb.delete(0, sb.length());
fDescription = false;
return;
} elseif(localName.equalsIgnoreCase("language")) {
newFeed.setLanguage(currentValue);
sb.delete(0, sb.length());
fLanguage = false;
return;
} elseif(localName.equalsIgnoreCase("copyright")) {
newFeed.setCopyright(currentValue);
sb.delete(0, sb.length());
fCopyright = false;
return;
} elseif(localName.equalsIgnoreCase("category")) {
newFeed.addCategory(currentValue);
sb.delete(0, sb.length());
fCategory = false;
return;
}
}
}
}
/**
 * Appends a chunk of character data to the shared buffer.
 *
 * SAX may deliver the text of a single element in several chunks (for
 * example around entity references), so the text is accumulated here
 * rather than assigned.
 *
 * @param ch     array holding the characters supplied by the parser
 * @param start  index of the first character to read from {@code ch}
 * @param length number of characters to read
 */
@Override
public void characters(char[] ch, int start, int length) {
    // Append the range directly; no temporary String is needed.
    sb.append(ch, start, length);
}
Solution 3:
Special characters like that are often enclosed in CDATA sections. You need to make sure they are preserved; the SAX parser can then deal with them correctly.
Post a Comment for "Retrieving Html Encoded Text From Xml Using Saxparser"