From 7e23efa57061f26e003939e4814af63861db7804 Mon Sep 17 00:00:00 2001 From: rsiddharth Date: Tue, 12 Nov 2013 15:57:19 +0530 Subject: parser/QuoteSpecialText doesn't escape `<` if it is part of an HTML tag. I'm trying to accommodate HTML markup into the scruffy markup. ;) --- parsers/DetectHTMLTag.java | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 parsers/DetectHTMLTag.java (limited to 'parsers/DetectHTMLTag.java') diff --git a/parsers/DetectHTMLTag.java b/parsers/DetectHTMLTag.java new file mode 100644 index 0000000..562b793 --- /dev/null +++ b/parsers/DetectHTMLTag.java @@ -0,0 +1,58 @@ +/*+ + * Copyright 2012, 2013 rsiddharth + * + * This file is part of Scruf. + * + * Scruf is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
import java.util.*;
import java.util.regex.*;

/**
 * Recognises HTML markup at the very start of a piece of text and keeps
 * track of the closing tags that are still expected.
 *
 * <p>An instance is stateful: every opening tag accepted by
 * {@link #isHtmlTag(String)} records its closing tag, and a later call whose
 * text starts with that closing tag consumes the record again.
 *
 * <p>Instances are not synchronized.
 */
public class DetectHTMLTag {

    /**
     * Matches, anchored at the start of the text, an opening tag that is
     * eventually followed by its own closing tag.
     *
     * <p>Group 1 is the tag name and group 2 is the closing tag
     * ({@code </name>}). The name is matched greedily and terminated by a
     * word boundary so that it is always the complete name; otherwise
     * {@code <blockquote>} could be paired with a nested {@code </b>}.
     * {@code DOTALL} lets the element span several lines.
     */
    private static final Pattern HTML_TAG_PATTERN =
        Pattern.compile("^<(\\w+)\\b.*?>.*?(</\\1>)", Pattern.DOTALL);

    /** Closing tags recorded for opening tags seen so far and not yet consumed. */
    private final List<String> closingHtmlTags = new ArrayList<String>();

    /** Set when an opening tag is accepted; see {@link #insideHtmlBlock()}. */
    private boolean insideHtmlBlock = false;

    /**
     * Tells whether {@code text} starts with HTML markup.
     *
     * <p>Returns {@code true} when the text starts with an opening tag whose
     * matching closing tag appears later in the same text (the closing tag
     * is then recorded as pending), or when the text starts with one of the
     * pending closing tags (which is then consumed).
     *
     * @param text the text to inspect, beginning at the candidate tag;
     *             must not be {@code null}
     * @return {@code true} if the text starts with a recognised opening tag
     *         or a pending closing tag, {@code false} otherwise
     */
    public boolean isHtmlTag(String text) {
        Matcher htmlTagMatcher = HTML_TAG_PATTERN.matcher(text);

        if (htmlTagMatcher.find()) {
            closingHtmlTags.add(htmlTagMatcher.group(2));
            insideHtmlBlock = true;
            return true;
        }
        return isClosingHtmlTag(text);
    }

    /**
     * Consumes the first pending closing tag that {@code text} starts with.
     *
     * <p>When nothing matches and no closing tag is pending any more, the
     * "inside HTML block" flag is cleared.
     *
     * @param text the text to inspect
     * @return {@code true} if a pending closing tag was found and removed
     */
    private boolean isClosingHtmlTag(String text) {
        // Explicit iterator so the matching entry can be removed safely
        // while the list is being traversed.
        for (Iterator<String> pending = closingHtmlTags.iterator(); pending.hasNext();) {
            if (text.startsWith(pending.next())) {
                pending.remove();
                return true;
            }
        }
        if (closingHtmlTags.isEmpty()) {
            insideHtmlBlock = false;
        }
        return false;
    }

    /**
     * Reports the "inside HTML block" flag.
     *
     * <p>The flag is raised whenever an opening tag is accepted. It is only
     * lowered by a later {@link #isHtmlTag(String)} call that matches
     * neither an opening tag nor a pending closing tag while no closing tags
     * are pending; consuming the last closing tag does not lower it by
     * itself.
     *
     * @return the current value of the flag
     */
    public boolean insideHtmlBlock() {
        return insideHtmlBlock;
    }
}