summary | refs | log | tree | commit | diff | stats
path: root/parsers/DetectHTMLTag.java
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/DetectHTMLTag.java')
-rw-r--r-- parsers/DetectHTMLTag.java 58
1 files changed, 58 insertions, 0 deletions
diff --git a/parsers/DetectHTMLTag.java b/parsers/DetectHTMLTag.java
new file mode 100644
index 0000000..562b793
--- /dev/null
+++ b/parsers/DetectHTMLTag.java
@@ -0,0 +1,58 @@
+/*+
+ * Copyright 2012, 2013 rsiddharth <rsiddharth@ninthfloor.org>
+ *
+ * This file is part of Scruf.
+ *
+ * Scruf is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package scruf.parsers;
+
+import java.util.*;
+import java.util.regex.*;
+
+/**
+ * Detects HTML elements at the start of a piece of text and keeps track
+ * of the closing tags that are still expected.
+ *
+ * <p>An instance is stateful: every successful opening-tag match records
+ * the corresponding closing tag, and a later call whose text starts with
+ * that closing tag consumes it again. The class does no synchronization.
+ */
+public class DetectHTMLTag {
+    /**
+     * Matches text that starts with an opening tag and contains the
+     * closing tag of the same name further on.
+     *
+     * <p>Group 2 is the tag name and group 3 the closing tag. The
+     * {@code \b} after the name forces the whole name to be captured;
+     * without it the back-reference could bind to a mere prefix of the
+     * name (an {@code img} opener would pair with a later
+     * {@code </i>}). {@code DOTALL} lets the element span line breaks.
+     */
+    private static final Pattern HTML_TAG_PATTERN =
+        Pattern.compile("^\\<((\\w+)\\b.*?)\\>.*?(\\<\\/\\2\\>)",
+                        Pattern.DOTALL);
+
+    /** Closing tags recorded by opening-tag matches and not yet seen. */
+    private final List<String> closingHtmlTags = new ArrayList<String>();
+
+    /** Value reported by {@link #insideHtmlBlock()}. */
+    private boolean insideHtmlBlock = false;
+
+    /**
+     * Tells whether {@code text} starts with an HTML tag this detector
+     * recognizes.
+     *
+     * <p>If {@code text} starts with an opening tag whose closing tag
+     * occurs later in {@code text}, that closing tag is recorded and the
+     * detector is marked as being inside an HTML block. Otherwise the
+     * text is checked against the recorded closing tags.
+     *
+     * @param text the text to inspect (presumably a line or the not yet
+     *             parsed remainder of the input; confirm against the
+     *             caller); {@code null} is treated as "not a tag"
+     * @return {@code true} if {@code text} starts with a matched opening
+     *         tag or with a recorded closing tag, {@code false} otherwise
+     */
+    public boolean isHtmlTag(String text) {
+        if (text == null) {
+            // Nothing to inspect; leave the recorded state untouched.
+            return false;
+        }
+
+        Matcher htmlTagMatcher = HTML_TAG_PATTERN.matcher(text);
+        if (htmlTagMatcher.find()) {
+            closingHtmlTags.add(htmlTagMatcher.group(3));
+            insideHtmlBlock = true;
+            return true;
+        }
+        return isClosingHtmlTag(text);
+    }
+
+    /**
+     * Checks whether {@code text} starts with one of the recorded
+     * closing tags and, if so, removes that tag from the record.
+     *
+     * <p>When nothing matches and no closing tag is pending any more,
+     * the inside-block flag is cleared. The flag is deliberately left
+     * alone on a match, so it stays set for the call that consumes the
+     * last closing tag.
+     *
+     * @param text the text to inspect, never {@code null}
+     * @return {@code true} if a recorded closing tag was consumed
+     */
+    private boolean isClosingHtmlTag(String text) {
+        // Remove through the iterator so the list is never modified
+        // behind the back of an active iteration.
+        Iterator<String> pending = closingHtmlTags.iterator();
+        while (pending.hasNext()) {
+            if (text.startsWith(pending.next())) {
+                pending.remove();
+                return true;
+            }
+        }
+        if (closingHtmlTags.isEmpty()) {
+            insideHtmlBlock = false;
+        }
+        return false;
+    }
+
+    /**
+     * Reports the inside-block flag.
+     *
+     * @return {@code true} from the moment an opening tag was matched
+     *         until a later {@link #isHtmlTag(String)} call matches
+     *         nothing while no closing tag is pending
+     */
+    public boolean insideHtmlBlock() {
+        return insideHtmlBlock;
+    }
+}