summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
authorrsiddharth <rsiddharth@ninthfloor.org>2013-11-12 15:57:19 +0530
committerrsiddharth <rsiddharth@ninthfloor.org>2013-11-12 15:57:19 +0530
commit7e23efa57061f26e003939e4814af63861db7804 (patch)
treedecddaa5dea13fff95a911ceb7e587e83eb2346e /parsers
parentc10e8ce930b75e4f53d4aa627cbb7b56c44ed803 (diff)
parser/QuoteSpecialText doesn't escape `<` if it is part of an HTML tag.
I'm trying to accommodate HTML markup in the scruffy markup. ;)
Diffstat (limited to 'parsers')
-rw-r--r--parsers/DetectHTMLTag.java58
-rw-r--r--parsers/QuoteSpecialText.java28
2 files changed, 80 insertions, 6 deletions
diff --git a/parsers/DetectHTMLTag.java b/parsers/DetectHTMLTag.java
new file mode 100644
index 0000000..562b793
--- /dev/null
+++ b/parsers/DetectHTMLTag.java
@@ -0,0 +1,58 @@
+/*+
+ * Copyright 2012, 2013 rsiddharth <rsiddharth@ninthfloor.org>
+ *
+ * This file is part of Scruf.
+ *
+ * Scruf is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package scruf.parsers;
+
+import java.util.*;
+import java.util.regex.*;
+
+public class DetectHTMLTag { // Recognizes HTML tags at the start of a text and remembers their pending closing tags.
+ private List<String> closingHtmlTags = new ArrayList<String>(); // closing tags (regex group 3, e.g. "</b>") recorded for opening tags matched so far
+ private Pattern htmlTagPattern = Pattern.compile("^\\<((\\w+?).*?)\\>.*?(\\<\\/\\2\\>)", // anchored at text start; group 2 = tag name, group 3 = closing tag for that same name
+ Pattern.DOTALL); // DOTALL: '.' also matches line terminators, so a tag and its closing tag may be on different lines
+ private boolean insideHtmlBlock = false; // set when an opening tag is matched; cleared only in isClosingHtmlTag once no closing tags are pending
+ public boolean isHtmlTag(String text) { // true if text starts with an opening tag whose closing tag occurs later in text, or with a pending closing tag
+ Matcher htmlTagMatcher = htmlTagPattern.matcher(text);
+ // Opening tag found: remember its closing tag; otherwise fall back to the pending-closing-tag check.
+ if(htmlTagMatcher.find()) {
+ closingHtmlTags.add(htmlTagMatcher.group(3)); // keep the closing tag so a later call can recognize it
+ insideHtmlBlock = true;
+ return true;
+ }else {
+ return isClosingHtmlTag(text); // not an opening tag; this call may also clear insideHtmlBlock
+ }
+ }
+ // Returns true (and forgets the tag) when text starts with one of the recorded closing tags.
+ private boolean isClosingHtmlTag(String text) {
+ for(String closingTag: closingHtmlTags) {
+ if(text.startsWith(closingTag)) {
+ closingHtmlTags.remove(closingTag); // removes the first equal entry; iteration stops here because of the return below
+ return true; // note: insideHtmlBlock is not updated on this path
+ }
+ }
+ if(closingHtmlTags.isEmpty()) // reached only when text did not start with any recorded closing tag
+ insideHtmlBlock = false; // nothing pending any more, so the HTML block is considered finished
+ return false;
+ }
+ // Reports the flag as last updated by isHtmlTag / isClosingHtmlTag; this getter does not recompute it.
+ public boolean insideHtmlBlock() {
+ return insideHtmlBlock;
+ }
+}
diff --git a/parsers/QuoteSpecialText.java b/parsers/QuoteSpecialText.java
index cf2fcd8..5e63bc9 100644
--- a/parsers/QuoteSpecialText.java
+++ b/parsers/QuoteSpecialText.java
@@ -20,24 +20,40 @@
package scruf.parsers;
+import scruf.status.*;
import java.util.*;
import java.util.regex.*;
public class QuoteSpecialText implements Parser {
- Map<String,String> qmap;
+ private Map<String,String> qmap;
+ private DetectHTMLTag detectTag;
public QuoteSpecialText() {
qmap = new HashMap<String,String>();
qmap.put("&","&amp;");
qmap.put("<","&lt;");
- qmap.put(">","&gt;");
}
public String parse(String fileContent) {
- Pattern pattern = Pattern.compile("(\\&(\\w+|\\#\\d+)\\;)|(\\&)|(\\<)|(\\>)");
+ detectTag = new DetectHTMLTag();
+ Pattern pattern = Pattern.compile("(\\&(\\w+|\\#\\d+)\\;)|(\\<)|(\\&)");
+ Pattern loneHtmlPattern = Pattern.compile("^\\<\\w+? .*?\\/\\>", Pattern.DOTALL);
Matcher matcher = pattern.matcher(fileContent);
StringBuffer sbuffer = new StringBuffer();
- while(matcher.find() && matcher.group(1)==null) {
- matcher.appendReplacement(sbuffer,
- qmap.get(matcher.group()));
+ while(matcher.find()) {
+ if(matcher.group(1)!=null) {
+ // found HTML code, don't do anything
+ // continue.
+ continue;
+ }
+ String subString = fileContent.substring(matcher.start());
+ boolean quote = !detectTag.isHtmlTag(subString) &&
+ !detectTag.insideHtmlBlock() &&
+ !loneHtmlPattern.matcher(subString).find();
+ if(quote) {
+ matcher.appendReplacement(sbuffer,
+ qmap.get(matcher.group()));
+ }else {
+ System.out.println("Not Escaping" + subString.split(">")[0] + ">");
+ }
}
matcher.appendTail(sbuffer);
return sbuffer.toString();