From 7e23efa57061f26e003939e4814af63861db7804 Mon Sep 17 00:00:00 2001 From: rsiddharth Date: Tue, 12 Nov 2013 15:57:19 +0530 Subject: parser/QuoteSpecialText doesn't escape `<` if it is part of an HTML tag. I'm trying to accommodate HTML markup in the scruffy markup. ;) --- parsers/QuoteSpecialText.java | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'parsers/QuoteSpecialText.java') diff --git a/parsers/QuoteSpecialText.java b/parsers/QuoteSpecialText.java index cf2fcd8..5e63bc9 100644 --- a/parsers/QuoteSpecialText.java +++ b/parsers/QuoteSpecialText.java @@ -20,24 +20,40 @@ package scruf.parsers; +import scruf.status.*; import java.util.*; import java.util.regex.*; public class QuoteSpecialText implements Parser { - Map qmap; + private Map qmap; + private DetectHTMLTag detectTag; public QuoteSpecialText() { qmap = new HashMap(); qmap.put("&","&"); qmap.put("<","<"); - qmap.put(">",">"); } public String parse(String fileContent) { - Pattern pattern = Pattern.compile("(\\&(\\w+|\\#\\d+)\\;)|(\\&)|(\\<)|(\\>)"); + detectTag = new DetectHTMLTag(); + Pattern pattern = Pattern.compile("(\\&(\\w+|\\#\\d+)\\;)|(\\<)|(\\&)"); + Pattern loneHtmlPattern = Pattern.compile("^\\<\\w+? .*?\\/\\>", Pattern.DOTALL); Matcher matcher = pattern.matcher(fileContent); StringBuffer sbuffer = new StringBuffer(); - while(matcher.find() && matcher.group(1)==null) { - matcher.appendReplacement(sbuffer, - qmap.get(matcher.group())); + while(matcher.find()) { + if(matcher.group(1)!=null) { + // found HTML code, don't do anything + // continue.
+ continue; + } + String subString = fileContent.substring(matcher.start()); + boolean quote = !detectTag.isHtmlTag(subString) && + !detectTag.insideHtmlBlock() && + !loneHtmlPattern.matcher(subString).find(); + if(quote) { + matcher.appendReplacement(sbuffer, + qmap.get(matcher.group())); + }else { + System.out.println("Not Escaping" + subString.split(">")[0] + ">"); + } } matcher.appendTail(sbuffer); return sbuffer.toString(); -- cgit v1.2.3