From 7e23efa57061f26e003939e4814af63861db7804 Mon Sep 17 00:00:00 2001 From: rsiddharth Date: Tue, 12 Nov 2013 15:57:19 +0530 Subject: parser/QuoteSpecialText doesn't escape `<` if it is part of a HTML tag. I'm trying to accommodate HTML markup into the scruffy markup. ;) --- etc/NOTES | 6 +++++ parsers/DetectHTMLTag.java | 58 +++++++++++++++++++++++++++++++++++++++++++ parsers/QuoteSpecialText.java | 28 ++++++++++++++++----- 3 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 etc/NOTES create mode 100644 parsers/DetectHTMLTag.java diff --git a/etc/NOTES b/etc/NOTES new file mode 100644 index 0000000..038bf9b --- /dev/null +++ b/etc/NOTES @@ -0,0 +1,6 @@ +NOTES -*- mode: org; -*- + +* HTML in scruffy file +* special characters are not escaped between HTML blocks. +* HTML tags with no end tag must be closed properly. + eg: diff --git a/parsers/DetectHTMLTag.java b/parsers/DetectHTMLTag.java new file mode 100644 index 0000000..562b793 --- /dev/null +++ b/parsers/DetectHTMLTag.java @@ -0,0 +1,58 @@ +/*+ + * Copyright 2012, 2013 rsiddharth + * + * This file is part of Scruf. + * + * Scruf is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + + +package scruf.parsers; + +import java.util.*; +import java.util.regex.*; + +/* Detects whether text starts with an HTML tag and tracks whether scanning is still inside an HTML element; QuoteSpecialText.parse consults it before escaping a character. */ public class DetectHTMLTag { + /* Closing tags captured from matched opening tags and not yet encountered. NOTE(review): the raw List/ArrayList look like generic type arguments lost in extraction (the for-each below iterates String) -- confirm against the repository. */ private List closingHtmlTags = new ArrayList(); + /* Anchored at the start of text: an opening tag (group 2 is the tag name), then the nearest closing tag carrying the same name (group 3). DOTALL lets the element span several lines. NOTE(review): group 2 is reluctant, so a prefix of the tag name can satisfy the back-reference (pre opened, p closed) -- confirm this is acceptable. */ private Pattern htmlTagPattern = Pattern.compile("^\\<((\\w+?).*?)\\>.*?(\\<\\/\\2\\>)", + Pattern.DOTALL); + /* Set when an opening tag is matched; cleared only in isClosingHtmlTag, on its false path, once no closing tags remain. */ private boolean insideHtmlBlock = false; + /* Returns true if text begins with an element matched by htmlTagPattern (its closing tag is recorded and the inside-block flag is set); otherwise defers to isClosingHtmlTag. */ public boolean isHtmlTag(String text) { + Matcher htmlTagMatcher = htmlTagPattern.matcher(text); + + if(htmlTagMatcher.find()) { + closingHtmlTags.add(htmlTagMatcher.group(3)); + insideHtmlBlock = true; + return true; + }else { + return isClosingHtmlTag(text); + } + } + + /* Returns true, after forgetting that tag, if text begins with a recorded closing tag. Otherwise returns false and clears the inside-block flag when no closing tags are pending. */ private boolean isClosingHtmlTag(String text) { + for(String closingTag: closingHtmlTags) { + if(text.startsWith(closingTag)) { + /* removing during the for-each is safe only because the method returns immediately afterwards */ closingHtmlTags.remove(closingTag); + return true; + } + } + if(closingHtmlTags.isEmpty()) + insideHtmlBlock = false; + return false; + } + + /* Reports the inside-block flag as last updated by isHtmlTag. */ public boolean insideHtmlBlock() { + return insideHtmlBlock; + } +} diff --git a/parsers/QuoteSpecialText.java b/parsers/QuoteSpecialText.java index cf2fcd8..5e63bc9 100644 --- a/parsers/QuoteSpecialText.java +++ b/parsers/QuoteSpecialText.java @@ -20,24 +20,40 @@ package scruf.parsers; +import scruf.status.*; import java.util.*; import java.util.regex.*; public class QuoteSpecialText implements Parser { - Map qmap; + /* Special character mapped to its replacement text. NOTE(review): in this copy each key maps to itself, presumably because entity text was decoded during extraction -- confirm against the repository. */ private Map qmap; + /* Recreated on every parse call. */ private DetectHTMLTag detectTag; public QuoteSpecialText() { qmap = new HashMap(); qmap.put("&","&"); qmap.put("<","<"); - qmap.put(">",">"); } public String parse(String fileContent) { - Pattern pattern = Pattern.compile("(\\&(\\w+|\\#\\d+)\\;)|(\\&)|(\\<)|(\\>)"); + /* Fresh detector per call, so no tag state carries over from a previous parse. Loop summary: existing entities (group 1) are left alone; any other match is replaced from qmap unless the remaining text starts an HTML tag, the detector is inside an HTML block, or loneHtmlPattern matches -- in those cases the character is kept and a Not Escaping line is printed to stdout. */ detectTag = new DetectHTMLTag(); + /* Group 1 is an existing entity (ampersand, then a name or #digits, then a semicolon); the other alternatives are a bare less-than sign or ampersand. */ Pattern pattern = Pattern.compile("(\\&(\\w+|\\#\\d+)\\;)|(\\<)|(\\&)"); + /* Tested against the text starting at each match; apparently meant to recognise a tag closed with a slash before the final bracket. NOTE(review): the literal is split across a line break in this copy -- confirm the exact pattern. */ Pattern loneHtmlPattern = Pattern.compile("^\\<\\w+? 
.*?\\/\\>", Pattern.DOTALL); Matcher matcher = pattern.matcher(fileContent); StringBuffer sbuffer = new StringBuffer(); - while(matcher.find() && matcher.group(1)==null) { - matcher.appendReplacement(sbuffer, - qmap.get(matcher.group())); + while(matcher.find()) { + if(matcher.group(1)!=null) { + // found HTML code, don't do anything + // continue. + continue; + } + String subString = fileContent.substring(matcher.start()); + boolean quote = !detectTag.isHtmlTag(subString) && + !detectTag.insideHtmlBlock() && + !loneHtmlPattern.matcher(subString).find(); + if(quote) { + matcher.appendReplacement(sbuffer, + qmap.get(matcher.group())); + }else { + System.out.println("Not Escaping" + subString.split(">")[0] + ">"); + } } matcher.appendTail(sbuffer); return sbuffer.toString(); -- cgit v1.2.3