summaryrefslogtreecommitdiffstats
path: root/parsers/QuoteSpecialText.java
blob: 5e63bc99e6eac43b25786ed291537feae04e828a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/*+
 *   Copyright 2012, 2013 rsiddharth <rsiddharth@ninthfloor.org>
 * 
 *   This file is part of Scruf.
 *
 *   Scruf is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package scruf.parsers;

import scruf.status.*;
import java.util.*;
import java.util.regex.*;

/**
 * Parser pass that escapes bare {@code &} and {@code <} characters in
 * the given text as {@code &amp;} and {@code &lt;}.
 *
 * A character is left untouched when it belongs to an existing
 * character reference, when {@link DetectHTMLTag} reports that it
 * starts an HTML tag or lies inside an HTML block, or when it starts
 * what looks like a self-closing tag.
 */
public class QuoteSpecialText implements Parser {
	/** Replacement text for each character this pass escapes. */
	private static final Map<String,String> QUOTES;
	static {
		Map<String,String> quotes = new HashMap<String,String>();
		quotes.put("&","&amp;");
		quotes.put("<","&lt;");
		QUOTES = Collections.unmodifiableMap(quotes);
	}

	/**
	 * Candidates for escaping. Group 1 is an already-formed character
	 * reference (named, decimal, or hexadecimal) which must be left
	 * alone; otherwise the match is a single '<' or '&'.
	 *
	 * The hexadecimal alternative is listed explicitly: \w+ cannot
	 * match the leading '#', and \#\d+ only covers decimal digits, so
	 * without it a reference such as "&#x1F;" would have its '&'
	 * escaped a second time.
	 */
	private static final Pattern SPECIAL_PATTERN =
		Pattern.compile("(\\&(\\w+|\\#\\d+|\\#[xX][0-9a-fA-F]+)\\;)|(\\<)|(\\&)");

	/**
	 * Matches text that begins with something shaped like a
	 * self-closing tag: '<', a name, a space, then anything up to the
	 * first "/>".
	 *
	 * NOTE(review): because of DOTALL the "anything" part may span
	 * newlines and other tags, so a '<' followed much later by an
	 * unrelated "/>" also matches -- confirm this is intended.
	 */
	private static final Pattern LONE_HTML_PATTERN =
		Pattern.compile("^\\<\\w+? .*?\\/\\>", Pattern.DOTALL);

	public QuoteSpecialText() {
	}

	/**
	 * Escapes the special characters of fileContent.
	 *
	 * @param fileContent text to process.
	 * @return fileContent with every '&' and '<' that was not
	 *         recognised as part of a character reference or HTML
	 *         markup replaced by its entity.
	 */
	public String parse(String fileContent) {
		// One detector per call. It is kept local so that separate
		// parse() calls on the same instance never share it.
		// (Presumably the detector tracks HTML block state between
		// isHtmlTag() calls -- confirm against DetectHTMLTag.)
		DetectHTMLTag detectTag = new DetectHTMLTag();
		Matcher matcher = SPECIAL_PATTERN.matcher(fileContent);
		StringBuffer sbuffer = new StringBuffer();
		while(matcher.find()) {
			if(matcher.group(1)!=null) {
				// Existing character reference: skip it. Its text is
				// copied verbatim by the next appendReplacement() or
				// by appendTail().
				continue;
			}
			// The checks below look at the text from the candidate
			// character to the end of the input.
			String subString = fileContent.substring(matcher.start());
			// Keep this evaluation order: isHtmlTag() is called for
			// every candidate, insideHtmlBlock() only when it
			// returned false.
			boolean quote = !detectTag.isHtmlTag(subString) &&
				!detectTag.insideHtmlBlock() &&
				!LONE_HTML_PATTERN.matcher(subString).find();
			if(quote) {
				// quoteReplacement() makes the entity text literal,
				// so '$' or '\' in a replacement can never be read
				// as a group reference.
				matcher.appendReplacement(sbuffer,
										  Matcher.quoteReplacement(QUOTES.get(matcher.group())));
			}else {
				// Diagnostic output: the text up to the first '>'.
				System.out.println("Not Escaping" + subString.split(">")[0] + ">");
			}
		}
		matcher.appendTail(sbuffer);
		return sbuffer.toString();
	}
}