now, scruf doesn't wrap <p> around text inside HTML blocks in a scruffy markup file.

With this, I guess, HTML can be written in scruffy marked-up files.
author: rsiddharth <rsiddharth@ninthfloor.org> 2013-11-15 14:00:35 +0530
committer: rsiddharth <rsiddharth@ninthfloor.org> 2013-11-15 14:00:35 +0530
commit: 88b46e3db46ff8d8754797fcff8fe9eba19f7ddd (patch)
tree: bf25fc8171b1cd21907491fee2d9653587e70ddf
parent: 821d476a4a9d778e77486027d654492e0874a699 (diff)
2 files changed, 48 insertions, 25 deletions
diff --git a/parsers/DetectHTMLTag.java b/parsers/DetectHTMLTag.java
index 562b793..4572927 100644
--- a/parsers/DetectHTMLTag.java
+++ b/parsers/DetectHTMLTag.java
@@ -22,16 +22,20 @@ package scruf.parsers;
 
 import java.util.*;
 import java.util.regex.*;
+import scruf.status.*;
 
 public class DetectHTMLTag {
 	private List<String> closingHtmlTags = new ArrayList<String>();
 	private Pattern htmlTagPattern = Pattern.compile("^\\<((\\w+?).*?)\\>.*?(\\<\\/\\2\\>)",
 													 Pattern.DOTALL);
+	private String lastHtmlBlock = null;
 	private boolean insideHtmlBlock = false;
+	private boolean htmlInScruffy = false;
 	public boolean isHtmlTag(String text) {
 		Matcher htmlTagMatcher = htmlTagPattern.matcher(text);
 
 		if(htmlTagMatcher.find()) {
+			lastHtmlBlock = htmlTagMatcher.group();
 			closingHtmlTags.add(htmlTagMatcher.group(3));
 			insideHtmlBlock = true;
 			return true;
@@ -43,16 +47,45 @@ public class DetectHTMLTag {
 	private boolean isClosingHtmlTag(String text) {
 		for(String closingTag: closingHtmlTags) {
 			if(text.startsWith(closingTag)) {
+				lastHtmlBlock = closingTag;
 				closingHtmlTags.remove(closingTag);
+				if(closingHtmlTags.isEmpty())
+					insideHtmlBlock = false;
 				return true;
 			}
 		}
-		if(closingHtmlTags.isEmpty())
-			insideHtmlBlock = false;
 		return false;
 	}
 
+	public boolean isHtmlInScruffy(String text) {
+		Integer blankLineCounter;
+
+		if(isHtmlTag(text)) {
+			Pattern qText = Pattern.compile("\\Q"+lastHtmlBlock+"\\E");
+			htmlInScruffy = qText.matcher(PresentFile.fileContent).find();
+
+			Pattern pBlankLine = Pattern.compile("\\n\\n");
+			Matcher mBlankLine = pBlankLine.matcher(lastHtmlBlock);
+			blankLineCounter = 0;
+
+			while(mBlankLine.find())
+				++blankLineCounter;
+
+			if(blankLineCounter == 0) {
+				closingHtmlTags.clear();
+				resetEverything();
+			}
+		}
+		return insideHtmlBlock && htmlInScruffy;
+	}
+
 	public boolean insideHtmlBlock() {
 		return insideHtmlBlock;
 	}
-}
+
+	public void resetEverything() {
+		insideHtmlBlock = false;
+		htmlInScruffy = false;
+		lastHtmlBlock = null;
+	}
+}
+\ No newline at end of file
diff --git a/parsers/Paragraphs.java b/parsers/Paragraphs.java
index 7b21d69..d4ad31c 100644
--- a/parsers/Paragraphs.java
+++ b/parsers/Paragraphs.java
@@ -21,18 +21,14 @@
 package scruf.parsers;
 
 import java.util.regex.*;
-
+import scruf.status.*;
 public class Paragraphs implements Parser {
-    private String paragraph = "<p>\n$0</p>\n";
+    private String paragraph = "<p>$0</p>";
+	private DetectHTMLTag detectTag;
     public String parse(String fileContent) {
+	detectTag = new DetectHTMLTag();
 	/**
-	 * This regex contains two parts seperated by a '|'; the first
-	 * part is regex for a html Heading (See Heading.java) and the
-	 * second part is the regex for a paragraph. For an input, if
-	 * the first part of the regex is matched, then it is
-	 * necessarily a Heading, so, we ignore it; but if the second
-	 * part of the regex is matched for an input, then it is a
-	 * paragraph, so, we put the necessary tags in place.
+	 * The pattern for matching paragraphs
 	 */
 	Pattern pattern = Pattern.compile("(^.+$\\n)+",Pattern.MULTILINE);
 	/**
@@ -42,21 +38,15 @@ public class Paragraphs implements Parser {
 	Matcher matcher = pattern.matcher(fileContent);
 	Matcher htmlTag;
 	StringBuffer sbuffer = new StringBuffer();
+
 	while(matcher.find()) {
-	    /**
-	     * give the paragraph that is identified htmlTagPattern
-	     * and see whether the "paragraph" that is actually
-	     * deducted is some other html block like <h1> (heading)
-	     * or <blockquote>, etcetera. If "matcher.find()" has
-	     * actually found a html block then we don't need to do
-	     * the conversion.
-	     */
 	    htmlTag = htmlTagPattern.matcher(matcher.group());
-	    /**
-	     * if "matcher.find()" _has not_ deducted a html block,
-	     * then we do the conversion.
-	     */
-	    if(!htmlTag.find()) {
+
+		String subString = fileContent.substring(matcher.start());
+		boolean htmlTagP = htmlTag.find();
+		boolean htmlInScruffy = detectTag.isHtmlInScruffy(subString);
+		boolean pWrap = !htmlTagP && !htmlInScruffy;
+	    if(pWrap) {
 			matcher.appendReplacement(sbuffer,paragraph);
 	    }
 	}
author	rsiddharth <rsiddharth@ninthfloor.org>	2013-11-15 14:00:35 +0530
committer	rsiddharth <rsiddharth@ninthfloor.org>	2013-11-15 14:00:35 +0530
commit	88b46e3db46ff8d8754797fcff8fe9eba19f7ddd (patch)
tree	bf25fc8171b1cd21907491fee2d9653587e70ddf
parent	821d476a4a9d778e77486027d654492e0874a699 (diff)