summary refs log tree commit diff stats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/DetectHTMLTag.java 39
-rw-r--r-- parsers/Paragraphs.java 34
2 files changed, 48 insertions, 25 deletions
diff --git a/parsers/DetectHTMLTag.java b/parsers/DetectHTMLTag.java
index 562b793..4572927 100644
--- a/parsers/DetectHTMLTag.java
+++ b/parsers/DetectHTMLTag.java
@@ -22,16 +22,20 @@ package scruf.parsers;
import java.util.*;
import java.util.regex.*;
+import scruf.status.*;
public class DetectHTMLTag {
private List<String> closingHtmlTags = new ArrayList<String>();
private Pattern htmlTagPattern = Pattern.compile("^\\<((\\w+?).*?)\\>.*?(\\<\\/\\2\\>)",
Pattern.DOTALL);
+ private String lastHtmlBlock = null;
private boolean insideHtmlBlock = false;
+ private boolean htmlInScruffy = false;
public boolean isHtmlTag(String text) {
Matcher htmlTagMatcher = htmlTagPattern.matcher(text);
if(htmlTagMatcher.find()) {
+ lastHtmlBlock = htmlTagMatcher.group();
closingHtmlTags.add(htmlTagMatcher.group(3));
insideHtmlBlock = true;
return true;
@@ -43,16 +47,45 @@ public class DetectHTMLTag {
private boolean isClosingHtmlTag(String text) {
for(String closingTag: closingHtmlTags) {
if(text.startsWith(closingTag)) {
+ lastHtmlBlock = closingTag;
closingHtmlTags.remove(closingTag);
+ if(closingHtmlTags.isEmpty())
+ insideHtmlBlock = false;
return true;
}
}
- if(closingHtmlTags.isEmpty())
- insideHtmlBlock = false;
return false;
}
+ public boolean isHtmlInScruffy(String text) {
+ Integer blankLineCounter;
+
+ if(isHtmlTag(text)) {
+ Pattern qText = Pattern.compile("\\Q"+lastHtmlBlock+"\\E");
+ htmlInScruffy = qText.matcher(PresentFile.fileContent).find();
+
+ Pattern pBlankLine = Pattern.compile("\\n\\n");
+ Matcher mBlankLine = pBlankLine.matcher(lastHtmlBlock);
+ blankLineCounter = 0;
+
+ while(mBlankLine.find())
+ ++blankLineCounter;
+
+ if(blankLineCounter == 0) {
+ closingHtmlTags.clear();
+ resetEverything();
+ }
+ }
+ return insideHtmlBlock && htmlInScruffy;
+ }
+
public boolean insideHtmlBlock() {
return insideHtmlBlock;
}
-}
+
+ public void resetEverything() {
+ insideHtmlBlock = false;
+ htmlInScruffy = false;
+ lastHtmlBlock = null;
+ }
+}
\ No newline at end of file
diff --git a/parsers/Paragraphs.java b/parsers/Paragraphs.java
index 7b21d69..d4ad31c 100644
--- a/parsers/Paragraphs.java
+++ b/parsers/Paragraphs.java
@@ -21,18 +21,14 @@
package scruf.parsers;
import java.util.regex.*;
-
+import scruf.status.*;
public class Paragraphs implements Parser {
- private String paragraph = "<p>\n$0</p>\n";
+ private String paragraph = "<p>$0</p>";
+ private DetectHTMLTag detectTag;
public String parse(String fileContent) {
+ detectTag = new DetectHTMLTag();
/**
- * This regex contains two parts seperated by a '|'; the first
- * part is regex for a html Heading (See Heading.java) and the
- * second part is the regex for a paragraph. For an input, if
- * the first part of the regex is matched, then it is
- * necessarily a Heading, so, we ignore it; but if the second
- * part of the regex is matched for an input, then it is a
- * paragraph, so, we put the necessary tags in place.
+ * The pattern for matching paragraphs
*/
Pattern pattern = Pattern.compile("(^.+$\\n)+",Pattern.MULTILINE);
/**
@@ -42,21 +38,15 @@ public class Paragraphs implements Parser {
Matcher matcher = pattern.matcher(fileContent);
Matcher htmlTag;
StringBuffer sbuffer = new StringBuffer();
+
while(matcher.find()) {
- /**
- * give the paragraph that is identified htmlTagPattern
- * and see whether the "paragraph" that is actually
- * deducted is some other html block like <h1> (heading)
- * or <blockquote>, etcetera. If "matcher.find()" has
- * actually found a html block then we don't need to do
- * the conversion.
- */
htmlTag = htmlTagPattern.matcher(matcher.group());
- /**
- * if "matcher.find()" _has not_ deducted a html block,
- * then we do the conversion.
- */
- if(!htmlTag.find()) {
+
+ String subString = fileContent.substring(matcher.start());
+ boolean htmlTagP = htmlTag.find();
+ boolean htmlInScruffy = detectTag.isHtmlInScruffy(subString);
+ boolean pWrap = !htmlTagP && !htmlInScruffy;
+ if(pWrap) {
matcher.appendReplacement(sbuffer,paragraph);
}
}