summaryrefslogtreecommitdiffstats
path: root/parsers/Paragraphs.java
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/Paragraphs.java')
-rw-r--r--parsers/Paragraphs.java42
1 files changed, 29 insertions, 13 deletions
diff --git a/parsers/Paragraphs.java b/parsers/Paragraphs.java
index 295d14a..9b257a2 100644
--- a/parsers/Paragraphs.java
+++ b/parsers/Paragraphs.java
@@ -3,26 +3,42 @@ package scruf.parsers;
import java.util.regex.*;
public class Paragraphs implements Parser {
- private String paragraph = "<p>\n $5</p>\n";
+ private String paragraph = "<p>\n $0</p>\n";
public String parse(String fileContent) {
/**
- * This regex contains two parts seperated by a '|';
- * the first part is regex for a html Heading (See Heading.java)
- * and the second part is the regex for a paragraph. For an input,
- * if the first part of the regex is matched, then it is necessarily
- * a Heading, so, we ignore it; but if the second part of the regex is
- * matched for an input, then it is a paragraph, so, we put the necessary
- * tags in place.
+ * This regex contains two parts separated by a '|'; the first
+ * part is regex for a html Heading (See Heading.java) and the
+ * second part is the regex for a paragraph. For an input, if
+ * the first part of the regex is matched, then it is
+ * necessarily a Heading, so, we ignore it; but if the second
+ * part of the regex is matched for an input, then it is a
+ * paragraph, so, we put the necessary tags in place.
*/
- Pattern pattern = Pattern.compile("((\\={10,})\\n(.+?)\\n(\\2))|((^.+$\\n)+)",Pattern.MULTILINE);
+ Pattern pattern = Pattern.compile("(^.+$\\n)+",Pattern.MULTILINE);
+ /**
+ * This htmlTagPattern has a regex to detect an html tag.
+ */
+ Pattern htmlTagPattern = Pattern.compile("^\\<.+?\\>\\n");
Matcher matcher = pattern.matcher(fileContent);
+ Matcher htmlTag;
StringBuffer sbuffer = new StringBuffer();
while(matcher.find()) {
- // group 1 contains the regex for the Heading, so
- // if that is null, then it means that we have actually
- // found a paragraph.
- if(matcher.group(1)==null)
+ /**
+ * give the paragraph that was identified to htmlTagPattern
+ * and see whether the "paragraph" that was actually
+ * detected is some other html block like <h1> (heading)
+ * or <blockquote>, etcetera. If "matcher.find()" has
+ * actually found a html block then we don't need to do
+ * the conversion.
+ */
+ htmlTag = htmlTagPattern.matcher(matcher.group());
+ /**
+ * if "matcher.find()" _has not_ detected a html block,
+ * then we do the conversion.
+ */
+ if(!htmlTag.find()) {
matcher.appendReplacement(sbuffer,paragraph);
+ }
}
matcher.appendTail(sbuffer);
return sbuffer.toString();