diff options
author | rsiddharth <rsiddharth@ninthfloor.org> | 2012-06-26 22:06:51 +0530 |
---|---|---|
committer | rsiddharth <rsiddharth@ninthfloor.org> | 2012-06-26 22:06:51 +0530 |
commit | 726314a77a2138f8d6bca200af9972e006f43311 (patch) | |
tree | 1f0ca34734309f018754d4edb51493dbab36b5a7 /parsers/Paragraphs.java | |
parent | e3b2d9aa86d97154421e753d91856bc73a4be591 (diff) |
conversion/FileSieve.java: modified the regex to ignore backup files (#*#).
parsers/Footer.java: Aesthetic modification.
parsers/Header.java: add "meta" tags for the html output, mainly for
compliance sake. I have also re-arranged the html tags a bit -- the
"back button" is now the last thing in the <article> block.
parsers/Images.java (bug-fix): The regex had small problems precisely
detecting images, so the regex was slightly modified to make it all
work properly. There was another significant change made to the way
the image tag is created -- "alt" is mandatory now; even if the
markup doesn't give an "alt" for the <img> tag, a default "alt" is
created -- the image-file-name.
parsers/Links.java: From now on, the links generated will not have
'target="_blank"'; I felt that the reader must choose whether to
open the link in a new window/tab, hence the change. Period. In the
last revision, the link description was made optional in the link
mark-up; the regex was slightly erroneous, so I rectified it.
parsers/Paragraphs.java: As of this revision, the <p> generation is
still rough around the edges; I need to smooth it out in future
revisions.
parsers/ParserList.java: Order of parser list was modified -- now,
Paragraphs comes before Headings, it was the reverse before.
Diffstat (limited to 'parsers/Paragraphs.java')
-rw-r--r-- | parsers/Paragraphs.java | 27 |
1 files changed, 19 insertions, 8 deletions
diff --git a/parsers/Paragraphs.java b/parsers/Paragraphs.java index 0d6fda6..295d14a 100644 --- a/parsers/Paragraphs.java +++ b/parsers/Paragraphs.java @@ -3,17 +3,28 @@ package scruf.parsers; import java.util.regex.*; public class Paragraphs implements Parser { - + private String paragraph = "<p>\n $5</p>\n"; public String parse(String fileContent) { - - Pattern pattern = Pattern.compile("((^.+$)\\n)+",Pattern.MULTILINE); + /** + * This regex contains two parts seperated by a '|'; + * the first part is regex for a html Heading (See Heading.java) + * and the second part is the regex for a paragraph. For an input, + * if the first part of the regex is matched, then it is necessarily + * a Heading, so, we ignore it; but if the second part of the regex is + * matched for an input, then it is a paragraph, so, we put the necessary + * tags in place. + */ + Pattern pattern = Pattern.compile("((\\={10,})\\n(.+?)\\n(\\2))|((^.+$\\n)+)",Pattern.MULTILINE); Matcher matcher = pattern.matcher(fileContent); - StringBuilder sbuilder = new StringBuilder(); + StringBuffer sbuffer = new StringBuffer(); while(matcher.find()) { - sbuilder.append("\n<p>\n"); - sbuilder.append(matcher.group()); - sbuilder.append("</p>\n"); + // group 1 contains the regex for the Heading, so + // if that is null, then it means that we have actually + // found a paragraph. + if(matcher.group(1)==null) + matcher.appendReplacement(sbuffer,paragraph); } - return sbuilder.toString(); + matcher.appendTail(sbuffer); + return sbuffer.toString(); } }
\ No newline at end of file |