summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
authorrsiddharth <rsiddharth@ninthfloor.org>2013-11-05 17:48:09 +0530
committerrsiddharth <rsiddharth@ninthfloor.org>2013-11-05 17:48:09 +0530
commit67e808b2c64f1b3c3837e5620876171e0999d9c4 (patch)
treefd648b159b2812272d37ccd7e05733351085f664 /parsers
parentb1fa5babf98e32c1b668d5b321ee9ae82769d61b (diff)
Wrote a new parser: parsers/UList.java
Scruf can now turn "**" list items into HTML <ul> lists.
Diffstat (limited to 'parsers')
-rw-r--r--parsers/ParserList.java3
-rw-r--r--parsers/UList.java69
2 files changed, 71 insertions, 1 deletions
diff --git a/parsers/ParserList.java b/parsers/ParserList.java
index b155329..0ef5022 100644
--- a/parsers/ParserList.java
+++ b/parsers/ParserList.java
@@ -33,7 +33,8 @@ public class ParserList {
parsers.add(new DocumentDate());
parsers.add(new WordDecoration());
parsers.add(new Headings());
- parsers.add(new Links());
+ parsers.add(new Links());
+ parsers.add(new UList());
parsers.add(new Images());
parsers.add(new Audio());
parsers.add(new Footer());
diff --git a/parsers/UList.java b/parsers/UList.java
new file mode 100644
index 0000000..95e9393
--- /dev/null
+++ b/parsers/UList.java
@@ -0,0 +1,69 @@
+/*+
+ * Copyright 2012, 2013 rsiddharth <rsiddharth@ninthfloor.org>
+ *
+ * This file is part of Scruf.
+ *
+ * Scruf is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package scruf.parsers;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+public class UList implements Parser { /*
+ * Parser that rewrites "** item" blocks of the input as HTML <ul>/<li> markup.
+ *
+ * pattern: matches one list item -- a line starting with two asterisks and
+ * one or more spaces, followed by every consecutive non-empty line (each
+ * terminated by '\n'). MULTILINE lets '^' and '$' match at line boundaries.
+ *
+ * list: replacement text for one item. $1 is the last space matched after
+ * the asterisks and $2 is the item's text lines, trailing newline included.
+ */
+ private Pattern pattern = Pattern.compile("^\\*\\*( )+((.+$\\n)+)",Pattern.MULTILINE);
+ private String list = "<li>\n$1$2</li>";
+ /**
+ * Regex that detects an HTML tag at the very start of the input: '<', the
+ * shortest possible run of characters, '>', and an optional newline.
+ *
+ * NOTE(review): neither this pattern nor the htmlTag matcher below is
+ * referenced anywhere in the visible code -- confirm whether they are
+ * still needed.
+ */
+ private Pattern htmlTagPattern = Pattern.compile("^\\<.+?\\>(\\n?)");
+ private Matcher matcher;
+ private Matcher htmlTag;
+ public String parse(String fileContent) { /*
+ * Returns fileContent with every list item wrapped in <li>...</li> and
+ * every run of items wrapped in <ul>...</ul>. An item that starts at most
+ * one character after the previous item's end joins the current list; a
+ * larger gap closes the current list and opens a new one. Input without
+ * any list item is returned unchanged.
+ *
+ * NOTE(review): the matcher is stored in an instance field, so sharing
+ * one UList between threads is presumably unsafe -- confirm with callers.
+ */
+ StringBuffer sbuffer = new StringBuffer();
+ matcher = pattern.matcher(fileContent);
+ // lastEnd: end offset of the previous item; 0 means "no item seen yet".
+ int lastEnd=0;
+ while(matcher.find()) {
+ int diff = matcher.start() - lastEnd; // characters between the previous item and this one
+
+ if(lastEnd == 0) {
+ // first item in the input: open the first list
+ matcher.appendReplacement(sbuffer, "<ul>\n " + list);
+ }
+ else if(diff>1) {
+ // more than one character since the previous item: this is a new list.
+ // close the old one first, so </ul> lands before the text in between.
+ sbuffer.append("\n</ul>\n");
+
+ // open the new list.
+ matcher.appendReplacement(sbuffer, "<ul>\n " + list);
+ }
+ else { // same list as the previous item: emit only the <li>
+ matcher.appendReplacement(sbuffer, list);
+ }
+ lastEnd = matcher.end();
+
+ }
+ // close the last found list
+ if(lastEnd !=0) // meaning a list was found.
+ sbuffer.append("\n</ul>\n");
+ matcher.appendTail(sbuffer); // copy whatever follows the last item
+
+ return sbuffer.toString();
+ }
+
+}