From 6164ad0e4e0971194c1851f1d1785b08edeb808b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicola=CC=81s=20Hormaza=CC=81bal?= Date: Thu, 9 Jun 2016 12:24:33 +0200 Subject: [PATCH] changes and new tests --- .travis.yml | 11 + .../com/pnikosis/html2markdown/HTML2Md.java | 12 +- .../converters/ALineConverter.java | 25 ++ .../converters/BrLineConverter.java | 13 + .../converters/CodeLineConverter.java | 18 ++ .../converters/DivLineConverter.java | 15 ++ .../converters/EmphasisLineConverter.java | 12 + .../converters/HLineConverter.java | 26 ++ .../HorizontalRuleLineConverter.java | 11 + .../converters/ImgLineConverter.java | 27 +++ .../converters/LineConverter.java | 47 ++++ .../converters/PLineConverter.java | 15 ++ .../converters/StrongLineConverter.java | 12 + .../converters/TextConverter.java | 11 + .../extractor/TextExtractor.java | 8 + .../html2markdown/{ => md}/MDLine.java | 24 +- .../pnikosis/html2markdown/md/MDLines.java | 122 ++++++++++ .../converters/LineConverterSpec.groovy | 80 +++++++ .../html2markdown/md/MDLineSpec.groovy | 49 ++++ .../html2markdown/md/MDLinesSpec.groovy | 224 ++++++++++++++++++ 20 files changed, 745 insertions(+), 17 deletions(-) create mode 100644 .travis.yml create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/ALineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/BrLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/CodeLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/DivLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/EmphasisLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/HLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/HorizontalRuleLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/ImgLineConverter.java create mode 100644 
src/main/java/com/pnikosis/html2markdown/converters/LineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/PLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/StrongLineConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/converters/TextConverter.java create mode 100644 src/main/java/com/pnikosis/html2markdown/extractor/TextExtractor.java rename src/main/java/com/pnikosis/html2markdown/{ => md}/MDLine.java (86%) create mode 100644 src/main/java/com/pnikosis/html2markdown/md/MDLines.java create mode 100644 src/test/groovy/com/pnikosis/html2markdown/converters/LineConverterSpec.groovy create mode 100644 src/test/groovy/com/pnikosis/html2markdown/md/MDLineSpec.groovy create mode 100644 src/test/groovy/com/pnikosis/html2markdown/md/MDLinesSpec.groovy diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..25dbb74 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,11 @@ +language: java +jdk: + - oraclejdk8 +script: + ./gradlew checkstyleMain checkstyleTest findbugsMain findbugsTest pmdMain pmdTest test; + ./gradlew build +cache: + directories: + - $HOME/.m2 + - $HOME/.gradle + diff --git a/src/main/java/com/pnikosis/html2markdown/HTML2Md.java b/src/main/java/com/pnikosis/html2markdown/HTML2Md.java index c4d1bec..6e425ed 100644 --- a/src/main/java/com/pnikosis/html2markdown/HTML2Md.java +++ b/src/main/java/com/pnikosis/html2markdown/HTML2Md.java @@ -1,6 +1,7 @@ package com.pnikosis.html2markdown; -import com.pnikosis.html2markdown.MDLine.MDLineType; +import com.pnikosis.html2markdown.md.MDLine; +import com.pnikosis.html2markdown.md.MDLine.MDLineType; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; @@ -136,19 +137,20 @@ private static String parseDocument(Document dirtyDoc) { Document doc = cleaner.clean(dirtyDoc); doc.outputSettings().escapeMode(EscapeMode.xhtml); - if (!title.trim().equals("")) { - return "# " + title + 
"\n\n" + getTextContent(doc); + String trimmedTitle = title.trim(); + if (!"".equals(trimmedTitle)) { + return "# " + trimmedTitle + "\n\n" + getTextContent(doc); } else { return getTextContent(doc); } } private static String getTextContent(Element element) { - ArrayList lines = new ArrayList(); + ArrayList lines = new ArrayList<>(); List children = element.childNodes(); for (Node child : children) { - if (child instanceof TextNode) { + if (TextNode.class.isInstance(child)) { TextNode textNode = (TextNode) child; MDLine line = getLastLine(lines); if (line.getContent().equals("")) {
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/ALineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/ALineConverter.java
new file mode 100644
index 0000000..a4f3c8b
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/ALineConverter.java
@@ -0,0 +1,32 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Renders an anchor element as an inline Markdown link. */
+/* package */ class ALineConverter extends LineConverter {
+    private final String url;
+    private final String title;
+
+    /**
+     * @param url   link target placed between the parentheses
+     * @param title link title; left out of the output when null or empty
+     */
+    /* package */ ALineConverter(String url, String title) {
+        this.url = url;
+        this.title = title;
+    }
+
+    /** Builds {@code [content](url "title")} as a single line of type None. */
+    @Override
+    public MDLines convert(String content) {
+        StringBuilder link = new StringBuilder("[").append(content).append("](").append(url);
+        boolean hasTitle = title != null && title.length() > 0;
+        if (hasTitle) {
+            link.append(" \"").append(title).append("\"");
+        }
+        link.append(")");
+        return new MDLines().addLine(MDLine.MDLineType.None, 0, link.toString());
+    }
+}
+
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/BrLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/BrLineConverter.java
new file mode 100644
index 0000000..7383d09
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/BrLineConverter.java
@@ -0,0 +1,16 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Converts a line break element into one empty Markdown line. */
+/* package */ class BrLineConverter extends LineConverter {
+    /** Ignores {@code content}; only a line with empty text is added to the result. */
+    @Override
+    public MDLines convert(String content) {
+        MDLine blank = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines result = new MDLines();
+        result.add(blank);
+        return result;
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/CodeLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/CodeLineConverter.java
new file mode 100644
index 0000000..f47ce07
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/CodeLineConverter.java
@@ -0,0 +1,20 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Converts code content into level 1 lines framed by empty lines. */
+/* package */ class CodeLineConverter extends LineConverter {
+    /** Splits {@code content} on line breaks and adds every piece as a line of level 1. */
+    @Override
+    public MDLines convert(String content) {
+        MDLines result = new MDLines();
+        result.add(new MDLine(MDLine.MDLineType.None, 0, ""));
+        String[] codeLines = content.split("\\r?\\n", 0);
+        for (int i = 0; i < codeLines.length; i++) {
+            result.add(new MDLine(MDLine.MDLineType.None, 1, codeLines[i]));
+        }
+        result.add(new MDLine(MDLine.MDLineType.None, 0, ""));
+        return result;
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/DivLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/DivLineConverter.java
new file mode 100644
index 0000000..a3fdd85
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/DivLineConverter.java
@@ -0,0 +1,16 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Converts a div element into its content framed by empty lines. */
+/* package */ class DivLineConverter extends LineConverter {
+    /** Adds an empty line, the content line and a closing empty line to the result. */
+    @Override
+    public MDLines convert(String content) {
+        return new MDLines()
+                .addLine(MDLine.MDLineType.None, 0, "")
+                .addLine(MDLine.MDLineType.None, 0, content)
+                .addLine(MDLine.MDLineType.None, 0, "");
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/EmphasisLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/EmphasisLineConverter.java
new file mode 100644
index 0000000..ade2363
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/EmphasisLineConverter.java
@@ -0,0 +1,15 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Wraps content in single asterisks, the Markdown emphasis marker. */
+/* package */ class EmphasisLineConverter extends LineConverter {
+
+    /** Returns one line of type None holding {@code *content*}. */
+    @Override
+    public MDLines convert(String content) {
+        String emphasized = "*" + content + "*";
+        return new MDLines().addLine(MDLine.MDLineType.None, 0, emphasized);
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/HLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/HLineConverter.java
new file mode 100644
index 0000000..af138b0
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/HLineConverter.java
@@ -0,0 +1,28 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Converts a heading element into a Markdown heading line. */
+/* package */ class HLineConverter extends LineConverter {
+    private final int level;
+
+    /** @param level numeric heading level */
+    /* package */ HLineConverter(int level) {
+        this.level = level;
+    }
+
+    /** Levels 1 and 2 become Head1 and Head2; every other level is emitted as Head3. */
+    @Override
+    public MDLines convert(String content) {
+        MDLine.MDLineType headType;
+        if (level == 1) {
+            headType = MDLine.MDLineType.Head1;
+        } else if (level == 2) {
+            headType = MDLine.MDLineType.Head2;
+        } else {
+            headType = MDLine.MDLineType.Head3;
+        }
+        return new MDLines().addLine(headType, 0, content);
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/HorizontalRuleLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/HorizontalRuleLineConverter.java
new file mode 100644
index 0000000..ac35b13
--- /dev/null
+++ 
b/src/main/java/com/pnikosis/html2markdown/converters/HorizontalRuleLineConverter.java
@@ -0,0 +1,13 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Converts a horizontal rule element into a line of type HR. */
+/* package */ class HorizontalRuleLineConverter extends LineConverter {
+    /** Ignores {@code content}; the rule line is created with empty text. */
+    @Override
+    public MDLines convert(String content) {
+        return new MDLines().addLine(MDLine.MDLineType.HR, 0, "");
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/ImgLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/ImgLineConverter.java
new file mode 100644
index 0000000..fc96288
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/ImgLineConverter.java
@@ -0,0 +1,34 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Renders an image element as an inline Markdown image. */
+/* package */ class ImgLineConverter extends LineConverter {
+    private final String src;
+    private final String alt;
+    private final String title;
+
+    /**
+     * @param src   image location placed between the parentheses
+     * @param alt   alternative text placed between the brackets
+     * @param title image title; left out of the output when null or empty
+     */
+    /* package */ ImgLineConverter(String src, String alt, String title) {
+        this.src = src;
+        this.alt = alt;
+        this.title = title;
+    }
+
+    /** Builds {@code ![alt](src "title")}; {@code content} is not used. */
+    @Override
+    public MDLines convert(String content) {
+        StringBuilder image = new StringBuilder("![").append(alt).append("](").append(src);
+        if (title != null && !title.isEmpty()) {
+            image.append(" \"").append(title).append("\"");
+        }
+        image.append(")");
+        return new MDLines().addLine(MDLine.MDLineType.None, 0, image.toString());
+    }
+}
+
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/LineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/LineConverter.java
new file mode 100644
index 0000000..4ec03d8
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/LineConverter.java
@@ -0,0 +1,81 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.extractor.TextExtractor;
+import com.pnikosis.html2markdown.md.MDLines;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Element;
+
+/**
+ * Base class of the tag specific converters that turn an HTML element into Markdown lines.
+ */
+public abstract class LineConverter {
+    /** Heading tags: "h" followed by digits only. Compiled once instead of on every lookup. */
+    private static final Pattern HEADING_TAG = Pattern.compile("^h[0-9]+$");
+
+    /**
+     * Converts an element with the converter selected by its tag name.
+     *
+     * @param element       element to convert
+     * @param textExtractor turns the element's child nodes into the text given to the converter
+     * @return the Markdown lines produced by the selected converter
+     */
+    public static MDLines convert(Element element, TextExtractor textExtractor) {
+        LineConverter lineConverter = getConverter(element);
+        return lineConverter.convert(textExtractor.extract(element.childNodes()));
+    }
+
+    /** Picks the converter for the element's tag; unknown tags fall back to TextConverter. */
+    private static LineConverter getConverter(Element element) {
+        String tag = element.tagName();
+
+        switch (tag) {
+            case "div":
+                return new DivLineConverter();
+            case "p":
+                return new PLineConverter();
+            case "br":
+                return new BrLineConverter();
+            case "strong":
+            case "b":
+                return new StrongLineConverter();
+            case "em":
+                return new EmphasisLineConverter();
+            case "hr":
+                return new HorizontalRuleLineConverter();
+            case "a":
+                return new ALineConverter(element.attr("href"), element.attr("title"));
+            case "img":
+                return new ImgLineConverter(element.attr("src"), element.attr("alt"), element.attr("title"));
+            case "code":
+                return new CodeLineConverter();
+            default:
+                break;
+        }
+
+        if (HEADING_TAG.matcher(tag).matches()) {
+            return new HLineConverter(headingLevel(tag));
+        }
+
+        return new TextConverter();
+    }
+
+    /**
+     * Parses the digits after the leading "h". A digit run too long for an int is reported as
+     * Integer.MAX_VALUE instead of failing with a NumberFormatException.
+     */
+    private static int headingLevel(String tag) {
+        try {
+            return Integer.parseInt(tag.substring(1));
+        } catch (NumberFormatException tooLarge) {
+            return Integer.MAX_VALUE;
+        }
+    }
+
+    /**
+     * Converts already extracted text into Markdown lines.
+     *
+     * @param content text extracted from the element's child nodes
+     * @return the lines representing the element
+     */
+    protected abstract MDLines convert(String content);
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/PLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/PLineConverter.java
new file mode 100644
index 0000000..dc3d943
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/PLineConverter.java
@@ -0,0 +1,16 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Converts a paragraph element into its content framed by empty lines. */
+/* package */ class PLineConverter extends LineConverter {
+    /** Adds an empty line, the content line and a closing empty line to the result. */
+    @Override
+    public MDLines convert(String content) {
+        return new MDLines()
+                .addLine(MDLine.MDLineType.None, 0, "")
+                .addLine(MDLine.MDLineType.None, 0, content)
+                .addLine(MDLine.MDLineType.None, 0, "");
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/StrongLineConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/StrongLineConverter.java
new file mode 100644
index 0000000..ee88a65
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/StrongLineConverter.java
@@ -0,0 +1,15 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Wraps content in double asterisks, the Markdown strong marker. */
+/* package */ class StrongLineConverter extends LineConverter {
+
+    /** Returns one line of type None holding {@code **content**}. */
+    @Override
+    public MDLines convert(String content) {
+        String strong = "**" + content + "**";
+        return new MDLines().addLine(MDLine.MDLineType.None, 0, strong);
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/converters/TextConverter.java b/src/main/java/com/pnikosis/html2markdown/converters/TextConverter.java
new file mode 100644
index 0000000..51122bd
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/converters/TextConverter.java
@@ -0,0 +1,13 @@
+package com.pnikosis.html2markdown.converters;
+
+import com.pnikosis.html2markdown.md.MDLine;
+import com.pnikosis.html2markdown.md.MDLines;
+
+/** Fallback converter used for tags without a dedicated converter. */
+/*package*/ class TextConverter extends LineConverter {
+    /** Puts {@code content} on a single line of type None without adding any markup. */
+    @Override
+    protected MDLines convert(String content) {
+        return new MDLines().addLine(MDLine.MDLineType.None, 0, content);
+    }
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/extractor/TextExtractor.java b/src/main/java/com/pnikosis/html2markdown/extractor/TextExtractor.java
new file mode 100644
index 0000000..abd09d3
--- /dev/null
+++ b/src/main/java/com/pnikosis/html2markdown/extractor/TextExtractor.java
@@ -0,0 +1,13 @@
+package com.pnikosis.html2markdown.extractor;
+
+import java.util.List;
+import org.jsoup.nodes.Node;
+
+/** Strategy that turns a list of DOM nodes into the text a converter works on. */
+public interface TextExtractor {
+    /**
+     * @param nodes child nodes of the element being converted
+     * @return the text extracted from {@code nodes}
+     */
+    String extract(List<Node> nodes);
+}
diff --git a/src/main/java/com/pnikosis/html2markdown/MDLine.java b/src/main/java/com/pnikosis/html2markdown/md/MDLine.java
similarity index 86%
rename from 
src/main/java/com/pnikosis/html2markdown/MDLine.java rename to src/main/java/com/pnikosis/html2markdown/md/MDLine.java index 8f2c296..19943cb 100644 --- a/src/main/java/com/pnikosis/html2markdown/MDLine.java +++ b/src/main/java/com/pnikosis/html2markdown/md/MDLine.java @@ -1,17 +1,17 @@ -package com.pnikosis.html2markdown; +package com.pnikosis.html2markdown.md; public class MDLine { private int level = 0; private MDLineType type; - private StringBuilder content; + private String content; public MDLine(MDLineType type, int level, String content) { this.type = type; this.level = level; - this.content = new StringBuilder(content); + this.content = content.trim(); } - public MDLine create(String line) { + public static MDLine create(String line) { int spaces = 0; while ((spaces < line.length()) && (line.charAt(spaces) == ' ')) { spaces++; @@ -60,7 +60,7 @@ public MDLine create(String line) { return new MDLine(MDLineType.None, newLevel, content); } - public MDLineType getListTypeName() { + public MDLineType getLineType() { return type; } @@ -98,21 +98,21 @@ public String toString() { } public String getContent() { - return content.toString(); + return content; } public void append(String appendContent) { if (content.length() == 0) { - int i = 0; - while (i < appendContent.length() && Character.isWhitespace(appendContent.charAt(i))) { - i++; - } - content.append(appendContent.substring(i)); + content = appendContent.trim(); } else { - content.append(appendContent); + content = content + appendContent; } } + public boolean isEmpty() { + return level == 0 && content.isEmpty() && type.equals(MDLineType.None); + } + @Override public boolean equals(Object o) { return o instanceof MDLine && ((MDLine) o).type.equals(this.type); diff --git a/src/main/java/com/pnikosis/html2markdown/md/MDLines.java b/src/main/java/com/pnikosis/html2markdown/md/MDLines.java new file mode 100644 index 0000000..9ce0855 --- /dev/null +++ b/src/main/java/com/pnikosis/html2markdown/md/MDLines.java @@ 
-0,0 +1,143 @@
+package com.pnikosis.html2markdown.md;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * List of Markdown lines that refuses to store two empty lines next to each other.
+ */
+public class MDLines extends ArrayList<MDLine> {
+    /** Appends a line built from the given parts, following the rules of {@link #add(MDLine)}. */
+    public void add(MDLine.MDLineType lineType, int level, String content) {
+        this.add(new MDLine(lineType, level, content));
+    }
+
+    /** Chainable form of {@link #add(MDLine)}. */
+    public MDLines addLine(MDLine mdLine) {
+        this.add(mdLine);
+        return this;
+    }
+
+    /** Chainable form of {@link #add(MDLine.MDLineType, int, String)}. */
+    public MDLines addLine(MDLine.MDLineType lineType, int level, String content) {
+        this.add(lineType, level, content);
+        return this;
+    }
+
+    /**
+     * Appends a line unless it is empty and the current last line is empty as well.
+     *
+     * @return true when the line was stored
+     */
+    @Override
+    public boolean add(MDLine mdLine) {
+        if (mdLine.isEmpty() && size() > 0 && get(size() - 1).isEmpty()) {
+            return false;
+        }
+        return super.add(mdLine);
+    }
+
+    /**
+     * Inserts a line at {@code index} unless it is empty and would end up next to an empty line.
+     *
+     * @throws IndexOutOfBoundsException when index is negative or greater than size()
+     */
+    @Override
+    public void add(int index, MDLine mdLine) {
+        rangeCheckForAdd(index);
+
+        if (mdLine.isEmpty()) {
+            // Both neighbours are checked for every valid index, including index == size()
+            // (append) and index == size() - 1, which used to slip past the check.
+            boolean previousEmpty = index > 0 && get(index - 1).isEmpty();
+            boolean nextEmpty = index < size() && get(index).isEmpty();
+            if (previousEmpty || nextEmpty) {
+                return;
+            }
+        }
+
+        super.add(index, mdLine);
+    }
+
+    /**
+     * Appends the collection with runs of empty lines collapsed. An empty list is treated as if
+     * it ended in an empty line, so leading empty lines are dropped.
+     */
+    @Override
+    public boolean addAll(Collection<? extends MDLine> collection) {
+        boolean startingEmpty = true;
+        if (size() > 0) {
+            startingEmpty = get(size() - 1).isEmpty();
+        }
+
+        return super.addAll(cleanCollection(collection, startingEmpty));
+    }
+
+    /**
+     * Inserts the collection at {@code index} with runs of empty lines collapsed, also against
+     * the lines directly before and after the insertion point.
+     *
+     * @return true when at least one line was inserted
+     * @throws IndexOutOfBoundsException when index is negative or greater than size()
+     */
+    @Override
+    public boolean addAll(int index, Collection<? extends MDLine> collection) {
+        rangeCheckForAdd(index);
+
+        boolean startingEmpty = index > 0 && get(index - 1).isEmpty();
+        // index == size() is a valid insertion point that has no following line to inspect.
+        boolean endingEmpty = index < size() && get(index).isEmpty();
+
+        List<MDLine> cleanCollection = cleanCollection(collection, startingEmpty);
+        // Nothing may be left after cleaning, so guard before looking at the last element.
+        int last = cleanCollection.size() - 1;
+        if (endingEmpty && last >= 0 && cleanCollection.get(last).isEmpty()) {
+            cleanCollection.remove(last);
+        }
+
+        return !cleanCollection.isEmpty() && super.addAll(index, cleanCollection);
+    }
+
+    /** Copies the collection, skipping every empty line that directly follows an empty one. */
+    private List<MDLine> cleanCollection(
+            Collection<? extends MDLine> collection, boolean fromEmpty) {
+        List<MDLine> newLines = new ArrayList<>();
+
+        boolean previousEmpty = fromEmpty;
+        for (MDLine mdLine : collection) {
+            boolean currentEmpty = mdLine.isEmpty();
+            if (!(previousEmpty && currentEmpty)) {
+                newLines.add(mdLine);
+            }
+            previousEmpty = currentEmpty;
+        }
+
+        return newLines;
+    }
+
+    /** Joins the trimmed text of every line with "\n"; no line break follows the last line. */
+    @Override
+    public String toString() {
+        StringBuilder result = new StringBuilder();
+        for (int i = 0; i < size(); i++) {
+            String line = get(i).toString().trim();
+            result.append(line);
+            if (i < size() - 1) {
+                result.append("\n");
+            }
+        }
+
+        return result.toString();
+    }
+
+    private void rangeCheckForAdd(int index) {
+        if (index > size() || index < 0) {
+            throw new IndexOutOfBoundsException(outOfBoundsMsg(index));
+        }
+    }
+
+    private String outOfBoundsMsg(int index) {
+        return "Index: " + index + ", Size: " + size();
+    }
+}
diff --git a/src/test/groovy/com/pnikosis/html2markdown/converters/LineConverterSpec.groovy b/src/test/groovy/com/pnikosis/html2markdown/converters/LineConverterSpec.groovy
new file mode 100644
index 0000000..a8a32db
--- /dev/null
+++ b/src/test/groovy/com/pnikosis/html2markdown/converters/LineConverterSpec.groovy
@@ -0,0 +1,80 @@
+package com.pnikosis.html2markdown.converters
+
+import com.pnikosis.html2markdown.extractor.TextExtractor
+import com.pnikosis.html2markdown.md.MDLines
+import org.jsoup.nodes.Element
+import org.jsoup.nodes.Node
+import org.jsoup.nodes.TextNode
+import org.jsoup.parser.Tag
+import spock.lang.Specification
+import spock.lang.Unroll
+
+class LineConverterSpec extends Specification {
+    private static final String BASE_URL = ""
+    private static final String CONTENT = "Some content"
+    private static final String CONTENT_2 = "More content"
+
+    @Unroll
+    def 'on #headerTag should create a markdown header #mdPrefix correctly'() {
+        given:
+        Element element = new Element(Tag.valueOf(headerTag), BASE_URL);
+        element.append(CONTENT);
+
+        when:
+        String text = LineConverter.convert(element, getTextExtractor()).toString();
+
+        then:
+        text.equals(mdPrefix + " " + CONTENT);
+
+        where:
+        headerTag | mdPrefix
+        "h1"      | "#"
+        "h2"      | "##"
+        "h3"      | "###"
+        "h4"      | "###"
+        "h10"     | "###"
+    }
+
+    def 'should convert code blocks to indented text correctly'() {
+        given:
+        Element element = new Element(Tag.valueOf("code"), BASE_URL);
+        element.append(CONTENT)
+
+        when:
+        MDLines lines = LineConverter.convert(element, getTextExtractor())
+
+        then:
+        lines.size() == 3
+        lines.get(0).isEmpty()
+        lines.get(1).getLevel() == 1
+        lines.get(1).getContent().equals(CONTENT)
+        lines.get(2).isEmpty()
+    }
+
+    def 'should convert multi line code blocks to indented text correctly'() {
+        given:
+        Element element = new Element(Tag.valueOf("code"), BASE_URL);
+        element.text(CONTENT + System.lineSeparator() + CONTENT_2)
+
+        when:
+        MDLines lines = LineConverter.convert(element, getTextExtractor())
+
+        then:
+        lines.size() == 4
+        lines.get(0).isEmpty()
+        lines.get(1).getLevel() == 1
+        lines.get(1).getContent().equals(CONTENT)
+        lines.get(2).getLevel() == 1
+        lines.get(2).getContent().equals(CONTENT_2)
+        lines.get(3).isEmpty()
+    }
+
+    def TextExtractor getTextExtractor() {
+        return new TextExtractor() {
+            @Override
+            String extract(List nodes) {
+                return ((TextNode) nodes.get(0)).getWholeText();
+            }
+        }
+    }
+}
diff --git a/src/test/groovy/com/pnikosis/html2markdown/md/MDLineSpec.groovy b/src/test/groovy/com/pnikosis/html2markdown/md/MDLineSpec.groovy
new file mode 100644
index 0000000..2574963
--- /dev/null
+++ b/src/test/groovy/com/pnikosis/html2markdown/md/MDLineSpec.groovy
@@ -0,0 +1,56 @@
+package com.pnikosis.html2markdown.md;
+
+import spock.lang.Specification
+import spock.lang.Unroll;
+
+// Checks the level and line type that MDLine.create() reports for raw text lines.
+class MDLineSpec extends Specification {
+    private static final String CONTENT = "SOME CONTENT"
+
+    @Unroll
+    def 'should create correctly a level #level line when having #spaces leading spaces'() {
+        given:
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < spaces; i++) {
+            sb.append(" ");
+        }
+        sb.append(CONTENT)
+        MDLine mdLine = MDLine.create(sb.toString())
+
+        expect:
+        // A real condition: previously the result was assigned to the data variable and never checked.
+        mdLine.getLevel() == level
+
+        where:
+        spaces | level
+        0      | 0
+        1      | 0
+        2      | 0
+        3      | 0
+        4      | 1
+        5      | 1
+        6      | 1
+        7      | 1
+    }
+
+    @Unroll
+    def 'should be unordered list #isOrdered when giving content in a list starting with #prefix'() {
+        given:
+        String lineContent = prefix + " " + CONTENT
+        MDLine mdLine = MDLine.create(lineContent)
+
+        expect:
+        // A real condition: previously the result was assigned to the data variable and never checked.
+        mdLine.getLineType().equals(MDLine.MDLineType.Unordered) == isOrdered
+
+        where:
+        prefix | isOrdered
+        "*"    | true
+        "-"    | true
+        "+"    | true
+        "."    | false
+        ""     | false
+        "x"    | false
+    }
+
+}
diff --git a/src/test/groovy/com/pnikosis/html2markdown/md/MDLinesSpec.groovy b/src/test/groovy/com/pnikosis/html2markdown/md/MDLinesSpec.groovy
new file mode 100644
index 0000000..742aea0
--- /dev/null
+++ b/src/test/groovy/com/pnikosis/html2markdown/md/MDLinesSpec.groovy
@@ -0,0 +1,236 @@
+package com.pnikosis.html2markdown.md;
+
+import spock.lang.Specification
+import spock.lang.Unroll;
+
+// Covers how MDLines handles empty lines in add(), add(index), addAll() and addAll(index).
+class MDLinesSpec extends Specification {
+    // add(): the second empty line in a row is rejected, the first one is kept.
+    def 'should not add two empty lines on empty array'() {
+        given:
+        MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines();
+
+        when:
+        mdLines.add(line)
+        mdLines.add(line)
+
+        then:
+        mdLines.size() == 1
+        mdLines.get(0).isEmpty()
+    }
+
+    // add(): content followed by two empty lines ends up as content plus one empty line.
+    def 'should not add consecutive empty lines'() {
+        given:
+        MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines();
+
+        when:
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(line)
+        mdLines.add(line)
+
+        then:
+        mdLines.size() == 2
+    }
+
+    // add(): empty lines separated by content are all kept.
+    def 'should add non consecutive empty lines'() {
+        given:
+        MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines();
+
+        when:
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(line)
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(line)
+
+        then:
+        mdLines.size() == 4
+    }
+
+    // add(index): an empty line may be inserted in front of a non-empty first line.
+    def 'should add on index 0 an empty line'() {
+        given:
+        MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines();
+
+        when:
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(line)
+        mdLines.add(MDLine.MDLineType.None, 0, "some more content");
+        mdLines.add(0, line)
+
+        then:
+        mdLines.size() == 4
+        mdLines.get(0).isEmpty()
+        mdLines.get(1).getContent().equals("some content")
+        mdLines.get(2).isEmpty()
+        mdLines.get(3).getContent().equals("some more content")
+    }
+
+    // add(index): inserting an empty line directly before or after an empty line is ignored.
+    @Unroll
+    def 'should not add on indexed insertion #pos empty line'() {
+        given:
+        MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines();
+
+        when:
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(line)
+        mdLines.add(MDLine.MDLineType.None, 0, "some more content");
+        mdLines.add(index, line)
+
+        then:
+        mdLines.size() == 3
+
+        where:
+        pos      | index
+        "before" | 1
+        "after"  | 2
+    }
+
+    // addAll(): on an empty target, leading empty lines are dropped entirely.
+    def 'should not add empty lines on addAll() in empty MDLines'() {
+        given:
+        List lines = new ArrayList<>();
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        MDLines mdLines = new MDLines()
+
+        when:
+        mdLines.addAll(lines)
+
+        then:
+        lines.size() == 3
+        mdLines.size() == 0
+    }
+
+    // addAll(): a run of empty lines after existing content collapses into one.
+    def 'should not add duplicated consecutive empty lines on addAll()'() {
+        given:
+        List lines = new ArrayList<>();
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "extra content")
+        MDLines mdLines = new MDLines().addLine(MDLine.MDLineType.None, 0, "some content")
+
+        when:
+        mdLines.addAll(lines)
+
+        then:
+        mdLines.size() == 3
+        mdLines.get(0).getContent().equals("some content")
+        mdLines.get(1).isEmpty()
+        mdLines.get(2).getContent().equals("extra content")
+    }
+
+    // addAll(): a run of empty lines inside the added collection collapses into one.
+    def 'should not add duplicated consecutive empty lines in between on addAll()'() {
+        given:
+        List lines = new ArrayList<>();
+        lines << new MDLine(MDLine.MDLineType.None, 0, "first line")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "last line")
+        MDLines mdLines = new MDLines()
+
+        when:
+        mdLines.addAll(lines)
+
+        then:
+        lines.size() == 5
+        mdLines.size() == 3
+    }
+
+    // addAll(index): empty lines are collapsed against the line before the insertion point too.
+    def 'should not add indexed duplicated consecutive empty lines in between on addAll()'() {
+        given:
+        MDLine emptyLine = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines()
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(emptyLine)
+        mdLines.add(MDLine.MDLineType.None, 0, "some more content");
+        mdLines.add(emptyLine)
+        List lines = new ArrayList<>();
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "first line")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "last line")
+
+        when:
+        mdLines.addAll(2, lines)
+
+        then:
+        mdLines.size() == 7
+        mdLines.get(0).getContent().equals("some content")
+        mdLines.get(1).getContent().equals("")
+        mdLines.get(2).getContent().equals("first line")
+        mdLines.get(3).getContent().equals("")
+        mdLines.get(4).getContent().equals("last line")
+        mdLines.get(5).getContent().equals("some more content")
+        mdLines.get(6).getContent().equals("")
+    }
+
+    // addAll(index): inserting only empty lines right after an empty line changes nothing.
+    def 'should not add indexed empty consecutive lines in between on addAll()'() {
+        given:
+        MDLine emptyLine = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines()
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(emptyLine)
+        mdLines.add(MDLine.MDLineType.None, 0, "some more content");
+        mdLines.add(emptyLine)
+        List lines = new ArrayList<>();
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+
+        when:
+        mdLines.addAll(2, lines)
+
+        then:
+        mdLines.size() == 4
+        mdLines.get(0).getContent().equals("some content")
+        mdLines.get(1).getContent().equals("")
+        mdLines.get(2).getContent().equals("some more content")
+        mdLines.get(3).getContent().equals("")
+    }
+
+    // addAll(index): a trailing empty line is dropped when the line after the insertion point is empty.
+    def 'should not add indexed trailing empty consecutive lines in between on addAll()'() {
+        given:
+        MDLine emptyLine = new MDLine(MDLine.MDLineType.None, 0, "");
+        MDLines mdLines = new MDLines()
+        mdLines.add(MDLine.MDLineType.None, 0, "some content");
+        mdLines.add(emptyLine)
+        mdLines.add(MDLine.MDLineType.None, 0, "some more content");
+        mdLines.add(emptyLine)
+        List lines = new ArrayList<>();
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "interesting line")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+        lines << new MDLine(MDLine.MDLineType.None, 0, "")
+
+        when:
+        mdLines.addAll(1, lines)
+
+        then:
+        mdLines.size() == 6
+        mdLines.get(0).getContent().equals("some content")
+        mdLines.get(1).getContent().equals("")
+        mdLines.get(2).getContent().equals("interesting line")
+        mdLines.get(3).getContent().equals("")
+        mdLines.get(4).getContent().equals("some more content")
+        mdLines.get(5).getContent().equals("")
+    }
+}