");
+ }
+
+    [Fact]
+    public async Task ConvertAsync_EmailWithHtmlContent_ConvertsHtmlToMarkdown()
+    {
+        // Arrange: single-part text/html message; header lines stay at column 0 inside the verbatim string
+        var htmlEmail = @"Date: Mon, 15 Jan 2024 10:30:00 +0000
+From: sender@example.com
+To: recipient@example.com
+Subject: HTML Test
+MIME-Version: 1.0
+Content-Type: text/html; charset=UTF-8
+
+<html>
+<body>
+<h1>Welcome</h1>
+<p>This is <strong>bold</strong> text and <em>italic</em> text.</p>
+</body>
+</html>
+";
+
+        var converter = new EmlConverter();
+        var bytes = Encoding.UTF8.GetBytes(htmlEmail);
+        using var stream = new MemoryStream(bytes);
+        var streamInfo = new StreamInfo(mimeType: "message/rfc822");
+
+        // Act
+        var result = await converter.ConvertAsync(stream, streamInfo);
+
+        // Assert: conversion succeeded and the Subject header is surfaced as the title
+        result.ShouldNotBeNull();
+        result.Markdown.ShouldNotBeNullOrWhiteSpace();
+        result.Title.ShouldBe("HTML Test");
+
+        // Check that HTML was converted to Markdown (heading, strong and emphasis markup)
+        result.Markdown.ShouldContain("# Welcome");
+        result.Markdown.ShouldContain("**bold**");
+        result.Markdown.ShouldContain("*italic*");
+    }
+
+    [Fact]
+    public async Task MarkItDown_ConvertAsync_EmlFile_WorksEndToEnd()
+    {
+        // Arrange: go through the MarkItDown entry point instead of calling EmlConverter directly
+        var info = new StreamInfo(mimeType: "message/rfc822", extension: ".eml");
+        var entryPoint = new global::MarkItDown.MarkItDown();
+        var payload = Encoding.UTF8.GetBytes(SampleEmail);
+        using var input = new MemoryStream(payload);
+
+        // Act
+        var converted = await entryPoint.ConvertAsync(input, info);
+
+        // Assert: title, rendered subject line and greeting text are all present
+        converted.ShouldNotBeNull();
+        converted.Markdown.ShouldNotBeNullOrWhiteSpace();
+        converted.Title.ShouldBe("Important Project Update");
+        converted.Markdown.ShouldContain("**Subject:** Important Project Update");
+        converted.Markdown.ShouldContain("Hello Jane");
+    }
+}
\ No newline at end of file
diff --git a/tests/MarkItDown.Tests/NewConvertersTests.cs b/tests/MarkItDown.Tests/NewConvertersTests.cs
index 302811770..8aa7f9b20 100644
--- a/tests/MarkItDown.Tests/NewConvertersTests.cs
+++ b/tests/MarkItDown.Tests/NewConvertersTests.cs
@@ -147,6 +147,7 @@ public void AllNewConverters_HaveCorrectPriorities()
[InlineData(".pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation")]
[InlineData(".jpg", "image/jpeg")]
[InlineData(".png", "image/png")]
+ [InlineData(".eml", "message/rfc822")]
public void MarkItDown_RegistersNewConverters_CanHandleNewFormats(string extension, string mimeType)
{
// Arrange
@@ -160,4 +161,64 @@ public void MarkItDown_RegistersNewConverters_CanHandleNewFormats(string extensi
// Assert
canHandle.ShouldBeTrue($"Should have a converter that can handle {extension} files");
}
+
+    [Fact]
+    public void EmlConverter_AcceptsInput_ValidEmlExtension_ReturnsTrue()
+    {
+        // Arrange: descriptor carrying both the .eml extension and the message/rfc822 MIME type
+        var sut = new EmlConverter();
+        var emlInfo = new StreamInfo(extension: ".eml", mimeType: "message/rfc822");
+
+        // Act
+        var accepted = sut.AcceptsInput(emlInfo);
+
+        // Assert
+        accepted.ShouldBeTrue();
+    }
+
+    [Fact]
+    public void EmlConverter_AcceptsInput_InvalidExtension_ReturnsFalse()
+    {
+        // Arrange: plain-text descriptor where neither extension nor MIME type is email-related
+        var sut = new EmlConverter();
+        var textInfo = new StreamInfo(extension: ".txt", mimeType: "text/plain");
+
+        // Act
+        var accepted = sut.AcceptsInput(textInfo);
+
+        // Assert
+        accepted.ShouldBeFalse();
+    }
+
+    [Theory]
+    [InlineData(".eml", "message/rfc822")]
+    [InlineData(".eml", "message/email")]
+    [InlineData(".eml", "application/email")]
+    [InlineData(".eml", "text/email")]
+    public void EmlConverter_AcceptsInput_ValidMimeTypes_ReturnsTrue(string extension, string mimeType)
+    {
+        // Arrange (NOTE(review): every case also passes ".eml", so MIME-only acceptance is not isolated - confirm intent)
+        var sut = new EmlConverter();
+        var info = new StreamInfo(extension: extension, mimeType: mimeType);
+
+        // Act
+        var accepted = sut.AcceptsInput(info);
+
+        // Assert
+        accepted.ShouldBeTrue($"Should accept {extension} files with MIME type {mimeType}");
+    }
+
+    [Fact]
+    public void EmlConverter_Priority_IsBetweenPptxAndEpub()
+    {
+        // Arrange: capture each converter's priority from a fresh instance
+        var emlPriority = new EmlConverter().Priority;
+        var epubPriority = new EpubConverter().Priority;
+        var pptxPriority = new PptxConverter().Priority;
+
+        // Act & Assert
+        // Lower number = higher priority: EML (240) must fall strictly between PPTX (230) and EPUB (250)
+        emlPriority.ShouldBeGreaterThan(pptxPriority);
+        emlPriority.ShouldBeLessThan(epubPriority);
+    }
}
From 87cfbba2ebecbd37fbe5d2cb38a9e8902d136986 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 28 Sep 2025 15:06:39 +0000
Subject: [PATCH 3/3] Fix priority comment clarity and extract FormatFileSize
to FileUtilities to avoid code duplication
Co-authored-by: KSemenenko <4385716+KSemenenko@users.noreply.github.com>
---
src/MarkItDown/Converters/EmlConverter.cs | 21 +++---------------
src/MarkItDown/Converters/ZipConverter.cs | 19 ++--------------
src/MarkItDown/FileUtilities.cs | 27 +++++++++++++++++++++++
3 files changed, 32 insertions(+), 35 deletions(-)
create mode 100644 src/MarkItDown/FileUtilities.cs
diff --git a/src/MarkItDown/Converters/EmlConverter.cs b/src/MarkItDown/Converters/EmlConverter.cs
index 1efc2d81e..e7068a1e9 100644
--- a/src/MarkItDown/Converters/EmlConverter.cs
+++ b/src/MarkItDown/Converters/EmlConverter.cs
@@ -30,7 +30,7 @@ public sealed class EmlConverter : IDocumentConverter
private readonly HtmlConverter _htmlConverter;
- public int Priority => 240; // Between EPUB and PPTX
+ public int Priority => 240; // Between PPTX (230) and EPUB (250) - lower numbers = higher priority
public EmlConverter()
{
@@ -196,13 +196,13 @@ private static List ExtractAttachmentInfo(MimeMessage message)
// Try to get size from Content-Length header or content disposition
if (part.ContentDisposition?.Size.HasValue == true)
{
- size = FormatFileSize(part.ContentDisposition.Size.Value);
+ size = FileUtilities.FormatFileSize(part.ContentDisposition.Size.Value);
}
else if (part.Headers.Contains("Content-Length"))
{
if (long.TryParse(part.Headers["Content-Length"], out var contentLength))
{
- size = FormatFileSize(contentLength);
+ size = FileUtilities.FormatFileSize(contentLength);
}
}
}
@@ -261,20 +261,5 @@ private static string EscapeMarkdown(string text)
// Don't escape angle brackets, parentheses, and other characters in email contexts
}
- private static string FormatFileSize(long bytes)
- {
- string[] sizes = { "bytes", "KB", "MB", "GB" };
- double len = bytes;
- int order = 0;
-
- while (len >= 1024 && order < sizes.Length - 1)
- {
- order++;
- len /= 1024;
- }
-
- return $"{len:0.##} {sizes[order]}";
- }
-
private sealed record AttachmentInfo(string Name, string ContentType, string Size);
}
\ No newline at end of file
diff --git a/src/MarkItDown/Converters/ZipConverter.cs b/src/MarkItDown/Converters/ZipConverter.cs
index b23cc3657..6666f0adc 100644
--- a/src/MarkItDown/Converters/ZipConverter.cs
+++ b/src/MarkItDown/Converters/ZipConverter.cs
@@ -151,7 +151,7 @@ private async Task ProcessZipEntry(ZipArchiveEntry entry, StringBuilder markdown
// Add basic file information
if (entry.Length > 0)
{
- markdown.AppendLine($"**Size:** {FormatFileSize(entry.Length)}");
+ markdown.AppendLine($"**Size:** {FileUtilities.FormatFileSize(entry.Length)}");
}
if (entry.LastWriteTime != DateTimeOffset.MinValue)
@@ -173,7 +173,7 @@ private async Task ProcessZipEntry(ZipArchiveEntry entry, StringBuilder markdown
const long maxFileSize = 50 * 1024 * 1024; // 50MB
if (entry.Length > maxFileSize)
{
- markdown.AppendLine($"*File too large to process ({FormatFileSize(entry.Length)})*");
+ markdown.AppendLine($"*File too large to process ({FileUtilities.FormatFileSize(entry.Length)})*");
markdown.AppendLine();
return;
}
@@ -253,19 +253,4 @@ private async Task ProcessZipEntry(ZipArchiveEntry entry, StringBuilder markdown
return null;
}
-
- private static string FormatFileSize(long bytes)
- {
- string[] sizes = { "B", "KB", "MB", "GB" };
- double len = bytes;
- int order = 0;
-
- while (len >= 1024 && order < sizes.Length - 1)
- {
- order++;
- len /= 1024;
- }
-
- return $"{len:0.##} {sizes[order]}";
- }
}
diff --git a/src/MarkItDown/FileUtilities.cs b/src/MarkItDown/FileUtilities.cs
new file mode 100644
index 000000000..2a1c56e28
--- /dev/null
+++ b/src/MarkItDown/FileUtilities.cs
@@ -0,0 +1,27 @@
+namespace MarkItDown;
+
+/// <summary>
+/// Utility class for common file operations and formatting.
+/// </summary>
+internal static class FileUtilities
+{
+    /// <summary>
+    /// Formats a file size in bytes to a human-readable string, dividing by 1024 per unit step (at most two decimals).
+    /// </summary>
+    /// <param name="bytes">The size in bytes.</param>
+    /// <returns>A culture-invariant string with the size and appropriate unit (B, KB, MB, GB); GB is the largest unit used.</returns>
+    public static string FormatFileSize(long bytes)
+    {
+        string[] sizes = { "B", "KB", "MB", "GB" };
+        double len = bytes;
+        int order = 0;
+
+        while (len >= 1024 && order < sizes.Length - 1)
+        {
+            order++;
+            len /= 1024;
+        }
+        // Invariant culture keeps '.' as the decimal separator regardless of the host locale.
+        return System.FormattableString.Invariant($"{len:0.##} {sizes[order]}");
+    }
+}
\ No newline at end of file