-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathProgram.cs
More file actions
134 lines (113 loc) · 4.47 KB
/
Program.cs
File metadata and controls
134 lines (113 loc) · 4.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
namespace EpubToSplitTxt;
/// <summary>
/// Console entry point that loads the application settings, scans the configured
/// folder for <c>.epub</c> files and runs each one through the converter and the
/// text splitter.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the whole pipeline: load configuration, make sure the working directories
    /// exist, then process every <c>.epub</c> file found directly inside the raw folder.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    /// <remarks>
    /// The process exit code is set to 1 when an unexpected error occurs or when at
    /// least one file fails, so scripts invoking the tool can detect failures.
    /// </remarks>
    static async Task Main(string[] args)
    {
        Console.WriteLine("=== Epub 转文本与章节切分系统 ===");
        Console.WriteLine();

        try
        {
            // Load configuration.
            var config = LoadConfiguration();

            // Make sure every working directory exists before touching it.
            EnsureDirectoriesExist(config.Paths);

            // Find all Epub files (top directory only, no recursion).
            var epubFiles = Directory.GetFiles(config.Paths.RawEpubFolder, "*.epub", SearchOption.TopDirectoryOnly);
            if (epubFiles.Length == 0)
            {
                Console.WriteLine($"[WARN] 在 {config.Paths.RawEpubFolder} 中未找到任何 .epub 文件");
                Console.WriteLine("请将 Epub 文件放入该目录后重试。");
                return;
            }

            Console.WriteLine($"[INFO] 找到 {epubFiles.Length} 个 Epub 文件");
            Console.WriteLine();

            // Initialize the components shared by all files.
            var converter = new EpubConverter();
            var splitter = new TextSplitter(
                config.Splitter.ChapterRegex,
                config.Splitter.SectionRegex,
                config.Splitter.MinChapterLength);

            // Process each file; one failing file must not stop the remaining ones.
            int failedCount = 0;
            foreach (var epubPath in epubFiles)
            {
                if (!await ProcessEpubFile(epubPath, converter, splitter, config))
                {
                    failedCount++;
                }

                Console.WriteLine();
            }

            if (failedCount == 0)
            {
                Console.WriteLine("[INFO] 所有文件处理完成!");
            }
            else
            {
                // Do not claim overall success when some files failed.
                Console.WriteLine($"[WARN] 处理结束,{failedCount}/{epubFiles.Length} 个文件处理失败");
                Environment.ExitCode = 1;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[ERROR] 发生错误: {ex.Message}");
            Console.WriteLine($"详细信息: {ex.StackTrace}");

            // Signal the failure to the calling shell / script.
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Loads <c>appsettings.json</c> from the application base directory, binds it to
    /// <see cref="AppSettings"/> and converts the three configured folders to absolute paths.
    /// </summary>
    /// <returns>The bound settings with absolute folder paths.</returns>
    /// <exception cref="InvalidOperationException">The <c>Paths</c> section is missing after binding.</exception>
    /// <exception cref="ArgumentException">One of the configured folder paths is null or blank.</exception>
    private static AppSettings LoadConfiguration()
    {
        var basePath = AppContext.BaseDirectory;
        var configuration = new ConfigurationBuilder()
            .SetBasePath(basePath)
            .AddJsonFile("appsettings.json", optional: false, reloadOnChange: false)
            .Build();

        var settings = new AppSettings();
        configuration.Bind(settings);

        // Fail with a readable message instead of a NullReferenceException below.
        if (settings.Paths is null)
        {
            throw new InvalidOperationException("appsettings.json 中缺少 Paths 配置节");
        }

        // Resolve relative paths against the directory the program lives in.
        settings.Paths.RawEpubFolder = GetAbsolutePath(basePath, settings.Paths.RawEpubFolder);
        settings.Paths.IntermediateTxtFolder = GetAbsolutePath(basePath, settings.Paths.IntermediateTxtFolder);
        settings.Paths.SplitOutputFolder = GetAbsolutePath(basePath, settings.Paths.SplitOutputFolder);
        return settings;
    }

    /// <summary>
    /// Converts a possibly relative path into an absolute one.
    /// </summary>
    /// <param name="basePath">Directory that relative paths are resolved against.</param>
    /// <param name="path">Rooted or relative path; must not be null or blank.</param>
    /// <returns>
    /// <paramref name="path"/> unchanged when it is already rooted; otherwise the
    /// normalized combination of <paramref name="basePath"/> and <paramref name="path"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null, empty or whitespace.</exception>
    private static string GetAbsolutePath(string basePath, string path)
    {
        // A blank value would otherwise silently resolve to the base directory itself.
        if (string.IsNullOrWhiteSpace(path))
        {
            throw new ArgumentException("配置中的目录路径不能为空", nameof(path));
        }

        if (Path.IsPathRooted(path))
        {
            return path;
        }

        return Path.GetFullPath(Path.Combine(basePath, path));
    }

    /// <summary>
    /// Creates the raw, intermediate and split-output directories when they do not exist yet.
    /// </summary>
    /// <param name="paths">Folder configuration holding the three directory paths.</param>
    private static void EnsureDirectoriesExist(PathsConfig paths)
    {
        Directory.CreateDirectory(paths.RawEpubFolder);
        Directory.CreateDirectory(paths.IntermediateTxtFolder);
        Directory.CreateDirectory(paths.SplitOutputFolder);
    }

    /// <summary>
    /// Processes a single Epub file: stage 1 converts it to text, stage 2 splits the
    /// text into chapters. Any exception is reported on the console and not rethrown.
    /// </summary>
    /// <param name="epubPath">Path of the Epub file to process.</param>
    /// <param name="converter">Converter used for stage 1.</param>
    /// <param name="splitter">Splitter used for stage 2.</param>
    /// <param name="config">Application settings providing the intermediate and output folders.</param>
    /// <returns><c>true</c> when both stages completed; <c>false</c> when an exception was caught.</returns>
    private static async Task<bool> ProcessEpubFile(
        string epubPath,
        EpubConverter converter,
        TextSplitter splitter,
        AppSettings config)
    {
        // Stopwatch is monotonic, unlike DateTime.Now which follows wall-clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        string fileName = Path.GetFileNameWithoutExtension(epubPath);
        Console.WriteLine($"[INFO] ========== 处理: {fileName} ==========");

        try
        {
            // Stage 1: Epub to plain text (presumably written to intermediatePath — see EpubConverter).
            string intermediatePath = Path.Combine(config.Paths.IntermediateTxtFolder, $"{fileName}_全本.txt");
            await converter.ConvertToTextAsync(epubPath, intermediatePath);

            // Stage 2: chapter splitting of the intermediate text into the output folder.
            await splitter.SplitTextAsync(
                intermediatePath,
                config.Paths.SplitOutputFolder,
                fileName);

            Console.WriteLine($"[INFO] 处理完成,耗时: {stopwatch.Elapsed.TotalSeconds:F2} 秒");
            return true;
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[ERROR] Epub 文件处理失败: {epubPath}");
            Console.WriteLine($"[ERROR] 原因: {ex.Message}");
            return false;
        }
    }
}