Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .vs/ConsoleApp7/DesignTimeBuild/.dtbcache
Binary file not shown.
11 changes: 11 additions & 0 deletions .vs/VSWorkspaceState.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"ExpandedNodes": [
"",
"\\ConsoleApp7",
"\\ConsoleApp7\\bin",
"\\ConsoleApp7\\bin\\Debug",
"\\ConsoleApp7\\Properties"
],
"SelectedNode": "\\WordCount.sln",
"PreviewInSolutionExplorer": false
}
Binary file added .vs/WordCount/DesignTimeBuild/.dtbcache
Binary file not shown.
Binary file added .vs/WordCount/v15/.suo
Binary file not shown.
Empty file.
Binary file added .vs/WordCount/v15/Server/sqlite3/storage.ide
Binary file not shown.
Binary file not shown.
Binary file added .vs/WordCount/v15/Server/sqlite3/storage.ide-wal
Binary file not shown.
Binary file added .vs/slnx.sqlite
Binary file not shown.
6 changes: 6 additions & 0 deletions ConsoleApp7/App.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.1" />
</startup>
</configuration>
49 changes: 49 additions & 0 deletions ConsoleApp7/Compare.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using System;

namespace WordCount
{
class Compare
{
    /// <summary>
    /// Compares two words character by character, ignoring the case of ASCII letters.
    /// </summary>
    /// <param name="str1">The first word; must not be null.</param>
    /// <param name="str2">The second word; must not be null.</param>
    /// <returns>
    /// A negative value when <paramref name="str1"/> sorts before <paramref name="str2"/>,
    /// zero when they are equal ignoring ASCII case, and a positive value otherwise.
    /// When one word is a prefix of the other, the shorter word sorts first.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public int compare(String str1, String str2)
    {
        if (str1 == null)
        {
            throw new ArgumentNullException("str1");
        }
        if (str2 == null)
        {
            throw new ArgumentNullException("str2");
        }

        int limit = Math.Min(str1.Length, str2.Length);

        for (int i = 0; i < limit; i++)
        {
            char c1 = FoldAsciiCase(str1[i]);
            char c2 = FoldAsciiCase(str2[i]);
            if (c1 != c2)
            {
                return c1 - c2;
            }
        }

        // Common prefix is identical: the shorter word comes first.
        return str1.Length - str2.Length;
    }

    /// <summary>
    /// Maps 'A'-'Z' to 'a'-'z' and leaves every other character untouched.
    /// </summary>
    /// <remarks>
    /// Only real upper-case letters are shifted. Shifting every character below 'a'
    /// would make distinct characters collide (for example '[' with '{') and would
    /// move digits, spaces and '_' to unrelated code points.
    /// </remarks>
    private static char FoldAsciiCase(char c)
    {
        return (c >= 'A' && c <= 'Z') ? (char)(c + ('a' - 'A')) : c;
    }

    /// <summary>
    /// Sorts <paramref name="word"/> in place into ascending order as defined by
    /// <see cref="compare"/> and returns the same array instance.
    /// </summary>
    /// <param name="word">The words to sort; must not be null.</param>
    /// <returns>The array that was passed in, now sorted.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
    public string[] Orderedwords(string[] word)
    {
        if (word == null)
        {
            throw new ArgumentNullException("word");
        }

        // A bubble sort is kept on purpose: it is stable, so words that compare as
        // equal (e.g. "Apple" and "apple") keep their original relative order.
        for (int pass = 0; pass < word.Length - 1; pass++)
        {
            bool swapped = false;
            for (int j = 0; j < word.Length - pass - 1; j++)
            {
                if (compare(word[j], word[j + 1]) > 0)
                {
                    string temp = word[j];
                    word[j] = word[j + 1];
                    word[j + 1] = temp;
                    swapped = true;
                }
            }

            // No swap in a full pass means the array is already sorted.
            if (!swapped)
            {
                break;
            }
        }
        return word;
    }
}
}
129 changes: 129 additions & 0 deletions ConsoleApp7/Do.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
using System;
using System.Collections;
using System.IO;
using System.Text.RegularExpressions;

namespace WordCount
{
/// <summary>
/// Drives one word-count run: reads the input through <c>Read</c>, counts how often
/// each phrase occurs, ranks the phrases and reports the result on the console and
/// in the output file.
/// </summary>
class Do
{
    int count = 0;   // number of \w+ matches in the text
    int zifushu;     // character count, taken from Read.sum
    int hangshu;     // line count, taken from Read.row
    int cihui;       // vocabulary size: number of distinct phrases
    string txt = null;

    /// <summary>Phrase (string) to occurrence count (int) for the last run.</summary>
    public Hashtable hashtable;

    Read read;
    readonly Compare comparer = new Compare();

    /// <summary>
    /// Runs the analysis and writes the report.
    /// </summary>
    /// <param name="path">Path of the text file to analyse.</param>
    /// <param name="wordlength">Number of words per phrase; forwarded to <c>Read.ReadText</c>.</param>
    /// <param name="outnum">How many of the top-ranked phrases to echo to the console.</param>
    /// <param name="outpath">Path of the report file to create.</param>
    public void doing(string path, int wordlength, int outnum, string outpath)
    {
        read = new Read();
        hashtable = new Hashtable();
        txt = read.ReadText(path, wordlength);

        for (int i = 0; i < read.word.Length; i++)
        {
            read.word[i] = read.word[i].ToLower();
        }
        count = Regex.Matches(txt, "\\w+").Count;

        CountPhrases(read.words);

        cihui = hashtable.Count;
        zifushu = read.sum;
        hangshu = read.row;

        Console.WriteLine("单词数:" + count);
        Console.WriteLine("字符数:" + zifushu);
        Console.WriteLine("行数:" + hangshu);
        Console.WriteLine("词汇量:" + cihui);
        Console.WriteLine("词组统计(词频优先字典序):");

        // Rank by frequency (highest first), then by dictionary order.
        string[] wd = new string[hashtable.Count];
        hashtable.Keys.CopyTo(wd, 0);
        Array.Sort(wd, ComparePhrases);

        new Write().write(wd, outpath, count, zifushu, hangshu, cihui);

        // Never index past the end when fewer phrases exist than were requested.
        int shown = Math.Min(outnum, wd.Length);
        for (int i = 0; i < shown; i++)
        {
            Console.WriteLine(wd[i] + ":" + hashtable[wd[i]]);
        }

        using (StreamWriter sw = new StreamWriter(outpath))
        {
            sw.WriteLine("单词数:" + count);
            sw.WriteLine("字符数:" + zifushu);
            sw.WriteLine("行数:" + hangshu);
            sw.WriteLine("词汇量:" + cihui);
            sw.WriteLine("词组频统计(词频优先字典序):");
            for (int i = 0; i < wd.Length; i++)
            {
                sw.WriteLine(wd[i] + ": " + hashtable[wd[i]]);
            }
        }

        // The report is flushed and closed before the console waits for Enter.
        Console.ReadLine();
    }

    /// <summary>
    /// Fills <see cref="hashtable"/> with the number of occurrences of each phrase.
    /// </summary>
    /// <remarks>
    /// Phrases are lower-cased before counting so that "The" and "the" share one
    /// entry. Lower-casing only <c>read.word</c> has no effect on the counts because
    /// the phrases have already been built by the time that happens.
    /// </remarks>
    private void CountPhrases(string[] phrases)
    {
        foreach (string phrase in phrases)
        {
            // Skip the empty entries a regex split can produce.
            if (string.IsNullOrEmpty(phrase))
            {
                continue;
            }

            string key = phrase.ToLower();
            object seen = hashtable[key]; // single lookup; null when the key is new
            hashtable[key] = seen == null ? 1 : (int)seen + 1;
        }
    }

    /// <summary>
    /// Ordering used for the report: higher count first, then <c>Compare.compare</c>,
    /// then ordinal comparison so that the result is fully deterministic.
    /// </summary>
    private int ComparePhrases(string left, string right)
    {
        int leftCount = (int)hashtable[left];
        int rightCount = (int)hashtable[right];
        if (leftCount != rightCount)
        {
            return rightCount.CompareTo(leftCount);
        }

        int byWord = comparer.compare(left, right);
        if (byWord != 0)
        {
            return byWord;
        }

        // Array.Sort is not stable, so break remaining ties explicitly.
        return string.CompareOrdinal(left, right);
    }
}
}

29 changes: 29 additions & 0 deletions ConsoleApp7/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

namespace WordCount
{
/// <summary>
/// Command-line entry point of the word counter.
/// </summary>
class Program
{
    /// <summary>
    /// Parses the command line and starts the analysis.
    /// </summary>
    /// <param name="args">
    /// Recognised options, each followed by a value:
    /// <c>-l</c> input file path (required),
    /// <c>-m</c> number of words per phrase (default 1),
    /// <c>-n</c> number of top phrases to print (default 0),
    /// <c>-o</c> output file path (default "/").
    /// Unrecognised arguments are ignored.
    /// </param>
    static void Main(string[] args)
    {
        int wordlength = 1;
        int outnum = 0;
        string outpath = "/";
        string path = null;

        for (int i = 0; i < args.Length; i++)
        {
            string option = args[i];
            if (option != "-l" && option != "-m" && option != "-n" && option != "-o")
            {
                continue;
            }

            // Every option needs a value; a trailing flag must not index past the array.
            if (i + 1 >= args.Length)
            {
                System.Console.Error.WriteLine("Missing value for option " + option);
                return;
            }
            string value = args[++i];

            switch (option)
            {
                case "-l": // input file path
                    path = value;
                    break;
                case "-m": // number of words per phrase
                    if (!int.TryParse(value, out wordlength) || wordlength < 1)
                    {
                        System.Console.Error.WriteLine("Option -m expects a positive integer, got: " + value);
                        return;
                    }
                    break;
                case "-n": // number of phrases to print
                    if (!int.TryParse(value, out outnum))
                    {
                        System.Console.Error.WriteLine("Option -n expects an integer, got: " + value);
                        return;
                    }
                    break;
                case "-o": // output file path
                    outpath = value;
                    break;
            }
        }

        if (path == null)
        {
            System.Console.Error.WriteLine("Usage: -l <input file> [-m <words per phrase>] [-n <top count>] [-o <output file>]");
            return;
        }

        new Do().doing(path, wordlength, outnum, outpath);
    }
}
}
36 changes: 36 additions & 0 deletions ConsoleApp7/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with the assembly.
[assembly: AssemblyTitle("ConsoleApp7")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("ConsoleApp7")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from COM,
// set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is the ID of the type library if this project is exposed to COM.
[assembly: Guid("ccf097b7-a4ef-45f4-a051-f95e6387e7e0")]

// Version information for an assembly consists of the following four values:
//
// Major version
// Minor version
// Build number
// Revision
//
// You can specify all the values, or you can default the build and revision numbers
// by using the "*" as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
45 changes: 45 additions & 0 deletions ConsoleApp7/Read.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace WordCount
{
/// <summary>
/// Loads a text file and derives the raw statistics used by the word counter.
/// </summary>
class Read
{
    /// <summary>Number of characters in the file last read.</summary>
    public int sum = 0;

    /// <summary>Number of '\n'-separated segments in the file last read.</summary>
    public int row = 0;

    /// <summary>Non-empty tokens obtained by splitting the text on <c>\W+</c>.</summary>
    public String[] word;

    /// <summary>Every run of consecutive tokens of the requested length, joined by single spaces.</summary>
    public String[] words;

    /// <summary>
    /// Reads the whole file once and fills <see cref="sum"/>, <see cref="row"/>,
    /// <see cref="word"/> and <see cref="words"/>.
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <param name="wordlenth">Number of consecutive words per phrase; must be at least 1.</param>
    /// <returns>The full text of the file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="wordlenth"/> is less than 1.</exception>
    public String ReadText(String path, int wordlenth)
    {
        if (path == null)
        {
            throw new ArgumentNullException("path");
        }
        if (wordlenth < 1)
        {
            throw new ArgumentOutOfRangeException("wordlenth", "Phrase length must be at least 1.");
        }

        // Read once and dispose the reader. Rewinding BaseStream under a live
        // StreamReader is unreliable because the reader keeps its own buffer.
        string text;
        using (StreamReader sr = new StreamReader(path, Encoding.Default))
        {
            text = sr.ReadToEnd();
        }

        sum = text.Length;
        row = text.Split('\n').Length;

        // Regex.Split yields empty strings when the text starts or ends with a
        // separator; those are not words and must not end up inside phrases.
        word = Regex.Split(text, @"\W+").Where(token => token.Length > 0).ToArray();

        // n tokens contain (n - wordlenth + 1) phrases of wordlenth consecutive tokens;
        // none when the text has fewer tokens than one phrase needs.
        int phraseCount = Math.Max(0, word.Length - wordlenth + 1);
        words = new string[phraseCount];
        for (int i = 0; i < phraseCount; i++)
        {
            words[i] = string.Join(" ", word, i, wordlenth);
        }

        return text;
    }
}
}
33 changes: 33 additions & 0 deletions ConsoleApp7/Text.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace ConsoleApp7
{
/// <summary>
/// Small console demo showing how a sentence is split into words with a regular expression.
/// </summary>
class Text
{
    /// <summary>
    /// Splits a fixed sample sentence on runs of non-word characters, prints each
    /// token with its 1-based position, prints the tokens again one per line
    /// followed by a blank line, reports the token count and waits for a key press.
    /// </summary>
    public void fun()
    {
        string sample = "Youth is asnot a time \nof (life";

        // Splitting on \W+ turns the text into words; needs System.Text.RegularExpressions.
        string[] tokens = Regex.Split(sample, @"\W+");
        int total = tokens.Length;

        // First listing: numbered, starting at 1.
        for (int position = 1; position <= total; position++)
        {
            Console.WriteLine("第{0}个单词是:{1}", position, tokens[position - 1]);
        }

        // Second listing: the bare tokens, closed by one empty line.
        foreach (string token in tokens)
        {
            Console.WriteLine(token);
        }
        if (total > 0)
        {
            Console.WriteLine();
        }

        Console.WriteLine("这个句子由{0}个单词组成", total);
        Console.ReadKey();
    }

}
}
Loading