我有一个由不同单词和短语组成的数组。用户会输入一封电子邮件,我需要检查其中是否出现了数组里的任何单词或短语。每出现一次匹配,分数就加 1;如果分数超过 5,就判定该邮件很可能是垃圾邮件。
但是我的分数并没有增加,我不知道为什么。
// Original attempt from the question: read one message from the console and score it.
// NOTE(review): `spam` is declared here but never read by the lines below, so no
// spam word or phrase is ever compared against the message.
string[] spam = new string[] {"-different words and phrases provided by programmer"};
Console.Write("Key in an email message: ");
string email = Console.ReadLine();
int score = 0;
// As a C# string literal this is the regex ^\[a-zA-Z] : the escaped bracket makes it
// match the literal text "[a-zA-Z]" at the start of the input, not a class of letters.
string pattern = "^\\[a-zA-Z]";
// NOTE(review): `expression` is not used below; `regexp` is built from the same pattern.
Regex expression = new Regex(pattern);
var regexp = new System.Text.RegularExpressions.Regex(pattern);
// The score is incremented in exactly one place, with no loop, and only when the
// message does NOT match the pattern - so it can never be greater than 1.
if (!regexp.IsMatch(email))
{
score += 1;
}
/// <summary>
/// Demo entry point: runs <c>SpamChecker</c> over two hard-coded sample messages
/// and prints, for each one, whether it was classified as spam.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string[] keywords = { "test", "ak", "admin", "againadmin" };
    string firstMessage = "Its great to see that admin ak is not perfroming test.";
    string secondMessage = "Its great to see that admin ak is not perfroming test againadmin.";

    // Evaluate in the same order as the messages are declared; one output line per message.
    string firstVerdict = SpamChecker(keywords, firstMessage) ? "email spam" : "email not spam";
    Console.WriteLine(firstVerdict);

    string secondVerdict = SpamChecker(keywords, secondMessage) ? "email1 spam" : "email1 not spam";
    Console.WriteLine(secondVerdict);

    // Block on console input so the window stays open until a key is pressed.
    Console.Read();
}
/// <summary>
/// Decides whether <paramref name="email"/> looks like spam by counting the
/// case-insensitive occurrences of every entry of <paramref name="spam"/> in it.
/// </summary>
/// <param name="spam">Spam words or phrases, treated as literal text. Null or empty entries are ignored.</param>
/// <param name="email">The message text to inspect.</param>
/// <param name="threshold">
/// The message is reported as spam as soon as the running match count is strictly
/// greater than this value. Defaults to 3.
/// </param>
/// <returns><c>true</c> if the match count exceeds <paramref name="threshold"/>; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="spam"/> or <paramref name="email"/> is <c>null</c>.
/// </exception>
/// <remarks>
/// Matching is by substring, so a short entry is also counted inside longer words
/// (for example "admin" is found inside "againadmin").
/// </remarks>
private static bool SpamChecker(string[] spam, string email, int threshold = 3)
{
    if (spam == null)
    {
        throw new ArgumentNullException(nameof(spam));
    }

    if (email == null)
    {
        throw new ArgumentNullException(nameof(email));
    }

    int score = 0;
    foreach (var item in spam)
    {
        // An empty pattern matches at every position of the text and would inflate the score.
        if (string.IsNullOrEmpty(item))
        {
            continue;
        }

        // Escape the entry so regex metacharacters in a phrase ("c++", "$$$", "1.5") are
        // matched literally instead of being interpreted as a pattern (or throwing).
        // RegexOptions.Compiled is intentionally not used: each pattern is evaluated once per
        // call here, and the option does not affect which matches are found.
        score += Regex.Matches(email, Regex.Escape(item), RegexOptions.IgnoreCase).Count;

        // Stop early once the limit is exceeded; remaining entries cannot change the answer.
        if (score > threshold)
        {
            return true;
        }
    }

    return false;
}
您可以使用 LINQ 来解决这个问题:
// Spam vocabulary. A HashSet is used for fast membership tests, and the
// OrdinalIgnoreCase comparer makes those tests case-insensitive ("And", "and", "AND"
// all hit) without upper-casing every word through the current culture.
HashSet<String> spamWords = new HashSet<String>(
    "different words and phrases provided by programmer"
        .Split(new Char[] {' '}, StringSplitOptions.RemoveEmptyEntries),
    StringComparer.OrdinalIgnoreCase);
// ...
String eMail = "phrases, not words and letters zzz";
// ...
// Split the message into \w+ tokens and count the tokens present in the spam set.
// score == 3: "phrases" + "words" + "and"
int score = Regex
    .Matches(eMail, @"\w+")
    .Cast<Match>()
    .Count(match => spamWords.Contains(match.Value));
在这个实现中,我以不区分大小写的方式查找垃圾邮件词(因此 And、and、AND 都会被计为垃圾邮件词)。
如果还要把复数形式和 -ing 形式(例如 word、wording)考虑在内,就必须使用词干提取器(stemmer)。