Skip to content

Commit

Permalink
Repo // dotnet-format.
Browse files Browse the repository at this point in the history
  • Loading branch information
ShikiSuen committed Feb 13, 2025
1 parent 86b43a0 commit c7975f7
Show file tree
Hide file tree
Showing 11 changed files with 2,527 additions and 2,446 deletions.
143 changes: 75 additions & 68 deletions Megrez.Tests/LMDataForTests.cs
Original file line number Diff line number Diff line change
@@ -1,91 +1,98 @@
// CSharpened and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// CSharpened and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Walking algorithm (Dijkstra) implemented by (c) 2025 and onwards The vChewing Project (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)

using System.Collections.Generic;
using System.Linq;

using static System.String;
// ReSharper disable InconsistentNaming

namespace Megrez.Tests {

public class SimpleLM : LangModelProtocol {
private Dictionary<string, List<Unigram>> _database = new();
public string separator { get; set; }
public SimpleLM(string input, bool swapKeyValue = false, string separator = "") {
this.separator = separator;
List<string> sStream = new(input.Split('\n'));
sStream.ForEach(line => {
if (IsNullOrEmpty(line) || line.FirstOrDefault().CompareTo('#') == 0) return;
List<string> lineStream = new(line.Split(' '));
if (lineStream.Count < 2) return;
string col0 = lineStream[0]; // 假設其不為 nil
string col1 = lineStream[1]; // 假設其不為 nil
double col2 = 0; // 防呆
if (lineStream.Count >= 3 && double.TryParse(lineStream[2], out double number)) col2 = number;
string key;
string value;
if (swapKeyValue) {
key = col1;
value = col0;
} else {
key = col0;
value = col1;
}
Unigram u = new(value, col2);
if (!_database.ContainsKey(key)) _database.Add(key, new());
_database[key].Add(u);
});
}
public bool HasUnigramsFor(List<string> keyArray) => _database.ContainsKey(keyArray.Joined(separator: separator));
public List<Unigram> UnigramsFor(List<string> keyArray) =>
_database.ContainsKey(keyArray.Joined(separator: separator)) ? _database[keyArray.Joined(separator: separator)]
: new();
public void Trim(string key, string value) {
if (!_database.TryGetValue(key, out List<Unigram>? arr)) return;
public class SimpleLM : LangModelProtocol {
private Dictionary<string, List<Unigram>> _database = new();
public string separator { get; set; }
/// <summary>
/// Builds the in-memory language model from plain-text data.
/// The input is split on '\n'; empty lines and lines starting with '#' are skipped.
/// Every remaining line is split on single spaces into columns: column 0 and column 1
/// form the key / value pair, and the optional column 2 is the score.
/// </summary>
/// <param name="input">The raw text data, one entry per line.</param>
/// <param name="swapKeyValue">
/// When true, column 1 is used as the key and column 0 as the value; otherwise column 0
/// is the key and column 1 is the value.
/// </param>
/// <param name="separator">The separator stored in <c>separator</c> and used when joining key arrays for lookups.</param>
public SimpleLM(string input, bool swapKeyValue = false, string separator = "") {
  this.separator = separator;
  foreach (string rawLine in input.Split('\n')) {
    // Drop a trailing carriage return so CRLF-terminated input does not leak '\r' into values.
    string line = rawLine.TrimEnd('\r');
    if (string.IsNullOrEmpty(line) || line[0] == '#') {
      continue;
    }
    string[] columns = line.Split(' ');
    if (columns.Length < 2) {
      continue;
    }
    // Fool-proofing: the score stays 0 when column 2 is missing or is not a number.
    double score = 0;
    // Parse with the invariant culture: the data always uses '.' as the decimal separator,
    // so the result must not depend on the locale of the machine running the tests.
    if (columns.Length >= 3 &&
        double.TryParse(columns[2],
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture, out double parsed)) {
      score = parsed;
    }
    string key = swapKeyValue ? columns[1] : columns[0];
    string value = swapKeyValue ? columns[0] : columns[1];
    // Single dictionary lookup; the bucket is created on first use of the key.
    if (!_database.TryGetValue(key, out var bucket)) {
      bucket = new List<Unigram>();
      _database.Add(key, bucket);
    }
    bucket.Add(new Unigram(value, score));
  }
}
/// <summary>
/// Tells whether the database holds an entry for the given key array.
/// The key array is joined with <c>separator</c> to form the lookup key.
/// </summary>
/// <param name="keyArray">The keys to join into a single lookup key.</param>
/// <returns>True when the joined key exists in the database.</returns>
public bool HasUnigramsFor(List<string> keyArray) {
  string joinedKey = keyArray.Joined(separator: separator);
  return _database.ContainsKey(joinedKey);
}
/// <summary>
/// Returns the unigrams stored for the given key array.
/// The key array is joined with <c>separator</c> to form the lookup key.
/// </summary>
/// <param name="keyArray">The keys to join into a single lookup key.</param>
/// <returns>
/// The list stored in the database for the joined key, or a new empty list when
/// the key is absent.
/// </returns>
public List<Unigram> UnigramsFor(List<string> keyArray) {
  // Join once and look up once instead of repeating both for the hit case.
  string joinedKey = keyArray.Joined(separator: separator);
  return _database.TryGetValue(joinedKey, out var found) ? found : new List<Unigram>();
}
public void Trim(string key, string value) {
if (!_database.TryGetValue(key, out List<Unigram>? arr))
return;

if (arr is not {} theArr) return;
theArr = theArr.Where(x => x.Value != value).ToList();
if (theArr.IsEmpty()) {
_database.Remove(key);
return;
if (arr is not { } theArr)
return;
theArr = theArr.Where(x => x.Value != value).ToList();
if (theArr.IsEmpty()) {
_database.Remove(key);
return;
}
_database[key] = theArr;
}
_database[key] = theArr;
}
}

public class MockLM : LangModelProtocol {
public bool HasUnigramsFor(List<string> keyArray) => !IsNullOrEmpty(keyArray.Joined());
public List<Unigram> UnigramsFor(List<string> keyArray) => new() { new(value: keyArray.Joined(), score: -1) };
}
/// <summary>
/// A mock language model that echoes the queried key back: any key array whose joined
/// form is non-empty is reported as present, and its single unigram carries the joined
/// key as the value with a score of -1.
/// </summary>
public class MockLM : LangModelProtocol {
  /// <summary>Reports whether the joined key array is a non-empty string.</summary>
  /// <param name="keyArray">The keys to join.</param>
  /// <returns>True when the joined key is neither null nor empty.</returns>
  public bool HasUnigramsFor(List<string> keyArray) {
    string joinedKey = keyArray.Joined();
    return !string.IsNullOrEmpty(joinedKey);
  }

  /// <summary>Produces a one-element list whose unigram echoes the joined key.</summary>
  /// <param name="keyArray">The keys to join.</param>
  /// <returns>A new list holding one unigram with the joined key as value and score -1.</returns>
  public List<Unigram> UnigramsFor(List<string> keyArray) {
    Unigram echoed = new(value: keyArray.Joined(), score: -1);
    return new List<Unigram> { echoed };
  }
}

public class TestLM : LangModelProtocol {
public bool HasUnigramsFor(List<string> keyArray) => keyArray.Joined() == "foo";
public List<Unigram> UnigramsFor(List<string> keyArray) => keyArray.Joined() == "foo"
? new() { new(keyArray.Joined(), -1) }
: new List<Unigram>();
}
/// <summary>
/// A minimal language model that only knows the joined key "foo", for which it yields
/// a single unigram with the value "foo" and a score of -1.
/// </summary>
public class TestLM : LangModelProtocol {
  /// <summary>Reports whether the joined key array equals "foo".</summary>
  /// <param name="keyArray">The keys to join.</param>
  /// <returns>True only when the joined key is "foo".</returns>
  public bool HasUnigramsFor(List<string> keyArray) {
    return keyArray.Joined() == "foo";
  }

  /// <summary>Returns the unigrams for the joined key array.</summary>
  /// <param name="keyArray">The keys to join.</param>
  /// <returns>
  /// A new list with one unigram (joined key, score -1) when the joined key is "foo";
  /// otherwise a new empty list.
  /// </returns>
  public List<Unigram> UnigramsFor(List<string> keyArray) {
    List<Unigram> result = new();
    string joinedKey = keyArray.Joined();
    if (joinedKey == "foo") {
      result.Add(new Unigram(joinedKey, -1));
    }
    return result;
  }
}

public class TestLMForRanked : LangModelProtocol {
public bool HasUnigramsFor(List<string> keyArray) => keyArray.Joined() == "foo";
public List<Unigram> UnigramsFor(List<string> keyArray) => keyArray.Joined() == "foo"
? new() { new("middle", -5), new("highest", -2),
public class TestLMForRanked : LangModelProtocol {
public bool HasUnigramsFor(List<string> keyArray) => keyArray.Joined() == "foo";
public List<Unigram> UnigramsFor(List<string> keyArray) => keyArray.Joined() == "foo"
? new() { new("middle", -5), new("highest", -2),
new("lowest", -10) }
: new List<Unigram>();
}
: new List<Unigram>();
}

public class TestDataClass {
public static string StrStressData =
@"
public class TestDataClass {
public static string StrStressData =
@"
yi1 一 -2.08170692
yi1-yi1 一一 -4.38468400
";

public static string StrEmojiSampleData =
@"
public static string StrEmojiSampleData =
@"
gao1 高 -2.9396
re4 熱 -3.6024
gao1re4 高熱 -6.526
Expand All @@ -105,8 +112,8 @@ feng1 蜂 -4.6231
";

public static string StrSampleData =
@"
public static string StrSampleData =
@"
#
# 下述詞頻資料取自 libTaBE 資料庫 (https://sourceforge.net/projects/libtabe/)
# (2002 最終版). 該專案於 1999 年由 Pai-Hsiang Hsiao 發起、以 BSD 授權發行。
Expand Down Expand Up @@ -201,8 +208,8 @@ yu4 育 -3.30192952
";

public static string StrSampleDataLitch =
@"
public static string StrSampleDataLitch =
@"
nai3ji1 荔枝 -4.73
nai3ji1 奶積 -9.399
nai3 乃 -5.262
Expand Down Expand Up @@ -316,5 +323,5 @@ ji1 楫 -9.543
ji1 膣 -9.543
";
}
}
}
76 changes: 40 additions & 36 deletions Megrez.Tests/MegrezImplForTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,46 @@
using System.Linq;

namespace Megrez.Tests {
internal static class Utils {
/// <summary>
/// 返回在當前位置的所有候選字詞(以詞音配對的形式)。<para/>如果組字器內有幅位、且游標
/// 位於組字器的(文字輸入順序的)最前方(也就是游標位置的數值是最大合規數值)的
/// 話,那麼這裡會用到 location - 1、以免去在呼叫該函式後再處理的麻煩。
/// </summary>
/// <param name="self">組字器。</param>
/// <param name="location">游標位置。</param>
/// <param name="filter">候選字音配對陣列。</param>
/// <returns></returns>
public static List<KeyValuePaired> FetchCandidatesDeprecatedAt(
this Compositor self, int location,
Compositor.CandidateFetchFilter filter = Compositor.CandidateFetchFilter.All) {
List<KeyValuePaired> result = new();
if (self.Keys.IsEmpty()) return result;
location = Math.Max(0, Math.Min(location, self.Keys.Count - 1));
// 按照讀音的長度(幅位長度)來給節點排序。
List<Compositor.NodeWithLocation> anchors = self.FetchOverlappingNodesAt(location);
string keyAtCursor = self.Keys[location];
anchors.ForEach(anchor => {
anchor.Node.Unigrams.ForEach(gram => {
switch (filter) {
case Compositor.CandidateFetchFilter.All:
if (!anchor.Node.KeyArray.Contains(keyAtCursor)) return;
break;
case Compositor.CandidateFetchFilter.BeginAt:
if (anchor.Node.KeyArray.First() != keyAtCursor) return;
break;
case Compositor.CandidateFetchFilter.EndAt:
if (anchor.Node.KeyArray.Last() != keyAtCursor) return;
break;
}
result.Add(new(anchor.Node.KeyArray, gram.Value));
internal static class Utils {
/// <summary>
/// 返回在當前位置的所有候選字詞(以詞音配對的形式)。<para/>如果組字器內有幅位、且游標
/// 位於組字器的(文字輸入順序的)最前方(也就是游標位置的數值是最大合規數值)的
/// 話,那麼這裡會用到 location - 1、以免去在呼叫該函式後再處理的麻煩。
/// </summary>
/// <param name="self">組字器。</param>
/// <param name="location">游標位置。</param>
/// <param name="filter">候選字音配對陣列。</param>
/// <returns></returns>
public static List<KeyValuePaired> FetchCandidatesDeprecatedAt(
this Compositor self, int location,
Compositor.CandidateFetchFilter filter = Compositor.CandidateFetchFilter.All) {
List<KeyValuePaired> result = new();
if (self.Keys.IsEmpty())
return result;
location = Math.Max(0, Math.Min(location, self.Keys.Count - 1));
// 按照讀音的長度(幅位長度)來給節點排序。
List<Compositor.NodeWithLocation> anchors = self.FetchOverlappingNodesAt(location);
string keyAtCursor = self.Keys[location];
anchors.ForEach(anchor => {
anchor.Node.Unigrams.ForEach(gram => {
switch (filter) {
case Compositor.CandidateFetchFilter.All:
if (!anchor.Node.KeyArray.Contains(keyAtCursor))
return;
break;
case Compositor.CandidateFetchFilter.BeginAt:
if (anchor.Node.KeyArray.First() != keyAtCursor)
return;
break;
case Compositor.CandidateFetchFilter.EndAt:
if (anchor.Node.KeyArray.Last() != keyAtCursor)
return;
break;
}
result.Add(new(anchor.Node.KeyArray, gram.Value));
});
});
});
return result;
return result;
}
}
}
}
Loading

0 comments on commit c7975f7

Please sign in to comment.