Skip to content

Commit

Permalink
Attempt to rule out maxSpanLength, etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
ShikiSuen committed Feb 11, 2025
1 parent ad37495 commit b104175
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 56 deletions.
28 changes: 27 additions & 1 deletion Megrez/src/0_CSharpExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,39 @@ public struct BRange : IEnumerable<int> {
public int Lowerbound { get; }
public int Upperbound { get; }
/// <summary>
/// Creates a range running from <paramref name="lowerbound"/> to <paramref name="upperbound"/>.
/// </summary>
/// <param name="lowerbound">First value of the range; stored unchanged.</param>
/// <param name="upperbound">
/// Last value of the range. A value smaller than <paramref name="lowerbound"/>
/// is replaced by <paramref name="lowerbound"/>, so the bounds are never inverted.
/// </param>
public BRange(int lowerbound, int upperbound) {
  Lowerbound = lowerbound;
  // Never let the upper bound fall below the lower bound.
  Upperbound = Math.Max(lowerbound, upperbound);
}

/// <summary>
/// Materializes the range as a list of consecutive integers.
/// </summary>
/// <returns>
/// A new list holding every value from <c>Lowerbound</c> through
/// <c>Upperbound</c>, both ends included, in ascending order.
/// </returns>
public List<int> ToList() {
  List<int> values = new();
  int current = Lowerbound;
  // The upper bound is inclusive, hence "<=".
  while (current <= Upperbound) {
    values.Add(current);
    current++;
  }
  return values;
}

/// <summary>
/// Materializes the range via <c>ToList()</c> and forwards to the
/// <c>Enumerated()</c> extension available on that list.
/// </summary>
/// <returns>
/// The sequence produced by the list's <c>Enumerated()</c> call
/// (presumably each value paired with its offset; confirm against the extension).
/// </returns>
public IEnumerable<EnumeratedItem<int>> Enumerated() {
  List<int> values = ToList();
  return values.Enumerated();
}

/// <summary>
/// Generic enumerator: walks a list snapshot of the range built by <c>ToList()</c>.
/// </summary>
IEnumerator<int> IEnumerable<int>.GetEnumerator() {
  List<int> snapshot = ToList();
  return snapshot.GetEnumerator();
}

/// <summary>
/// Non-generic enumerator: walks a list snapshot of the range built by <c>ToList()</c>.
/// </summary>
IEnumerator IEnumerable.GetEnumerator() {
  List<int> snapshot = ToList();
  return snapshot.GetEnumerator();
}
}

/// <summary>
/// 一個「可以返回整數的上下限」的自訂 Range 類型。該類型允許邊界顛倒。
/// </summary>
public struct BRangeSwappable : IEnumerable<int> {
public int Lowerbound { get; }
public int Upperbound { get; }
public BRangeSwappable(int lowerbound, int upperbound) {
Lowerbound = Math.Min(lowerbound, upperbound);
Upperbound = Math.Max(lowerbound, upperbound);
}

public List<int> ToList() {
List<int> result = new();
for (int i = Lowerbound; i < Upperbound; i++) {
for (int i = Lowerbound; i <= Upperbound; i++) {
result.Add(i);
}
return result;
Expand Down
73 changes: 25 additions & 48 deletions Megrez/src/1_Compositor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,6 @@ public enum ResizeBehavior {
/// </summary>
public CompositorConfig Config = new();

private static int _maxSpanLength = 10;
/// <summary>
/// The longest node span length that a single span unit will accept.
/// Starts at 10; any assigned value below 6 is raised to 6.
/// </summary>
public static int MaxSpanLength {
  get { return _maxSpanLength; }
  set {
    // Enforce the floor of 6 on every assignment.
    _maxSpanLength = value < 6 ? 6 : value;
  }
}

/// <summary>
/// 公用變數,在生成索引鍵字串時用來分割每個索引鍵單位。最好是鍵盤無法直接敲的 ASCII 字元。
/// </summary>
Expand Down Expand Up @@ -226,7 +217,7 @@ public bool InsertKey(string key) {
if (string.IsNullOrEmpty(key) || key == Separator) return false;
if (!TheLangModel.HasUnigramsFor(new() { key })) return false;
Keys.Insert(Cursor, key);
List<SpanUnit> gridBackup = Spans;
List<SpanUnit> gridBackup = Spans.Select(x => x.HardCopy()).ToList();
ResizeGridAt(Cursor, ResizeBehavior.Expand);
int nodesInserted = Update();
// 用來在 langModel.HasUnigramsFor() 結果不準確的時候防呆、恢復被搞壞的 Spans。
Expand Down Expand Up @@ -351,6 +342,7 @@ public string DumpDOT() {
/// <param name="action">指定是擴張還是縮減一個幅位。</param>
internal void ResizeGridAt(int location, ResizeBehavior action) {
location = Math.Max(Math.Min(location, Spans.Count), 0); // 防呆。
int oldContainerLength = Spans.Count;
switch (action) {
case ResizeBehavior.Expand:
Spans.Insert(location, new());
Expand All @@ -361,7 +353,9 @@ internal void ResizeGridAt(int location, ResizeBehavior action) {
Spans.RemoveAt(location);
break;
}
DropWreckedNodesAt(location);
int newContainerLength = Spans.Count;
int sizeDelta = newContainerLength - oldContainerLength;
DropWreckedNodesAt(location, sizeDelta);
}

/// <summary>
Expand Down Expand Up @@ -397,42 +391,22 @@ internal void ResizeGridAt(int location, ResizeBehavior action) {
/// </code>
/// </summary>
/// <param name="location">給定的幅位座標。</param>
internal void DropWreckedNodesAt(int location) {
/// <param name="sizeDelta">在需要呼叫這個方法時,Grid 的長度變化量(可為負數)。</param>
internal void DropWreckedNodesAt(int location, int sizeDelta) {
int oldContainerLength = Spans.Count - sizeDelta;
if (oldContainerLength < 0) return;
location = Math.Max(Math.Min(location, Spans.Count), 0); // 防呆。
if (Spans.IsEmpty()) return;
int affectedLength = MaxSpanLength - 1;
int affectedLength = oldContainerLength - 1;
int begin = Math.Max(0, location - affectedLength);
if (location < begin) return;
foreach (int delta in new BRange(begin, location)) {
foreach (int theLength in new BRange(location - delta + 1, MaxSpanLength)) {
if (location - 1 < begin) return;
foreach (int delta in new BRange(begin, location - 1)) {
foreach (int theLength in new BRange(location - delta + 1, oldContainerLength)) {
Spans[delta].Nodes.Remove(theLength);
}
}
}

/// <summary>
/// Fetches the slice of the key array described by the given range.
/// </summary>
/// <param name="range">
/// The range to read: <c>Lowerbound</c> is the start index and
/// <c>Upperbound</c> is used as the exclusive end index.
/// </param>
/// <returns>
/// A new list with the keys inside the range, or an empty list when the
/// range starts below zero or ends past the end of the key array.
/// </returns>
private List<string> GetJoinedKeyArray(BRange range) {
  // Out-of-range requests yield an empty result instead of throwing.
  if (range.Lowerbound < 0 || range.Upperbound > Keys.Count) return new();
  // GetRange already hands back a fresh list, so no further copy is made.
  return Keys.GetRange(range.Lowerbound, range.Upperbound - range.Lowerbound);
}

/// <summary>
/// Fetches the node stored at the given location with the given span length,
/// provided its key array matches the supplied one.
/// </summary>
/// <param name="location">
/// Position in <c>Spans</c>; values outside the valid index range are
/// clamped to the nearest valid index.
/// </param>
/// <param name="length">Span length used to look the node up via <c>NodeOf</c>.</param>
/// <param name="keyArray">Key array the node's own <c>KeyArray</c> must equal, element by element.</param>
/// <returns>
/// The matching node, or null when there are no spans, no node of that
/// length exists at the location, or the key arrays differ.
/// </returns>
private Node? GetNodeAt(int location, int length, List<string> keyArray) {
  // With no spans there is nothing to index; clamping would still yield
  // index 0 and indexing would throw, so report "not found" instead.
  if (Spans.Count == 0) return null;
  location = Math.Max(Math.Min(location, Spans.Count - 1), 0); // Sanity clamp.
  return Spans[location].NodeOf(length) is {} node && node.KeyArray.SequenceEqual(keyArray)
    ? node
    : null;
}

/// <summary>
/// 根據當前狀況更新整個組字器的節點文脈。
/// </summary>
Expand All @@ -442,19 +416,22 @@ private List<string> GetJoinedKeyArray(BRange range) =>
/// </param>
/// <returns>新增或影響了多少個節點。如果返回「0」則表示可能發生了錯誤。 </returns>
public int Update(bool updateExisting = false) {
BRange range = new(Math.Max(0, Cursor - MaxSpanLength), Math.Min(Cursor + MaxSpanLength, Keys.Count));
int nodesChanged = 0;
foreach (int position in range) {
foreach (int theLength in new BRange(1, Math.Min(MaxSpanLength, range.Upperbound - position) + 1)) {
List<string> joinedKeyArray = GetJoinedKeyArray(new(position, position + theLength));
BRange safeLocationRange = new(0, Spans.Count);
Node? theNode = safeLocationRange.Contains(position) ? GetNodeAt(position, theLength, joinedKeyArray) : null;
if (theNode is {}) {
int spansCount = Spans.Count;
if (spansCount < 1) return 0;
foreach (int position in new BRange(0, Spans.Count - 1)) {
int upperBound4Lengths = spansCount - position;
if (upperBound4Lengths < 1) continue;
foreach (int theLength in new BRange(1, upperBound4Lengths)) {
if (!(position + theLength <= Keys.Count && position >= 0)) continue;
List<string> joinedKeyArray = Keys.GetRange(position, theLength);
if (Spans[position].NodeOf(theLength) is {} theNode) {
if (!updateExisting) continue;
List<Unigram> unigramsA = TheLangModel.UnigramsFor(joinedKeyArray);
// 自動銷毀無效的節點。
if (unigramsA.IsEmpty()) {
if (theNode.KeyArray.Count == 1) continue;
Spans[position].Nullify(givenNode: theNode);
Spans[position].Nodes.Remove(theNode.SpanLength);
} else {
theNode.SyncingUnigramsFrom(unigramsA);
}
Expand Down Expand Up @@ -500,7 +477,7 @@ public int Cursor {
get => _cursor;
set {
_cursor = Math.Max(0, Math.Min(value, Length));
_marker = Cursor; // 同步当前游标至标记器。
_marker = Cursor; // 同步当前游标至标记器。
}
}

Expand Down
8 changes: 1 addition & 7 deletions Megrez/src/4_SpanUnit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,6 @@ public bool Append(Node node) {
return true;
}

/// <summary>
/// Removes from <c>Nodes</c> the entry keyed by the given node's span length.
/// NOTE(review): the lookup uses only <c>SpanLength</c>; the given node is not
/// compared against the entry that gets removed.
/// </summary>
/// <param name="givenNode">The node whose <c>SpanLength</c> selects the entry to remove.</param>
public void Nullify(Node givenNode) {
  Nodes.Remove(givenNode.SpanLength);
}

/// <summary>
/// 以給定的幅位長度,在當前幅位單元內找出對應的節點。
/// </summary>
Expand Down Expand Up @@ -143,7 +137,7 @@ public List<NodeWithLocation> FetchOverlappingNodesAt(int givenLocation) {
}

// 再獲取以當前位置結尾或開頭的節點。
int begin = givenLocation - Math.Min(givenLocation, MaxSpanLength - 1);
int begin = givenLocation - Math.Min(givenLocation, Spans.Count - 1);
foreach (int theLocation in new BRange(begin, givenLocation)) {
int alpha = givenLocation - theLocation + 1;
int bravo = Spans[theLocation].MaxLength;
Expand Down

0 comments on commit b104175

Please sign in to comment.