Skip to content

Commit

Permalink
Handle NSNumber score values (#94)
Browse files Browse the repository at this point in the history
  • Loading branch information
ZachNagengast authored Apr 28, 2024
1 parent 0a606f5 commit fc65432
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions Sources/Tokenizers/UnigramTokenizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// Copyright © 2024 Hugging Face. All rights reserved.
//

import Foundation
import Hub

class UnigramTokenizer: PreTrainedTokenizerModel {
Expand Down Expand Up @@ -37,8 +38,20 @@ class UnigramTokenizer: PreTrainedTokenizerModel {
}

// Build `vocab` by converting every entry of `configVocab` into a
// `SentencePieceToken`. The first element of an entry is read as the token
// text and the last element as its score; any entry that does not fit that
// shape aborts the whole mapping with `TokenizerError.malformedVocab`.
//
// NOTE(review): this is a diff hunk whose +/- markers were lost in rendering.
// Going by the "15 additions and 2 deletions" summary, the next two `guard`
// lines are presumably the deleted (pre-commit) version and everything after
// them up to the `return` is the added replacement — confirm against the
// actual commit.
vocab = try configVocab.map { piece in
// Old version: the score had to cast directly to `Float`.
guard let token = piece.first as? String else { throw TokenizerError.malformedVocab }
guard let score = piece.last as? Float else { throw TokenizerError.malformedVocab }
// New version: require a String token and the presence of a last element,
// but defer the decision about the score's concrete type.
guard let token = piece.first as? String,
let scoreValue = piece.last else {
throw TokenizerError.malformedVocab
}

// Resolve the score: take it as-is when it casts to `Float`, otherwise fall
// back to `NSNumber.floatValue` (the case named in the commit title).
// Anything else is treated as a malformed vocabulary entry.
let score: Float
if let floatScore = scoreValue as? Float {
score = floatScore
} else if let numberScore = scoreValue as? NSNumber {
score = numberScore.floatValue
} else {
throw TokenizerError.malformedVocab
}

return SentencePieceToken(token: token, score: score)
}

Expand Down

0 comments on commit fc65432

Please sign in to comment.