Trying to recognize the language of the Toot to set it correctly if none is provided (#633)

Probability threshold arbitrarily set to 85%.
Seems to work well enough for French, English and Spanish.
Also tested with only a few words: the probability stays low unless there is a very language-specific word.
This commit is contained in:
Thomas Durand 2023-02-03 15:45:59 +01:00 committed by GitHub
parent 61cd21d28b
commit 2a1d1fc697
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,7 @@
import DesignSystem import DesignSystem
import Env import Env
import Models import Models
import NaturalLanguage
import Network import Network
import PhotosUI import PhotosUI
import SwiftUI import SwiftUI
@ -126,6 +127,21 @@ public class StatusEditorViewModel: ObservableObject {
multiple: pollVotingFrequency.canVoteMultipleTimes, multiple: pollVotingFrequency.canVoteMultipleTimes,
expires_in: pollDuration.rawValue) expires_in: pollDuration.rawValue)
} }
if !hasExplicitlySelectedLanguage {
// Attempt language resolution using Natural Language
let recognizer = NLLanguageRecognizer()
recognizer.processString(statusText.string)
// Use languageHypotheses to get the probability with it
let hypotheses = recognizer.languageHypotheses(withMaximum: 1)
// Assume that an 85% probability is enough :)
// A one word toot that is en/fr compatible is only ~50% confident, for instance
if let (language, probability) = hypotheses.first, probability > 0.85 {
// rawValue returns the IETF BCP 47 language tag
selectedLanguage = language.rawValue
}
}
let data = StatusData(status: statusText.string, let data = StatusData(status: statusText.string,
visibility: visibility, visibility: visibility,
inReplyToId: mode.replyToStatus?.id, inReplyToId: mode.replyToStatus?.id,