Refactor DiacriticSupport to per-language class with statics in Companion

2025-02-20 19:56:51 +01:00 · 2024-06-25 14:39:22 +02:00 · 2024-06-25 14:39:22 +02:00 · ae779edbdb
commit ae779edbdb
parent 6ae85ca654
3 changed files with 79 additions and 92 deletions
--- a/core/src/com/unciv/models/translations/Translations.kt
+++ b/core/src/com/unciv/models/translations/Translations.kt
@ -119,8 +119,7 @@ class Translations : LinkedHashMap<String, TranslationEntry>() {
    }

    private fun createTranslations(language: String, languageTranslations: HashMap<String, String>) {
-        DiacriticSupport.prepareTranslationData(languageTranslations)
-        val noDiacritics = DiacriticSupport.noDiacritics()
+        val diacriticSupport = DiacriticSupport(languageTranslations).takeUnless { it.noDiacritics() }
        for ((key, value) in languageTranslations) {
            val hashKey = if (key.contains('[') && !key.contains('<'))
                key.getPlaceholderText()
@ -130,7 +129,7 @@ class Translations : LinkedHashMap<String, TranslationEntry>() {
                entry = TranslationEntry(key)
                this[hashKey] = entry
            }
-            entry[language] = if (noDiacritics) value else DiacriticSupport.remapDiacritics(value)
+            entry[language] = diacriticSupport?.remapDiacritics(value) ?: value
        }
    }

--- a/core/src/com/unciv/ui/components/fonts/DiacriticSupport.kt
+++ b/core/src/com/unciv/ui/components/fonts/DiacriticSupport.kt
@ -21,7 +21,7 @@ import org.jetbrains.annotations.VisibleForTesting
 *
 *  ### Usage
 *  - Call [reset] when translation loading starts over
- *  - Call [prepareTranslationData] once a translation file is read (their map of key (left of =) to translation (right of =) is in memory, pass that as argument)
+ *  - Instantiate [DiacriticSupport] through the constructor-like factory [invoke] once a translation file is read (their map of key (left of =) to translation (right of =) is in memory, pass that as argument)
 *  - Check [noDiacritics] - if true, the rest of that language load need not bother with diacritics
 *  - Call [remapDiacritics] on each translation and store the result instead of the original value
 *  - If you wish to save some memory, call [freeTranslationData] after all required languages are done
@ -29,10 +29,73 @@ import org.jetbrains.annotations.VisibleForTesting
 *
 *  ### Notes
 *  - [FontRulesetIcons] initialize ***after*** Translation loading. If this ever changes, this might need some tweaking.
+ *  - The primary constructor is only used from the [Companion.invoke] factory and for testing.
 */
-object DiacriticSupport {
-    /** Start at end of Unicode Private Use Area and go down from there: UShort is the preferred Char() constructor parameter! */
-    private const val startingReplacementCodepoint: UShort = 63743u // 0xF8FF
+class DiacriticSupport(range: CharRange, leftDiacritics: String, rightDiacritics: String, joinerDiacritics: String) {
+    companion object {
+        /** Start at end of Unicode Private Use Area and go down from there: UShort is the preferred Char() constructor parameter! */
+        private const val startingReplacementCodepoint: UShort = 63743u // 0xF8FF
+
+        private var nextFreeDiacriticReplacementCodepoint = startingReplacementCodepoint
+        private val fakeAlphabet = mutableMapOf<Char, String>()
+        private val inverseMap = mutableMapOf<String, Char>()
+
+        /** Prepares this for a complete start-over, expecting a language load to instantiate a DiacriticSupport next */
+        fun reset() {
+            fakeAlphabet.clear()
+            freeTranslationData()
+            nextFreeDiacriticReplacementCodepoint = startingReplacementCodepoint
+        }
+
+        /** This is the main engine for rendering text glyphs after the translation loader has filled up this `object`
+         *  @param  char The real or "fake alphabet" char stored by [remapDiacritics] to render
+         *  @return The one to many (probably 8 max) codepoint string to be rendered into a single glyph by native font services
+         */
+        fun getStringFor(char: Char) = fakeAlphabet[char] ?: char.toString()
+
+        /** Call when use of [remapDiacritics] is finished to save some memory */
+        fun freeTranslationData() {
+            for ((length, examples) in inverseMap.keys.groupBy { it.length }.toSortedMap()) {
+                Log.debug("Length %d - example %s", length, examples.first())
+            }
+            inverseMap.clear()
+        }
+
+        /** Other "fake" alphabets can use Unicode Private Use Areas from U+E000 up to including... */
+        fun getCurrentFreeCode() = Char(nextFreeDiacriticReplacementCodepoint)
+
+        /** If this is true, no need to bother [remapping chars at render time][getStringFor] */
+        fun isEmpty() = fakeAlphabet.isEmpty()
+
+        /** Factory that gets the primary constructor parameters by extracting the translation entries for [TranslationKeys] */
+        operator fun invoke(translations: HashMap<String, String>): DiacriticSupport {
+            val stripCommentRegex = """^"?(.*?)"?(?:\s*#.*)?$""".toRegex()
+            fun String?.parseDiacriticEntry(): String {
+                if (isNullOrEmpty()) return ""
+                val tokens = stripCommentRegex.matchEntire(this)!!.groupValues[1].splitToSequence(' ').toMutableList()
+                for (index in tokens.indices) {
+                    val token = tokens[index]
+                    when {
+                        token.length == 1 -> continue
+                        token.startsWith("u+", true) -> tokens[index] = Char(token.drop(2).toInt(16)).toString()
+                        tokens.size == 1 -> continue
+                        else -> throw IllegalArgumentException("Invalid diacritic definition: \"$token\" is not a single character or unicode codepoint notation")
+                    }
+                }
+                return tokens.joinToString("")
+            }
+
+            val rangeStart = translations[TranslationKeys.rangeStart].parseDiacriticEntry()
+            val rangeEnd = translations[TranslationKeys.rangeEnd].parseDiacriticEntry()
+            val range = if (rangeStart.isEmpty() || rangeEnd.isEmpty()) CharRange.EMPTY
+            else rangeStart.first()..rangeEnd.first()
+            val leftDiacritics = translations[TranslationKeys.left].parseDiacriticEntry()
+            val rightDiacritics = translations[TranslationKeys.right].parseDiacriticEntry()
+            val joinerDiacritics = translations[TranslationKeys.joiner].parseDiacriticEntry()
+
+            return DiacriticSupport(range, leftDiacritics, rightDiacritics, joinerDiacritics)
+        }
+    }

    private object TranslationKeys {
        const val rangeStart = "language_start_code"
@ -42,18 +105,11 @@ object DiacriticSupport {
        const val joiner = "left_and_right_joiners"
    }

-    //region The following fields can persist over loading multiple languages
-    private var nextFreeDiacriticReplacementCodepoint = startingReplacementCodepoint
-    private val fakeAlphabet = mutableMapOf<Char, String>()
-    private val inverseMap = mutableMapOf<String, Char>()
-    //endregion
-
-    //region These fields will only persist during one language load
-    private var noDiacritics = true
+    private val noDiacritics: Boolean
    private val charClassMap = mutableMapOf<Char, CharClass>()
    private var defaultCharClass = CharClass.None

-    private class LineData(capacity: Int) {
+    private inner class LineData(capacity: Int) {
        val output = StringBuilder(capacity)
        val accumulator = StringBuilder(9) // touhidurrr said there can be nine
        fun expectsJoin() = accumulator.isNotEmpty() && getCharClass(accumulator.last()).expectsRightJoin
@ -97,44 +153,11 @@ object DiacriticSupport {
        };
        abstract fun process(data: LineData, char: Char)
    }
-    //endregion
-
-    /** Prepares this for a complete start-over, expecting a language load calling [prepareTranslationData] next */
-    fun reset() {
-        fakeAlphabet.clear()
-        freeTranslationData()
-        nextFreeDiacriticReplacementCodepoint = startingReplacementCodepoint
-        defaultCharClass = CharClass.None
-    }
-
-    /** This is the main engine for rendering text glyphs after the translation loader has filled up this `object`
-     *  @param  char The real or "fake alphabet" char stored by [remapDiacritics] to render
-     *  @return The one to many (probably 8 max) codepoint string to be rendered into a single glyph by native font services
-     */
-    fun getStringFor(char: Char) = fakeAlphabet[char] ?: char.toString()
-
-    /** Call when use of [remapDiacritics] is finished to save some memory */
-    fun freeTranslationData() {
-        for ((length, examples) in inverseMap.keys.groupBy { it.length }.toSortedMap()) {
-            Log.debug("Length %d - example %s", length, examples.first())
-        }
-        inverseMap.clear()
-        charClassMap.clear()
-        noDiacritics = true
-    }
-
-    /** Other "fake" alphabets can use Unicode Private Use Areas from U+E000 up to including... */
-    fun getCurrentFreeCode() = Char(nextFreeDiacriticReplacementCodepoint)
-
-    /** If this is true, no need to bother [remapping chars at render time][getStringFor] */
-    fun isEmpty() = fakeAlphabet.isEmpty()

    @VisibleForTesting
    fun getKnownCombinations(): Set<String> = inverseMap.keys

-    //region Methods used during translation file loading
-
-    /** Set at [prepareTranslationData], if true the translation loader need not bother passing stuff through [remapDiacritics]. */
+    /** Set at instatiation, if true the translation loader need not bother passing stuff through [remapDiacritics]. */
    fun noDiacritics() = noDiacritics

    private fun getCharClass(char: Char) = charClassMap[char] ?: defaultCharClass
@ -149,40 +172,7 @@ object DiacriticSupport {
        return char
    }

-    /** Set up for a series of [remapDiacritics] calls for a specific language.
-     *  Extracts the translation entries for [TranslationKeys] and sets up [CharClass] [mappings][charClassMap] using them. */
-    fun prepareTranslationData(translations: HashMap<String, String>) {
-        val stripCommentRegex = """^"?(.*?)"?(?:\s*#.*)?$""".toRegex()
-        fun String?.parseDiacriticEntry(): String {
-            if (isNullOrEmpty()) return ""
-            val tokens = stripCommentRegex.matchEntire(this)!!.groupValues[1].splitToSequence(' ').toMutableList()
-            for (index in tokens.indices) {
-                val token = tokens[index]
-                when {
-                    token.length == 1 -> continue
-                    token.startsWith("u+", true) -> tokens[index] = Char(token.drop(2).toInt(16)).toString()
-                    tokens.size == 1 -> continue
-                    else -> throw IllegalArgumentException("Invalid diacritic definition: \"$token\" is not a single character or unicode codepoint notation")
-                }
-            }
-            return tokens.joinToString("")
-        }
-
-        noDiacritics = true
-        val rangeStart = translations[TranslationKeys.rangeStart].parseDiacriticEntry()
-        val rangeEnd = translations[TranslationKeys.rangeEnd].parseDiacriticEntry()
-        val range = if (rangeStart.isEmpty() || rangeEnd.isEmpty()) CharRange.EMPTY
-            else rangeStart.first()..rangeEnd.first()
-        val leftDiacritics = translations[TranslationKeys.left].parseDiacriticEntry()
-        val rightDiacritics = translations[TranslationKeys.right].parseDiacriticEntry()
-        val joinerDiacritics = translations[TranslationKeys.joiner].parseDiacriticEntry()
-        if (leftDiacritics.isNotEmpty() || rightDiacritics.isNotEmpty() || joinerDiacritics.isNotEmpty())
-            prepareTranslationData(range, leftDiacritics, rightDiacritics, joinerDiacritics)
-    }
-
-    @VisibleForTesting
-    fun prepareTranslationData(range: CharRange, leftDiacritics: String, rightDiacritics: String, joinerDiacritics: String) {
-        charClassMap.clear()
+    init {
        if (range.isEmpty()) {
            defaultCharClass = CharClass.Base
            charClassMap[' '] = CharClass.None
@ -193,7 +183,7 @@ object DiacriticSupport {
        for (char in leftDiacritics) charClassMap[char] = CharClass.LeftJoiner
        for (char in rightDiacritics) charClassMap[char] = CharClass.RightJoiner
        for (char in joinerDiacritics) charClassMap[char] = CharClass.LeftRightJoiner
-        noDiacritics = false
+        noDiacritics = leftDiacritics.isEmpty() && rightDiacritics.isEmpty() && joinerDiacritics.isEmpty()
    }

    /** Replaces the combos of diacritics/joiners with their affected characters with a "fake" alphabet */
@ -207,6 +197,4 @@ object DiacriticSupport {
        }
        return data.result()
    }
-
-    //endregion
 }
--- a/tests/src/com/unciv/logic/TranslationTests.kt
+++ b/tests/src/com/unciv/logic/TranslationTests.kt
@ -329,8 +329,8 @@ class TranslationTests {
            .mapNotNull { entry ->
                entry["English"]?.let { entry.entry to it }
            }.toMap(HashMap())
-        DiacriticSupport.prepareTranslationData(english)
-        Assert.assertTrue(DiacriticSupport.noDiacritics())
+        val diacriticSupport = DiacriticSupport(english)
+        Assert.assertTrue(diacriticSupport.noDiacritics())
    }

    @Test
@ -344,12 +344,12 @@ class TranslationTests {
        DiacriticSupport.reset()
        val leftJoiningDiacritics = "ঁ ং ঃ ় া ি ী ু ূ ৃ ৄ ে ৈ ো ৌ ্ ৗ ৢ ৣ ৾".replace(" ", "")
        val leftAndRightJoiners = "্"
-        DiacriticSupport.prepareTranslationData(Char(0x0980U)..Char(0x09FDU), leftJoiningDiacritics, "", leftAndRightJoiners)
+        val diacriticSupport = DiacriticSupport(Char(0x0980U)..Char(0x09FDU), leftJoiningDiacritics, "", leftAndRightJoiners)

        listOf(
            "মানচিত্র সম্পাদক", "দেখুন", "উৎপন্ন করুন", "আংশিক", "খ্রিষ্টপূর্ব", "সংক্ষিপ্ত", "শক্তি", "ষ্ঠ্যে"
-        ).forEach { DiacriticSupport.remapDiacritics(it) }
-        val actual = DiacriticSupport.getKnownCombinations()
+        ).forEach { diacriticSupport.remapDiacritics(it) }
+        val actual = diacriticSupport.getKnownCombinations()
        val expected = setOf(
                "ম, া", "চ, ি", "ত, ্, র", "ম, ্, প, া", "দ, ে", "খ, ু", "ন, ্, ন", "র, ু", "আ, ং", "শ, ি",
                "খ, ্, র, ি", "ষ, ্, ট", "প, ূ", "র, ্, ব", "স, ং", "ক, ্, ষ, ি", "প, ্, ত", "ক, ্, ত, ি",