shubham0204 · jkkj · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/.gitignore b/.gitignore
@@ -10,4 +10,5 @@
 local.properties
 .kotlin
 ktlint
-ktlint.bat
+ktlint.bat
+whisper/build/
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "llama.cpp"]
 	path = llama.cpp
 	url = https://github.com/ggerganov/llama.cpp
+[submodule "whisper/src/main/jni/whisper.cpp"]
+	path = whisper/src/main/jni/whisper.cpp
+	url = https://github.com/ggml-org/whisper.cpp.git
diff --git a/app/build.gradle.kts b/app/build.gradle.kts
@@ -3,7 +3,7 @@ plugins {
     alias(libs.plugins.kotlin.android)
     alias(libs.plugins.kotlin.compose)
     id("com.google.devtools.ksp")
-    kotlin("plugin.serialization") version "2.1.0"
+    kotlin("plugin.serialization") version "2.0.0"
 }
 
 android {
@@ -92,12 +92,17 @@ dependencies {
 
     implementation(project(":smollm"))
     implementation(project(":hf-model-hub-api"))
+    implementation(project(":whisper"))
+
+    // Android Wave Recorder for speech-to-text
+    implementation("com.github.squti:Android-Wave-Recorder:2.1.0")
 
     // Koin: dependency injection
     implementation(libs.koin.android)
     implementation(libs.koin.annotations)
     implementation(libs.koin.androidx.compose)
     implementation(libs.androidx.ui.text.google.fonts)
+    implementation(libs.androidx.compose.foundation)
     ksp(libs.koin.ksp.compiler)
 
     // compose-markdown: Markdown rendering in Compose

diff --git a/app/proguard-rules.pro b/app/proguard-rules.pro
@@ -18,4 +18,7 @@
 
 # If you keep the line number information, uncomment this to
 # hide the original source file name.
-#-renamesourcefileattribute SourceFile
+#-renamesourcefileattribute SourceFile
+
+# Keep Whisper native callbacks
+-keep class com.whispercpp.whisper.** { *; }
diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
@@ -3,6 +3,12 @@
     xmlns:tools="http://schemas.android.com/tools">
 
     <uses-permission android:name="android.permission.INTERNET"/>
+    <uses-permission android:name="android.permission.RECORD_AUDIO"/>
+    <uses-permission android:name="android.permission.FOREGROUND_SERVICE"/>
+    <uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE"/>
+    <uses-permission android:name="android.permission.POST_NOTIFICATIONS"/>
+    <uses-permission android:name="android.permission.WAKE_LOCK"/>
+    <uses-permission android:name="android.permission.REQUEST_IGNORE_BATTERY_OPTIMIZATIONS"/>
 
     <application
         android:name=".SmolChatApplication"
@@ -43,6 +49,13 @@
         <activity android:name=".ui.screens.model_download.DownloadModelActivity"/>
 
         <activity android:name=".ui.screens.manage_tasks.ManageTasksActivity"/>
+
+        <activity android:name=".ui.screens.whisper_download.DownloadWhisperModelActivity"/>
+
+        <service
+            android:name=".service.VoiceChatService"
+            android:foregroundServiceType="microphone"
+            android:exported="false"/>
     </application>
 
 </manifest>
diff --git a/app/src/main/java/io/shubham0204/smollmandroid/data/PreferencesManager.kt b/app/src/main/java/io/shubham0204/smollmandroid/data/PreferencesManager.kt
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2024 Shubham Panchal
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.shubham0204.smollmandroid.data
+
+import android.content.Context
+import org.koin.core.annotation.Single
+
+/** Typed accessors over the "smolchat_prefs" SharedPreferences file; setters save via Editor.apply(). */
+@Single
+class PreferencesManager(context: Context) {
+    private val store = context.getSharedPreferences("smolchat_prefs", Context.MODE_PRIVATE)
+
+    /** Backed by key "tts_enabled"; reads `false` when unset. */
+    var ttsEnabled: Boolean
+        get() = flag("tts_enabled")
+        set(value) = saveFlag("tts_enabled", value)
+
+    /** Backed by key "auto_submit_enabled"; reads `false` when unset. */
+    var autoSubmitEnabled: Boolean
+        get() = flag("auto_submit_enabled")
+        set(value) = saveFlag("auto_submit_enabled", value)
+
+    /** Backed by key "auto_submit_delay_ms"; reads 2000 when unset. */
+    var autoSubmitDelayMs: Long
+        get() = store.getLong("auto_submit_delay_ms", 2000L)
+        set(value) = store.edit().putLong("auto_submit_delay_ms", value).apply()
+
+    /** Backed by key "selected_whisper_model"; reads [DEFAULT_WHISPER_MODEL] when unset. */
+    var selectedWhisperModel: String
+        get() = text("selected_whisper_model", DEFAULT_WHISPER_MODEL)
+        set(value) = saveText("selected_whisper_model", value)
+
+    /** Backed by key "stt_language"; reads [DEFAULT_STT_LANGUAGE] when unset. */
+    var sttLanguage: String
+        get() = text("stt_language", DEFAULT_STT_LANGUAGE)
+        set(value) = saveText("stt_language", value)
+
+    /** Backed by key "auto_context_trim_enabled"; reads `false` when unset. */
+    var autoContextTrimEnabled: Boolean
+        get() = flag("auto_context_trim_enabled")
+        set(value) = saveFlag("auto_context_trim_enabled", value)
+
+    private fun flag(key: String): Boolean = store.getBoolean(key, false)
+    private fun text(key: String, fallback: String): String = store.getString(key, fallback) ?: fallback
+    private fun saveFlag(key: String, on: Boolean) = store.edit().putBoolean(key, on).apply()
+    private fun saveText(key: String, content: String) = store.edit().putString(key, content).apply()
+
+    companion object {
+        const val DEFAULT_WHISPER_MODEL = "ggml-base.en.bin"
+        const val DEFAULT_STT_LANGUAGE = "en"
+
+        /** Whisper language codes paired with their display names; "auto" maps to "Auto-detect". */
+        val SUPPORTED_LANGUAGES = listOf(
+            "en" to "English", "de" to "German", "fr" to "French",
+            "es" to "Spanish", "it" to "Italian", "pt" to "Portuguese",
+            "nl" to "Dutch", "pl" to "Polish", "ru" to "Russian",
+            "zh" to "Chinese", "ja" to "Japanese", "ko" to "Korean",
+            "ar" to "Arabic", "hi" to "Hindi", "tr" to "Turkish",
+            "uk" to "Ukrainian", "cs" to "Czech", "sv" to "Swedish",
+            "auto" to "Auto-detect",
+        )
+    }
+}
diff --git a/app/src/main/java/io/shubham0204/smollmandroid/llm/SmolLMManager.kt b/app/src/main/java/io/shubham0204/smollmandroid/llm/SmolLMManager.kt
@@ -16,6 +16,7 @@
 
 package io.shubham0204.smollmandroid.llm
 
+import android.os.Process
 import android.util.Log
 import io.shubham0204.smollm.SmolLM
 import io.shubham0204.smollmandroid.data.AppDB
@@ -163,49 +164,72 @@ class SmolLMManager(private val appDB: AppDB) {
             responseGenerationJob?.cancel()
 
             responseGenerationJob = CoroutineScope(Dispatchers.Default).launch {
+                // Boost thread priority to reduce CPU throttling when screen is locked
+                // THREAD_PRIORITY_URGENT_AUDIO (-19) is the highest priority available
+                // to regular apps and signals to the system this is time-sensitive work
+                val originalPriority = Process.getThreadPriority(Process.myTid())
                 try {
+                    Process.setThreadPriority(Process.THREAD_PRIORITY_URGENT_AUDIO)
+                    LOGD(">>> Thread priority boosted from $originalPriority to URGENT_AUDIO")
+                } catch (e: Exception) {
+                    LOGD(">>> Failed to boost thread priority: ${e.message}")
+                }
+
+                try {
+                    LOGD(">>> getResponse coroutine started on thread: ${Thread.currentThread().name}")
                     isInferenceOn = true
                     var response = ""
 
                     val duration = measureTime {
+                        LOGD(">>> Starting response flow collection...")
                         instance.getResponseAsFlow(query).collect { piece ->
                             response += piece
-                            withContext(Dispatchers.Main) {
-                                onPartialResponseGenerated(response)
-                            }
+                            // Don't use Main dispatcher - callbacks are thread-safe
+                            // Using Main blocks when screen is locked
+                            onPartialResponseGenerated(response)
                         }
+                        LOGD(">>> Response flow collection complete")
                     }
 
                     response = responseTransform(response)
+                    LOGD(">>> Response transformed, length=${response.length}")
 
                     // Thread-safe access to chat
                     val currentChat = stateLock.withLock { chat }
 
                     if (currentChat != null) {
                         // Add response to database
+                        LOGD(">>> Adding assistant message to DB...")
                         appDB.addAssistantMessage(currentChat.id, response)
+                        LOGD(">>> Assistant message added")
                     }
 
-                    withContext(Dispatchers.Main) {
-                        isInferenceOn = false
-                        onSuccess(
-                            SmolLMResponse(
-                                response = response,
-                                generationSpeed = instance.getResponseGenerationSpeed(),
-                                generationTimeSecs = duration.inWholeSeconds.toInt(),
-                                contextLengthUsed = instance.getContextLengthUsed(),
-                            )
+                    LOGD(">>> Calling onSuccess callback...")
+                    isInferenceOn = false
+                    onSuccess(
+                        SmolLMResponse(
+                            response = response,
+                            generationSpeed = instance.getResponseGenerationSpeed(),
+                            generationTimeSecs = duration.inWholeSeconds.toInt(),
+                            contextLengthUsed = instance.getContextLengthUsed(),
                         )
-                    }
+                    )
+                    LOGD(">>> onSuccess callback returned")
                 } catch (e: CancellationException) {
+                    LOGD(">>> Response generation cancelled")
                     isInferenceOn = false
-                    withContext(Dispatchers.Main) {
-                        onCancelled()
-                    }
+                    onCancelled()
                 } catch (e: Exception) {
+                    LOGD(">>> Response generation error: ${e.message}")
                     isInferenceOn = false
-                    withContext(Dispatchers.Main) {
-                        onError(e)
+                    onError(e)
+                } finally {
+                    // Restore original thread priority
+                    try {
+                        Process.setThreadPriority(originalPriority)
+                        LOGD(">>> Thread priority restored to $originalPriority")
+                    } catch (e: Exception) {
+                        LOGD(">>> Failed to restore thread priority: ${e.message}")
                     }
                 }
             }