From d38adc80035c2f870a020d160fe8a76728f937d0 Mon Sep 17 00:00:00 2001 From: Godzilla675 Date: Fri, 13 Mar 2026 02:00:28 +0000 Subject: [PATCH 1/3] Add microphone transcription support - add RECORD_AUDIO permission and a MediaRecorder-based chat recorder - keep file import as a fallback while staging recorded clips before send - route microphone audio through the existing GGUF transcription path Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- app/src/main/AndroidManifest.xml | 3 +- .../tool_neuron/audio/ChatAudioRecorder.kt | 152 ++++++++++ .../ui/components/ActionButtons.kt | 4 + .../com/dark/tool_neuron/ui/icons/TnIcons.kt | 1 + .../ui/screen/home/HomeBottomBar.kt | 269 ++++++++++++++++-- .../tool_neuron/viewmodel/ChatViewModel.kt | 96 +++++-- 6 files changed, 469 insertions(+), 56 deletions(-) create mode 100644 app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index e3bec2cf..879501f7 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -3,6 +3,7 @@ xmlns:tools="http://schemas.android.com/tools" > + - \ No newline at end of file + diff --git a/app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt b/app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt new file mode 100644 index 00000000..560abd78 --- /dev/null +++ b/app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt @@ -0,0 +1,152 @@ +package com.dark.tool_neuron.audio + +import android.content.Context +import android.media.MediaRecorder +import android.os.Build +import android.os.SystemClock +import android.util.Log +import java.io.File +import java.io.IOException + +data class RecordedAudioClip( + val file: File, + val durationMillis: Long +) + +class ChatAudioRecorder( + private val appContext: Context +) { + private var mediaRecorder: MediaRecorder? = null + private var activeOutputFile: File? 
= null + private var recordingStartedAtMs: Long? = null + + @Throws(IOException::class, IllegalStateException::class) + fun startRecording() { + check(mediaRecorder == null) { "A microphone recording is already in progress" } + + val outputDirectory = File(appContext.cacheDir, CACHE_DIRECTORY_NAME) + if (!outputDirectory.exists() && !outputDirectory.mkdirs()) { + throw IOException("Failed to prepare microphone cache directory") + } + + val outputFile = File.createTempFile("chat-mic-", OUTPUT_EXTENSION, outputDirectory) + val recorder = createMediaRecorder().apply { + setAudioSource(MediaRecorder.AudioSource.MIC) + setOutputFormat(MediaRecorder.OutputFormat.MPEG_4) + setAudioEncoder(MediaRecorder.AudioEncoder.AAC) + setAudioEncodingBitRate(DEFAULT_AUDIO_BITRATE) + setAudioSamplingRate(DEFAULT_AUDIO_SAMPLE_RATE) + setOutputFile(outputFile.absolutePath) + } + + try { + recorder.prepare() + recorder.start() + } catch (e: IOException) { + recorder.release() + deleteClip(outputFile) + throw IOException("Unable to prepare microphone recording", e) + } catch (e: RuntimeException) { + recorder.release() + deleteClip(outputFile) + throw IllegalStateException("Unable to start microphone recording", e) + } + + mediaRecorder = recorder + activeOutputFile = outputFile + recordingStartedAtMs = SystemClock.elapsedRealtime() + } + + @Throws(IllegalStateException::class) + fun stopRecording(): RecordedAudioClip { + val recorder = mediaRecorder ?: throw IllegalStateException("No microphone recording is in progress") + val outputFile = activeOutputFile + ?: throw IllegalStateException("No microphone recording file is available") + val startedAtMs = recordingStartedAtMs + ?: throw IllegalStateException("Microphone recording start time is unavailable") + + try { + recorder.stop() + } catch (e: RuntimeException) { + releaseRecorder(resetFirst = false) + deleteClip(outputFile) + throw IllegalStateException( + "Microphone recording could not be finalized. 
Try recording a little longer.", + e + ) + } + + releaseRecorder(resetFirst = true) + + return RecordedAudioClip( + file = outputFile, + durationMillis = (SystemClock.elapsedRealtime() - startedAtMs).coerceAtLeast(0L) + ) + } + + fun cancelRecording() { + val recorder = mediaRecorder ?: return + val outputFile = activeOutputFile + + try { + recorder.stop() + } catch (e: RuntimeException) { + Log.w(TAG, "Discarding incomplete microphone recording", e) + } finally { + releaseRecorder(resetFirst = true) + } + + outputFile?.let(::deleteClip) + } + + fun deleteClip(file: File) { + if (file.exists() && !file.delete()) { + Log.w(TAG, "Failed to delete temporary audio clip: ${file.absolutePath}") + } + } + + fun release() { + if (mediaRecorder != null) { + cancelRecording() + } + } + + @Suppress("DEPRECATION") + private fun createMediaRecorder(): MediaRecorder { + return if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + MediaRecorder(appContext) + } else { + MediaRecorder() + } + } + + private fun releaseRecorder(resetFirst: Boolean) { + val recorder = mediaRecorder + if (recorder != null) { + if (resetFirst) { + try { + recorder.reset() + } catch (e: RuntimeException) { + Log.w(TAG, "Failed to reset MediaRecorder before release", e) + } + } + try { + recorder.release() + } catch (e: RuntimeException) { + Log.w(TAG, "Failed to release MediaRecorder", e) + } + } + + mediaRecorder = null + activeOutputFile = null + recordingStartedAtMs = null + } + + companion object { + private const val TAG = "ChatAudioRecorder" + private const val CACHE_DIRECTORY_NAME = "audio-recordings" + private const val OUTPUT_EXTENSION = ".m4a" + private const val DEFAULT_AUDIO_BITRATE = 128_000 + private const val DEFAULT_AUDIO_SAMPLE_RATE = 44_100 + } +} diff --git a/app/src/main/java/com/dark/tool_neuron/ui/components/ActionButtons.kt b/app/src/main/java/com/dark/tool_neuron/ui/components/ActionButtons.kt index f36174ff..e09cdb9d 100644 --- 
a/app/src/main/java/com/dark/tool_neuron/ui/components/ActionButtons.kt +++ b/app/src/main/java/com/dark/tool_neuron/ui/components/ActionButtons.kt @@ -72,6 +72,7 @@ fun ActionButton( icon: Int, contentDescription: String = "Description", modifier: Modifier = Modifier, + enabled: Boolean = true, shape: Shape = MaterialShapes.Square.toShape(), colors: IconButtonColors = IconButtonDefaults.filledIconButtonColors( containerColor = MaterialTheme.colorScheme.primary.copy(0.06f), @@ -80,6 +81,7 @@ fun ActionButton( ) { FilledIconButton( onClick = { onClickListener() }, + enabled = enabled, colors = colors, shape = shape, modifier = modifier.size(Standards.ActionIconSize) @@ -143,6 +145,7 @@ fun ActionButton( icon: ImageVector, contentDescription: String = "Description", modifier: Modifier = Modifier, + enabled: Boolean = true, shape: Shape = MaterialShapes.Square.toShape(), colors: IconButtonColors = IconButtonDefaults.filledIconButtonColors( containerColor = MaterialTheme.colorScheme.primary.copy(0.06f), @@ -151,6 +154,7 @@ fun ActionButton( ) { FilledIconButton( onClick = { onClickListener() }, + enabled = enabled, colors = colors, shape = shape, modifier = modifier.size(Standards.ActionIconSize) diff --git a/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt b/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt index 663986a4..fd9c8d58 100644 --- a/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt +++ b/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt @@ -145,6 +145,7 @@ object TnIcons { val Prompt by lazy { tabler("M8 9h8", "M8 13h6", "M9 18h-3a3 3 0 0 1 -3 -3v-8a3 3 0 0 1 3 -3h12a3 3 0 0 1 3 3v8a3 3 0 0 1 -3 3h-3l-3 3l-3 -3") } // ── Audio ── + val Microphone by lazy { tabler("M9 5a3 3 0 0 1 6 0v5a3 3 0 0 1 -6 0z", "M5 10a7 7 0 0 0 14 0", "M8 21h8", "M12 17v4") } val Volume by lazy { tabler("M15 8a5 5 0 0 1 0 8", "M17.7 5a9 9 0 0 1 0 14", "M6 15h-2a1 1 0 0 1 -1 -1v-4a1 1 0 0 1 1 -1h2l3.5 -4.5a.8 .8 0 0 1 1.5 .5v14a.8 .8 0 0 1 -1.5 
.5l-3.5 -4.5") } // ── Misc ── diff --git a/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt b/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt index 7a6b7ead..8747a1a3 100644 --- a/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt +++ b/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt @@ -1,6 +1,9 @@ package com.dark.tool_neuron.ui.screen.home +import android.Manifest import android.content.Intent +import android.content.pm.PackageManager +import android.os.SystemClock import androidx.activity.compose.rememberLauncherForActivityResult import androidx.activity.result.contract.ActivityResultContracts import androidx.compose.animation.AnimatedVisibility @@ -30,7 +33,9 @@ import androidx.compose.material3.TextField import androidx.compose.material3.TextFieldDefaults import androidx.compose.material3.toShape import androidx.compose.runtime.Composable +import androidx.compose.runtime.DisposableEffect import androidx.compose.runtime.getValue +import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember import androidx.compose.runtime.rememberCoroutineScope @@ -41,9 +46,12 @@ import androidx.compose.ui.graphics.Color import androidx.compose.ui.graphics.compositeOver import androidx.compose.ui.platform.LocalContext import androidx.compose.ui.unit.dp +import androidx.core.content.ContextCompat import androidx.hilt.lifecycle.viewmodel.compose.hiltViewModel import androidx.lifecycle.compose.collectAsStateWithLifecycle import com.dark.tool_neuron.activity.RagActivity +import com.dark.tool_neuron.audio.ChatAudioRecorder +import com.dark.tool_neuron.audio.RecordedAudioClip import com.dark.tool_neuron.global.Standards import com.dark.tool_neuron.models.ModelType import com.dark.tool_neuron.ui.components.ActionButton @@ -61,6 +69,9 @@ import com.dark.tool_neuron.viewmodel.LLMModelViewModel import 
com.dark.tool_neuron.viewmodel.MemoryViewModel import com.dark.tool_neuron.viewmodel.PluginViewModel import com.dark.tool_neuron.viewmodel.RagViewModel +import java.io.IOException +import java.util.Locale +import kotlinx.coroutines.delay import kotlinx.coroutines.launch // ── BottomBar ─────────────────────────────────────────────────────────────────── @@ -111,19 +122,123 @@ internal fun BottomBar( // Coroutine scope for RAG queries val scope = rememberCoroutineScope() + val audioRecorder = remember(context.applicationContext) { + ChatAudioRecorder(context.applicationContext) + } + var stagedRecording by remember { mutableStateOf<RecordedAudioClip?>(null) } + var isMicRecording by remember { mutableStateOf(false) } + var recordingStartedAtMs by remember { mutableStateOf<Long?>(null) } + var recordingElapsedMs by remember { mutableStateOf(0L) } + var micErrorMessage by remember { mutableStateOf<String?>(null) } - // Current audio UX is file-picker based. A future mic flow can keep the same audio mode and - // route its recorded output into ChatViewModel.sendChatWithAudio(...) instead of this launcher. 
+ val discardStagedRecording = { + stagedRecording?.let { audioRecorder.deleteClip(it.file) } + stagedRecording = null + } + val clearAudioCaptureState = { + if (isMicRecording) { + audioRecorder.cancelRecording() + } + isMicRecording = false + recordingStartedAtMs = null + recordingElapsedMs = 0L + discardStagedRecording() + micErrorMessage = null + } + val startMicrophoneRecording = { + discardStagedRecording() + micErrorMessage = null + try { + audioRecorder.startRecording() + isMicRecording = true + recordingStartedAtMs = SystemClock.elapsedRealtime() + recordingElapsedMs = 0L + } catch (e: IOException) { + micErrorMessage = e.message ?: "Unable to start microphone recording" + } catch (e: IllegalStateException) { + micErrorMessage = e.message ?: "Unable to start microphone recording" + } catch (e: SecurityException) { + micErrorMessage = e.message ?: "Microphone permission denied" + } + } + val stopMicrophoneRecording = { + try { + val clip = audioRecorder.stopRecording() + discardStagedRecording() + stagedRecording = clip + isMicRecording = false + recordingStartedAtMs = null + recordingElapsedMs = clip.durationMillis + micErrorMessage = null + } catch (e: IllegalStateException) { + isMicRecording = false + recordingStartedAtMs = null + recordingElapsedMs = 0L + micErrorMessage = e.message ?: "Unable to finalize microphone recording" + } + } + + val microphonePermissionLauncher = rememberLauncherForActivityResult( + contract = ActivityResultContracts.RequestPermission() + ) { isGranted -> + if (isGranted) { + startMicrophoneRecording() + } else { + micErrorMessage = "Microphone permission is required to record audio." + } + } + + // File imports remain the fallback path, while mic capture stages a temporary clip for review + // before both routes converge on ChatViewModel.sendChatWithAudio(...). 
val audioLauncher = rememberLauncherForActivityResult( contract = ActivityResultContracts.GetContent() ) { uri -> if (uri != null) { + discardStagedRecording() + micErrorMessage = null val audioPrompt = value.ifBlank { "Transcribe this audio." } chatViewModel.sendChatWithAudio(audioPrompt, context, uri) value = "" } } + DisposableEffect(audioRecorder) { + onDispose { + audioRecorder.release() + stagedRecording?.let { audioRecorder.deleteClip(it.file) } + } + } + + LaunchedEffect(isMicRecording, recordingStartedAtMs) { + if (!isMicRecording || recordingStartedAtMs == null) { + return@LaunchedEffect + } + + while (isMicRecording) { + val startedAt = recordingStartedAtMs ?: break + recordingElapsedMs = SystemClock.elapsedRealtime() - startedAt + delay(250L) + } + } + + LaunchedEffect(chatState.generationType) { + if (chatState.generationType != ModelType.AUDIO_GENERATION) { + clearAudioCaptureState() + } + } + + val audioStatusMessage = when { + micErrorMessage != null -> micErrorMessage + isMicRecording -> "Recording microphone - ${formatAudioDuration(recordingElapsedMs)}" + stagedRecording != null -> "Recorded clip ready to send - ${formatAudioDuration(stagedRecording!!.durationMillis)}" + else -> null + } + val canSendCurrentInput = when (chatState.generationType) { + ModelType.TEXT_GENERATION -> value.isNotBlank() + ModelType.IMAGE_GENERATION -> value.isNotBlank() + ModelType.AUDIO_GENERATION -> stagedRecording != null && !isMicRecording + } + // More Options overlay state var showMoreOptions by remember { mutableStateOf(false) } @@ -280,6 +395,28 @@ internal fun BottomBar( onWebSearchChipClick = { pluginViewModel.toggleWebSearch(false) } ) + AnimatedVisibility( + visible = chatState.generationType == ModelType.AUDIO_GENERATION && + audioStatusMessage != null + ) { + Row( + modifier = Modifier + .fillMaxWidth() + .padding(top = Standards.SpacingXs, start = Standards.SpacingMd), + verticalAlignment = Alignment.CenterVertically + ) { + Text( + text = 
audioStatusMessage.orEmpty(), + color = if (micErrorMessage != null) { + MaterialTheme.colorScheme.error + } else { + MaterialTheme.colorScheme.onSurfaceVariant + }, + style = MaterialTheme.typography.bodySmall + ) + } + } + // More Options overlay (above action row, like model list) MoreOptionsOverlay( show = showMoreOptions, @@ -376,6 +513,58 @@ internal fun BottomBar( Spacer(Modifier.weight(1f)) + if (chatState.generationType == ModelType.AUDIO_GENERATION) { + ActionButton( + onClickListener = { + micErrorMessage = null + audioLauncher.launch("audio/*") + }, + enabled = !chatState.isGenerating && !isMicRecording, + icon = TnIcons.FileUpload, + modifier = Modifier.padding(end = Standards.SpacingXs) + ) + + ActionButton( + onClickListener = { + if (isMicRecording) { + stopMicrophoneRecording() + } else if ( + ContextCompat.checkSelfPermission( + context, + Manifest.permission.RECORD_AUDIO + ) == PackageManager.PERMISSION_GRANTED + ) { + startMicrophoneRecording() + } else { + microphonePermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) + } + }, + enabled = !chatState.isGenerating, + icon = if (isMicRecording) TnIcons.PlayerStop else TnIcons.Microphone, + modifier = Modifier.padding(end = Standards.SpacingXs), + colors = if (isMicRecording) { + IconButtonDefaults.filledIconButtonColors( + containerColor = MaterialTheme.colorScheme.error.copy(0.18f), + contentColor = MaterialTheme.colorScheme.error + ) + } else { + IconButtonDefaults.filledIconButtonColors( + containerColor = MaterialTheme.colorScheme.primary.copy(0.06f), + contentColor = MaterialTheme.colorScheme.primary + ) + } + ) + + if (isMicRecording || stagedRecording != null) { + ActionButton( + onClickListener = { clearAudioCaptureState() }, + enabled = !chatState.isGenerating, + icon = TnIcons.X, + modifier = Modifier.padding(end = Standards.SpacingXs) + ) + } + } + // 6. 
Send/Stop when (chatState.isGenerating) { true -> { @@ -394,41 +583,54 @@ internal fun BottomBar( false -> { ActionButton( onClickListener = { - if (value.isNotBlank() || chatState.generationType == ModelType.AUDIO_GENERATION) { - // Close overlays on send - showMoreOptions = false - when (chatState.generationType) { - ModelType.TEXT_GENERATION -> { - val hasRags = loadedRags.isNotEmpty() && isRagEnabledForChat - - if (hasRags) { - val userQuery = value - value = "" - scope.launch { - val ragContext = ragViewModel.queryAndStoreResults(userQuery) - chatViewModel.setRagContext( - ragContext.ifBlank { null }, - ragViewModel.lastRagResults.value - ) - chatViewModel.sendTextMessage(userQuery) - } - } else { - chatViewModel.clearRagContext() - chatViewModel.sendTextMessage(value) - value = "" - } - } + // Close overlays on send + showMoreOptions = false + when (chatState.generationType) { + ModelType.TEXT_GENERATION -> { + val hasRags = loadedRags.isNotEmpty() && isRagEnabledForChat - ModelType.IMAGE_GENERATION -> { - chatViewModel.sendImageRequest(value) + if (hasRags) { + val userQuery = value + value = "" + scope.launch { + val ragContext = ragViewModel.queryAndStoreResults(userQuery) + chatViewModel.setRagContext( + ragContext.ifBlank { null }, + ragViewModel.lastRagResults.value + ) + chatViewModel.sendTextMessage(userQuery) + } + } else { + chatViewModel.clearRagContext() + chatViewModel.sendTextMessage(value) value = "" } - ModelType.AUDIO_GENERATION -> { - audioLauncher.launch("audio/*") + } + + ModelType.IMAGE_GENERATION -> { + chatViewModel.sendImageRequest(value) + value = "" + } + + ModelType.AUDIO_GENERATION -> { + val clip = stagedRecording ?: return@ActionButton + val readinessError = chatViewModel.getAudioGenerationReadinessError() + if (readinessError != null) { + micErrorMessage = readinessError + return@ActionButton } + + val audioPrompt = value.ifBlank { "Transcribe this audio." 
} + chatViewModel.sendChatWithAudio(audioPrompt, clip.file) + stagedRecording = null + recordingStartedAtMs = null + recordingElapsedMs = 0L + micErrorMessage = null + value = "" } } }, + enabled = canSendCurrentInput, icon = TnIcons.Send, shape = MaterialShapes.Ghostish.toShape(), modifier = Modifier.padding(end = Standards.SpacingMd), @@ -444,3 +646,10 @@ internal fun BottomBar( } } } + +private fun formatAudioDuration(durationMillis: Long): String { + val totalSeconds = (durationMillis / 1000L).toInt() + val minutes = totalSeconds / 60 + val seconds = totalSeconds % 60 + return String.format(Locale.US, "%02d:%02d", minutes, seconds) +} diff --git a/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt b/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt index 536e6c39..6efd8cfa 100644 --- a/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt +++ b/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt @@ -38,6 +38,8 @@ import com.dark.gguf_lib.toolcalling.ToolCallingConfig import dagger.hilt.android.lifecycle.HiltViewModel import dagger.hilt.android.qualifiers.ApplicationContext import jakarta.inject.Inject +import java.io.File +import java.io.IOException import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.SharingStarted @@ -468,45 +470,76 @@ class ChatViewModel @Inject constructor( sendChatWithMedia(prompt, imageData) } - fun sendChatWithAudio(prompt: String, context: Context, audioUri: Uri) { - if (_isGenerating.value) return + fun getAudioGenerationReadinessError(): String? = getMediaGenerationReadinessError() - viewModelScope.launch { - try { - // File-based audio is the current MVP. Future microphone capture can feed the - // same byte-array entrypoint below after encoding captured audio into a supported - // container/format for the loaded projector. 
- val audioBytes = withContext(Dispatchers.IO) { - context.contentResolver.openInputStream(audioUri)?.use { input -> - input.readBytes() - } - } ?: throw IllegalStateException("Failed to read the selected audio file") + fun sendChatWithAudio(prompt: String, context: Context, audioUri: Uri) { + sendChatWithLoadedAudio( + prompt = prompt, + sourceLabel = "selected audio input" + ) { + context.contentResolver.openInputStream(audioUri)?.use { input -> + input.readBytes() + } ?: throw IllegalStateException("Failed to read the selected audio file") + } + } - sendChatWithAudio(prompt, audioBytes) - } catch (e: Exception) { - Log.e(TAG, "Failed to load audio input", e) - reportError(e.message) + fun sendChatWithAudio(prompt: String, audioFile: File) { + sendChatWithLoadedAudio( + prompt = prompt, + sourceLabel = "recorded audio input", + cleanup = { + if (audioFile.exists() && !audioFile.delete()) { + Log.w(TAG, "Failed to delete temporary recorded audio: ${audioFile.absolutePath}") + } + } + ) { + if (!audioFile.exists()) { + throw IllegalStateException("Recorded audio file is missing") } + audioFile.readBytes() } } fun sendChatWithAudio(prompt: String, audioData: ByteArray) { - // Keep all non-file audio ingestion funneled through this method so a future mic-recording - // flow only has to produce audio bytes and does not need separate chat/generation logic. + // Keep every audio source funneled through the same byte-array path so file import, mic + // recording, and any future capture UX reuse the exact same chat/generation logic. val effectivePrompt = prompt.ifBlank { "Transcribe this audio." 
} sendChatWithMedia(effectivePrompt, listOf(audioData)) } - private fun sendChatWithMedia(prompt: String, mediaData: List<ByteArray>) { - if (!LlmModelWorker.isGgufModelLoaded.value) { - reportError("Please load a text generation model first") - return + private fun sendChatWithLoadedAudio( + prompt: String, + sourceLabel: String, + cleanup: suspend () -> Unit = {}, + loadAudio: suspend () -> ByteArray + ) { + if (_isGenerating.value) return + + viewModelScope.launch { + try { + val audioBytes = withContext(Dispatchers.IO) { loadAudio() } + sendChatWithAudio(prompt, audioBytes) + } catch (e: IOException) { + Log.e(TAG, "Failed to load $sourceLabel", e) + reportError(e.message) + } catch (e: SecurityException) { + Log.e(TAG, "Permission denied while loading $sourceLabel", e) + reportError(e.message) + } catch (e: IllegalStateException) { + Log.e(TAG, "Failed to load $sourceLabel", e) + reportError(e.message) + } finally { + withContext(Dispatchers.IO) { cleanup() } + } } - if (!LlmModelWorker.isVlmLoaded.value) { - reportError("Please load a compatible projector (mmproj) first") + } + + private fun sendChatWithMedia(prompt: String, mediaData: List<ByteArray>) { + val readinessError = getMediaGenerationReadinessError() + if (readinessError != null) { + reportError(readinessError) return } - if (_isGenerating.value) return _isGenerating.value = true _streamingUserMessage.value = prompt @@ -605,6 +638,19 @@ class ChatViewModel @Inject constructor( } } + private fun getMediaGenerationReadinessError(): String? { + if (!LlmModelWorker.isGgufModelLoaded.value) { + return "Please load a text generation model first" + } + if (!LlmModelWorker.isVlmLoaded.value) { + return "Please load a compatible projector (mmproj) first" + } + if (_isGenerating.value) { + return "Please wait for the current generation to finish" + } + return null + } + /** * Regenerate the last assistant response. * Removes the last assistant message and re-sends the last user prompt. 
From 9dbd658b636349b53f51222e6cb5531987911ab0 Mon Sep 17 00:00:00 2001 From: sngodzilla Date: Sat, 14 Mar 2026 23:55:20 +0000 Subject: [PATCH 2/3] Add APK publishing workflow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/build-test-apk.yml | 154 +++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 .github/workflows/build-test-apk.yml diff --git a/.github/workflows/build-test-apk.yml b/.github/workflows/build-test-apk.yml new file mode 100644 index 00000000..4aa0355e --- /dev/null +++ b/.github/workflows/build-test-apk.yml @@ -0,0 +1,154 @@ +name: Build and publish test APK + +on: + push: + branches: + - Fix-whisper-initial-download-issue + workflow_dispatch: + +permissions: + contents: write + +concurrency: + group: test-apk-${{ github.ref }} + cancel-in-progress: true + +env: + APK_NAME: ToolNeuron-gguf-audio-mic-debug.apk + APK_ARTIFACT_NAME: toolneuron-gguf-audio-mic-debug-apk + RELEASE_TAG: toolneuron-fix-whisper-test-apk + +jobs: + build-test-apk: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Java 17 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "17" + cache: gradle + + - name: Set up Android SDK + uses: android-actions/setup-android@v3 + + - name: Install Android packages + run: | + yes | sdkmanager --licenses > /dev/null + sdkmanager \ + "platform-tools" \ + "platforms;android-36" \ + "build-tools;36.0.0" \ + "cmake;3.22.1" \ + "ndk;28.2.13676358" + + - name: Create local.properties + run: | + SDK_ROOT="${ANDROID_SDK_ROOT:-$ANDROID_HOME}" + cat > local.properties <> "$GITHUB_STEP_SUMMARY" From ac022b39aa94765a4d078ff7ecbdd4abb83dc1cd Mon Sep 17 00:00:00 2001 From: sngodzilla Date: Sun, 22 Mar 2026 14:02:28 +0000 Subject: [PATCH 3/3] Add mmjproj projector sidecar support - Recognize mmjproj as a projector marker alongside mmproj, vision-adapter, projector - Score mmjproj candidates in 
sidecar auto-download selection - Broaden user-facing projector readiness message to mmproj/mmjproj - Add unit tests for mmjproj filtering and case-insensitive detection --- .../java/com/dark/tool_neuron/repo/ModelStoreRepository.kt | 2 +- .../com/dark/tool_neuron/service/ModelDownloadService.kt | 2 +- .../java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt | 6 +++--- .../com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt | 7 +++++++ 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/app/src/main/java/com/dark/tool_neuron/repo/ModelStoreRepository.kt b/app/src/main/java/com/dark/tool_neuron/repo/ModelStoreRepository.kt index 9c6a53f5..46b63381 100644 --- a/app/src/main/java/com/dark/tool_neuron/repo/ModelStoreRepository.kt +++ b/app/src/main/java/com/dark/tool_neuron/repo/ModelStoreRepository.kt @@ -33,7 +33,7 @@ class ModelStoreRepository(private val context: Context) { private val GGUF_SUFFIX_REGEX = Regex("\\.gguf$", RegexOption.IGNORE_CASE) private val QUANTIZATION_MATCH_REGEX = Regex("""(?:^|[.-])((?:I?Q)\d+(?:_[A-Z0-9]+)*)""") private val TRAILING_QUANTIZATION_REGEX = Regex("""([.-])(?:I?Q)\d+(?:_[A-Z0-9]+)*(?:-[A-Z0-9]+)*$""") - private val PROJECTOR_MARKERS = listOf("mmproj", "vision-adapter", "projector") + private val PROJECTOR_MARKERS = listOf("mmproj", "mmjproj", "vision-adapter", "projector") internal fun isProjectorGgufFile(path: String): Boolean { return path.endsWith(".gguf", ignoreCase = true) && diff --git a/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt b/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt index 6604c8df..6b9e48fa 100644 --- a/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt +++ b/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt @@ -413,7 +413,7 @@ class ModelDownloadService : Service() { if (modelFamilyKey.isNotBlank() && lowerPath.contains(modelFamilyKey)) { score += 3 } - if (lowerPath.contains("mmproj")) { + if 
(lowerPath.contains("mmproj") || lowerPath.contains("mmjproj")) { score += 1 } file to score diff --git a/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt b/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt index c96ddb1f..ee71a6a7 100644 --- a/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt +++ b/app/src/main/java/com/dark/tool_neuron/viewmodel/ChatViewModel.kt @@ -455,7 +455,7 @@ class ChatViewModel @Inject constructor( fun sendTextMessage(prompt: String) = sendChat(prompt) /** - * Send a message with images (VLM). Requires a VLM projector to be loaded. + * Send a message with projector-backed media. Requires a compatible projector to be loaded. * @param prompt User's text prompt * @param imageData List of raw image file bytes (JPEG/PNG) */ @@ -557,7 +557,7 @@ class ChatViewModel @Inject constructor( val maxTokens = getCurrentModelMaxTokens() val isNewChat = isNewConversation - // Insert image marker into prompt for VLM + // Insert the default media marker into the prompt for projector-backed generation. 
val marker = LlmModelWorker.getVlmDefaultMarker() val vlmPrompt = if (prompt.contains(marker)) prompt else marker.repeat(mediaData.size) + "\n" + prompt @@ -636,7 +636,7 @@ class ChatViewModel @Inject constructor( return "Please load a text generation model first" } if (!LlmModelWorker.isVlmLoaded.value) { - return "Please load a compatible projector (mmproj) first" + return "Please load a compatible projector sidecar (mmproj/mmjproj) first" } if (_isGenerating.value) { return "Please wait for the current generation to finish" diff --git a/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt b/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt index 0946364e..f44f005e 100644 --- a/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt +++ b/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt @@ -21,10 +21,17 @@ class ModelStoreRepositoryTest { @Test fun supportedGgufFileRejectsProjectionArtifacts() { assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-mmproj.Q4_K_M.GGUF")) + assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-mmjproj.Q4_K_M.GGUF")) assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-vision-adapter.gguf")) assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-projector.gguf")) } + @Test + fun projectorGgufFileAcceptsMmjprojAliasCaseInsensitively() { + assertTrue(ModelStoreRepository.isProjectorGgufFile("models/whisper-mmjproj.q4_k_m.gguf")) + assertTrue(ModelStoreRepository.isProjectorGgufFile("models/Whisper-MMJPROJ.Q4_K_M.GGUF")) + } + @Test fun stripGgufSuffixRemovesExtensionCaseInsensitively() { assertEquals(