diff --git a/.github/workflows/build-test-apk.yml b/.github/workflows/build-test-apk.yml new file mode 100644 index 00000000..4aa0355e --- /dev/null +++ b/.github/workflows/build-test-apk.yml @@ -0,0 +1,154 @@ +name: Build and publish test APK + +on: + push: + branches: + - Fix-whisper-initial-download-issue + workflow_dispatch: + +permissions: + contents: write + +concurrency: + group: test-apk-${{ github.ref }} + cancel-in-progress: true + +env: + APK_NAME: ToolNeuron-gguf-audio-mic-debug.apk + APK_ARTIFACT_NAME: toolneuron-gguf-audio-mic-debug-apk + RELEASE_TAG: toolneuron-fix-whisper-test-apk + +jobs: + build-test-apk: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Java 17 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "17" + cache: gradle + + - name: Set up Android SDK + uses: android-actions/setup-android@v3 + + - name: Install Android packages + run: | + yes | sdkmanager --licenses > /dev/null + sdkmanager \ + "platform-tools" \ + "platforms;android-36" \ + "build-tools;36.0.0" \ + "cmake;3.22.1" \ + "ndk;28.2.13676358" + + - name: Create local.properties + run: | + SDK_ROOT="${ANDROID_SDK_ROOT:-$ANDROID_HOME}" + cat > local.properties <> "$GITHUB_STEP_SUMMARY" diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 0e3e9b05..7f09d4ae 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -3,6 +3,7 @@ xmlns:tools="http://schemas.android.com/tools" > + - \ No newline at end of file + diff --git a/app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt b/app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt new file mode 100644 index 00000000..560abd78 --- /dev/null +++ b/app/src/main/java/com/dark/tool_neuron/audio/ChatAudioRecorder.kt @@ -0,0 +1,152 @@ +package com.dark.tool_neuron.audio + +import android.content.Context +import android.media.MediaRecorder +import 
/**
 * Captures microphone audio for chat into temporary `.m4a` clips (AAC in an MPEG-4 container).
 *
 * Clips are created under the `audio-recordings` sub-directory of the app cache directory.
 * Only one recording may be active at a time. The class performs no locking of its own.
 */
class ChatAudioRecorder(
    private val appContext: Context
) {
    private var mediaRecorder: MediaRecorder? = null
    private var activeOutputFile: File? = null
    private var recordingStartedAtMs: Long? = null

    /**
     * Starts a new recording into a freshly created temporary file.
     *
     * On any failure the recorder is released and the temporary file is deleted before the
     * exception is rethrown, so a failed start leaves nothing behind.
     *
     * @throws IOException if the cache directory or temporary file cannot be created, or the
     * recorder reports an I/O error while preparing.
     * @throws IllegalStateException if a recording is already active, or the recorder cannot
     * be created, configured or started.
     */
    @Throws(IOException::class, IllegalStateException::class)
    fun startRecording() {
        check(mediaRecorder == null) { "A microphone recording is already in progress" }

        val outputDirectory = File(appContext.cacheDir, CACHE_DIRECTORY_NAME)
        if (!outputDirectory.exists() && !outputDirectory.mkdirs()) {
            throw IOException("Failed to prepare microphone cache directory")
        }

        val outputFile = File.createTempFile("chat-mic-", OUTPUT_EXTENSION, outputDirectory)

        val recorder = try {
            createMediaRecorder()
        } catch (e: RuntimeException) {
            deleteClip(outputFile)
            throw IllegalStateException("Unable to start microphone recording", e)
        }

        try {
            // Configuration lives inside the try block: the setters can throw as well, and the
            // recorder plus the temp file must not leak when they do.
            recorder.apply {
                setAudioSource(MediaRecorder.AudioSource.MIC)
                setOutputFormat(MediaRecorder.OutputFormat.MPEG_4)
                setAudioEncoder(MediaRecorder.AudioEncoder.AAC)
                setAudioEncodingBitRate(DEFAULT_AUDIO_BITRATE)
                setAudioSamplingRate(DEFAULT_AUDIO_SAMPLE_RATE)
                setOutputFile(outputFile.absolutePath)
                prepare()
                start()
            }
        } catch (e: IOException) {
            recorder.release()
            deleteClip(outputFile)
            throw IOException("Unable to prepare microphone recording", e)
        } catch (e: RuntimeException) {
            recorder.release()
            deleteClip(outputFile)
            throw IllegalStateException("Unable to start microphone recording", e)
        }

        mediaRecorder = recorder
        activeOutputFile = outputFile
        recordingStartedAtMs = SystemClock.elapsedRealtime()
    }

    /**
     * Stops the active recording and returns the finished clip.
     *
     * The caller owns the returned file and is responsible for deleting it (see [deleteClip]).
     *
     * @return the recorded file together with its wall-clock duration in milliseconds.
     * @throws IllegalStateException if no recording is active, or the recorder fails to
     * finalize the file; in the latter case the partial file is deleted.
     */
    @Throws(IllegalStateException::class)
    fun stopRecording(): RecordedAudioClip {
        val recorder = checkNotNull(mediaRecorder) { "No microphone recording is in progress" }
        val outputFile = checkNotNull(activeOutputFile) {
            "No microphone recording file is available"
        }
        val startedAtMs = checkNotNull(recordingStartedAtMs) {
            "Microphone recording start time is unavailable"
        }

        try {
            recorder.stop()
        } catch (e: RuntimeException) {
            releaseRecorder(resetFirst = false)
            deleteClip(outputFile)
            throw IllegalStateException(
                "Microphone recording could not be finalized. Try recording a little longer.",
                e
            )
        }

        // Measure before reset/release so recorder teardown time is not counted as audio.
        val durationMillis = (SystemClock.elapsedRealtime() - startedAtMs).coerceAtLeast(0L)
        releaseRecorder(resetFirst = true)

        return RecordedAudioClip(
            file = outputFile,
            durationMillis = durationMillis
        )
    }

    /**
     * Aborts the active recording, if any, and deletes its output file.
     * Does nothing when no recording is in progress.
     */
    fun cancelRecording() {
        val recorder = mediaRecorder ?: return
        val outputFile = activeOutputFile

        try {
            recorder.stop()
        } catch (e: RuntimeException) {
            // The clip is being thrown away, so a failed stop is only worth a log line.
            Log.w(TAG, "Discarding incomplete microphone recording", e)
        } finally {
            releaseRecorder(resetFirst = true)
        }

        outputFile?.let(::deleteClip)
    }

    /** Deletes [file] if it exists; a failed deletion is logged, never thrown. */
    fun deleteClip(file: File) {
        if (file.exists() && !file.delete()) {
            Log.w(TAG, "Failed to delete temporary audio clip: ${file.absolutePath}")
        }
    }

    /** Releases the recorder, discarding any recording that is still in progress. */
    fun release() {
        // cancelRecording() already returns early when nothing is being recorded.
        cancelRecording()
    }

    /** Uses the context-taking constructor on API 31+ and the deprecated no-arg one below it. */
    @Suppress("DEPRECATION")
    private fun createMediaRecorder(): MediaRecorder {
        return if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            MediaRecorder(appContext)
        } else {
            MediaRecorder()
        }
    }

    /**
     * Releases the current recorder (optionally resetting it first) and clears all
     * per-recording state. Failures from reset/release are logged and otherwise ignored.
     */
    private fun releaseRecorder(resetFirst: Boolean) {
        val recorder = mediaRecorder
        if (recorder != null) {
            if (resetFirst) {
                try {
                    recorder.reset()
                } catch (e: RuntimeException) {
                    Log.w(TAG, "Failed to reset MediaRecorder before release", e)
                }
            }
            try {
                recorder.release()
            } catch (e: RuntimeException) {
                Log.w(TAG, "Failed to release MediaRecorder", e)
            }
        }

        mediaRecorder = null
        activeOutputFile = null
        recordingStartedAtMs = null
    }

    companion object {
        private const val TAG = "ChatAudioRecorder"
        private const val CACHE_DIRECTORY_NAME = "audio-recordings"
        private const val OUTPUT_EXTENSION = ".m4a"
        private const val DEFAULT_AUDIO_BITRATE = 128_000
        private const val DEFAULT_AUDIO_SAMPLE_RATE = 44_100
    }
}
/**
 * Derives the quantization label (for example `Q4_K_M`) from a GGUF file name.
 *
 * The `.gguf` suffix is removed and the remainder upper-cased. The first capture of
 * [QUANTIZATION_MATCH_REGEX] wins; when nothing matches, the text after the last `-` is
 * returned, or the whole upper-cased base name if it contains no `-`.
 */
internal fun extractQuantType(fileName: String): String {
    val upperBaseName = stripGgufSuffix(fileName).uppercase()
    return QUANTIZATION_MATCH_REGEX.find(upperBaseName)?.groupValues?.get(1)
        ?: upperBaseName.substringAfterLast("-", upperBaseName).uppercase()
}
ModelStoreRepository(private val context: Context) { models.add( HuggingFaceModel( - id = "${repo.id}-${fileName.removeSuffix(".gguf")}", + id = "${repo.id}-${stripGgufSuffix(fileName)}", name = "${repo.name} - $quantType", description = "${repo.name} model with $quantType quantization", fileUri = "${repo.repoPath}/resolve/main/${file.path}", @@ -372,4 +402,4 @@ class ModelStoreRepository(private val context: Context) { return models } -} \ No newline at end of file +} diff --git a/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt b/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt index e7b6f414..6b9e48fa 100644 --- a/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt +++ b/app/src/main/java/com/dark/tool_neuron/service/ModelDownloadService.kt @@ -17,6 +17,9 @@ import com.dark.tool_neuron.global.HardwareScanner import com.dark.tool_neuron.models.engine_schema.GgufEngineSchema import com.dark.tool_neuron.models.enums.PathType import com.dark.tool_neuron.models.enums.ProviderType +import com.dark.tool_neuron.network.HuggingFaceClient +import com.dark.tool_neuron.network.HuggingFaceFileResponse +import com.dark.tool_neuron.repo.ModelStoreRepository import com.dark.tool_neuron.models.table_schema.Model import com.dark.tool_neuron.models.table_schema.ModelConfig import com.dark.tool_neuron.worker.DiffusionConfig @@ -229,12 +232,17 @@ class ModelDownloadService : Service() { AppPaths.models(applicationContext).mkdirs() val targetFile = AppPaths.modelFile(applicationContext, modelId) + val projectorFile = AppPaths.modelProjectorFile(applicationContext, modelId) if (targetFile.exists()) { targetFile.delete() } + if (projectorFile.exists()) { + projectorFile.delete() + } tempFile?.copyTo(targetFile, overwrite = true) + downloadProjectorSidecarIfPresent(fileUrl, modelId, modelName, notificationId) updateDownloadState(modelId, DownloadState.Processing(modelId)) updateNotification(modelName, 0f, notificationId, 
/**
 * Best-effort download of the projector sidecar that belongs to a Hugging Face GGUF model.
 *
 * Returns silently when [fileUrl] is not a `huggingface.co/.../resolve/main/...` URL, when the
 * repository listing cannot be obtained, or when no projector file is selected. Every failure
 * except coroutine cancellation is logged and swallowed, because the sidecar is optional and
 * must not fail the download of the main model.
 *
 * @param fileUrl URL the main model file was downloaded from.
 * @param modelId id used for the temp directory and the final sidecar path.
 * @param modelName display name forwarded to [downloadFile] with a " projector" suffix.
 * @param notificationId notification forwarded to [downloadFile].
 */
private suspend fun downloadProjectorSidecarIfPresent(
    fileUrl: String,
    modelId: String,
    modelName: String,
    notificationId: Int
) {
    val resolved = parseHuggingFaceResolvedFile(fileUrl) ?: return

    // The listing is a network call; a thrown exception here is treated exactly like an
    // unsuccessful response so that the optional sidecar cannot abort the model download.
    val repoFiles = try {
        val response = HuggingFaceClient.api.getRepoFiles(resolved.repoPath)
        if (!response.isSuccessful) {
            android.util.Log.w(
                "ModelDownloadService",
                "Failed to inspect repo files for projector sidecar: ${resolved.repoPath}"
            )
            return
        }
        response.body().orEmpty()
    } catch (e: kotlinx.coroutines.CancellationException) {
        throw e
    } catch (e: Exception) {
        android.util.Log.w(
            "ModelDownloadService",
            "Failed to inspect repo files for projector sidecar: ${resolved.repoPath}: ${e.message}"
        )
        return
    }

    val projectorFile = selectProjectorSidecar(repoFiles, resolved.filePath) ?: return
    val projectorUrl = "https://huggingface.co/${resolved.repoPath}/resolve/main/${projectorFile.path}"
    val targetFile = AppPaths.modelProjectorFile(applicationContext, modelId)
    targetFile.parentFile?.mkdirs()

    val tempFile = File(
        AppPaths.tempDownloads(applicationContext, modelId),
        "projector_${System.currentTimeMillis()}.tmp"
    )

    try {
        downloadFile(projectorUrl, tempFile, modelId, "$modelName projector", notificationId)
        tempFile.copyTo(targetFile, overwrite = true)
        android.util.Log.i(
            "ModelDownloadService",
            "Downloaded projector sidecar ${projectorFile.path} for $modelId"
        )
    } catch (e: kotlinx.coroutines.CancellationException) {
        // Cancellation must keep propagating so the enclosing download job stops.
        throw e
    } catch (e: Exception) {
        android.util.Log.w(
            "ModelDownloadService",
            "Projector sidecar download failed for $modelId from ${projectorFile.path}: ${e.message}"
        )
    } finally {
        tempFile.delete()
    }
}
"Description", modifier: Modifier = Modifier, + enabled: Boolean = true, shape: Shape = MaterialShapes.Square.toShape(), colors: IconButtonColors = IconButtonDefaults.filledIconButtonColors( containerColor = MaterialTheme.colorScheme.primary.copy(0.06f), @@ -80,6 +81,7 @@ fun ActionButton( ) { FilledIconButton( onClick = { onClickListener() }, + enabled = enabled, colors = colors, shape = shape, modifier = modifier.size(Standards.ActionIconSize) @@ -143,6 +145,7 @@ fun ActionButton( icon: ImageVector, contentDescription: String = "Description", modifier: Modifier = Modifier, + enabled: Boolean = true, shape: Shape = MaterialShapes.Square.toShape(), colors: IconButtonColors = IconButtonDefaults.filledIconButtonColors( containerColor = MaterialTheme.colorScheme.primary.copy(0.06f), @@ -151,6 +154,7 @@ fun ActionButton( ) { FilledIconButton( onClick = { onClickListener() }, + enabled = enabled, colors = colors, shape = shape, modifier = modifier.size(Standards.ActionIconSize) diff --git a/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt b/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt index 663986a4..fd9c8d58 100644 --- a/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt +++ b/app/src/main/java/com/dark/tool_neuron/ui/icons/TnIcons.kt @@ -145,6 +145,7 @@ object TnIcons { val Prompt by lazy { tabler("M8 9h8", "M8 13h6", "M9 18h-3a3 3 0 0 1 -3 -3v-8a3 3 0 0 1 3 -3h12a3 3 0 0 1 3 3v8a3 3 0 0 1 -3 3h-3l-3 3l-3 -3") } // ── Audio ── + val Microphone by lazy { tabler("M9 5a3 3 0 0 1 6 0v5a3 3 0 0 1 -6 0z", "M5 10a7 7 0 0 0 14 0", "M8 21h8", "M12 17v4") } val Volume by lazy { tabler("M15 8a5 5 0 0 1 0 8", "M17.7 5a9 9 0 0 1 0 14", "M6 15h-2a1 1 0 0 1 -1 -1v-4a1 1 0 0 1 1 -1h2l3.5 -4.5a.8 .8 0 0 1 1.5 .5v14a.8 .8 0 0 1 -1.5 .5l-3.5 -4.5") } // ── Misc ── diff --git a/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt b/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt index db83c76c..5b355913 100644 --- 
a/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt +++ b/app/src/main/java/com/dark/tool_neuron/ui/screen/home/HomeBottomBar.kt @@ -1,6 +1,11 @@ package com.dark.tool_neuron.ui.screen.home +import android.Manifest import android.content.Intent +import android.content.pm.PackageManager +import android.os.SystemClock +import androidx.activity.compose.rememberLauncherForActivityResult +import androidx.activity.result.contract.ActivityResultContracts import androidx.compose.animation.AnimatedVisibility import androidx.compose.foundation.background import androidx.compose.foundation.layout.Arrangement @@ -28,7 +33,9 @@ import androidx.compose.material3.TextField import androidx.compose.material3.TextFieldDefaults import androidx.compose.material3.toShape import androidx.compose.runtime.Composable +import androidx.compose.runtime.DisposableEffect import androidx.compose.runtime.getValue +import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember import androidx.compose.runtime.rememberCoroutineScope @@ -39,9 +46,12 @@ import androidx.compose.ui.graphics.Color import androidx.compose.ui.graphics.compositeOver import androidx.compose.ui.platform.LocalContext import androidx.compose.ui.unit.dp +import androidx.core.content.ContextCompat import androidx.hilt.navigation.compose.hiltViewModel import androidx.lifecycle.compose.collectAsStateWithLifecycle import com.dark.tool_neuron.activity.RagActivity +import com.dark.tool_neuron.audio.ChatAudioRecorder +import com.dark.tool_neuron.audio.RecordedAudioClip import com.dark.tool_neuron.global.Standards import com.dark.tool_neuron.models.ModelType import com.dark.tool_neuron.ui.components.ActionButton @@ -59,6 +69,9 @@ import com.dark.tool_neuron.viewmodel.LLMModelViewModel import com.dark.tool_neuron.viewmodel.MemoryViewModel import com.dark.tool_neuron.viewmodel.PluginViewModel import com.dark.tool_neuron.viewmodel.RagViewModel 
+import java.io.IOException +import java.util.Locale +import kotlinx.coroutines.delay import kotlinx.coroutines.launch // ── BottomBar ─────────────────────────────────────────────────────────────────── @@ -109,7 +122,122 @@ internal fun BottomBar( // Coroutine scope for RAG queries val scope = rememberCoroutineScope() + val audioRecorder = remember(context.applicationContext) { + ChatAudioRecorder(context.applicationContext) + } + var stagedRecording by remember { mutableStateOf(null) } + var isMicRecording by remember { mutableStateOf(false) } + var recordingStartedAtMs by remember { mutableStateOf(null) } + var recordingElapsedMs by remember { mutableStateOf(0L) } + var micErrorMessage by remember { mutableStateOf(null) } + + val discardStagedRecording = { + stagedRecording?.let { audioRecorder.deleteClip(it.file) } + stagedRecording = null + } + val clearAudioCaptureState = { + if (isMicRecording) { + audioRecorder.cancelRecording() + } + isMicRecording = false + recordingStartedAtMs = null + recordingElapsedMs = 0L + discardStagedRecording() + micErrorMessage = null + } + val startMicrophoneRecording = { + discardStagedRecording() + micErrorMessage = null + try { + audioRecorder.startRecording() + isMicRecording = true + recordingStartedAtMs = SystemClock.elapsedRealtime() + recordingElapsedMs = 0L + } catch (e: IOException) { + micErrorMessage = e.message ?: "Unable to start microphone recording" + } catch (e: IllegalStateException) { + micErrorMessage = e.message ?: "Unable to start microphone recording" + } catch (e: SecurityException) { + micErrorMessage = e.message ?: "Microphone permission denied" + } + } + val stopMicrophoneRecording = { + try { + val clip = audioRecorder.stopRecording() + discardStagedRecording() + stagedRecording = clip + isMicRecording = false + recordingStartedAtMs = null + recordingElapsedMs = clip.durationMillis + micErrorMessage = null + } catch (e: IllegalStateException) { + isMicRecording = false + recordingStartedAtMs = null + 
recordingElapsedMs = 0L + micErrorMessage = e.message ?: "Unable to finalize microphone recording" + } + } + + val microphonePermissionLauncher = rememberLauncherForActivityResult( + contract = ActivityResultContracts.RequestPermission() + ) { isGranted -> + if (isGranted) { + startMicrophoneRecording() + } else { + micErrorMessage = "Microphone permission is required to record audio." + } + } + + // File imports remain the fallback path, while mic capture stages a temporary clip for review + // before both routes converge on ChatViewModel.sendChatWithAudio(...). + val audioLauncher = rememberLauncherForActivityResult( + contract = ActivityResultContracts.GetContent() + ) { uri -> + if (uri != null) { + discardStagedRecording() + micErrorMessage = null + val audioPrompt = value.ifBlank { "Transcribe this audio." } + chatViewModel.sendChatWithAudio(audioPrompt, context, uri) + value = "" + } + } + DisposableEffect(audioRecorder) { + onDispose { + audioRecorder.release() + stagedRecording?.let { audioRecorder.deleteClip(it.file) } + } + } + + LaunchedEffect(isMicRecording, recordingStartedAtMs) { + if (!isMicRecording || recordingStartedAtMs == null) { + return@LaunchedEffect + } + + while (isMicRecording) { + val startedAt = recordingStartedAtMs ?: break + recordingElapsedMs = SystemClock.elapsedRealtime() - startedAt + delay(250L) + } + } + + LaunchedEffect(chatState.generationType) { + if (chatState.generationType != ModelType.AUDIO_GENERATION) { + clearAudioCaptureState() + } + } + + val audioStatusMessage = when { + micErrorMessage != null -> micErrorMessage + isMicRecording -> "Recording microphone - ${formatAudioDuration(recordingElapsedMs)}" + stagedRecording != null -> "Recorded clip ready to send - ${formatAudioDuration(stagedRecording!!.durationMillis)}" + else -> null + } + val canSendCurrentInput = when (chatState.generationType) { + ModelType.TEXT_GENERATION -> value.isNotBlank() + ModelType.IMAGE_GENERATION -> value.isNotBlank() + 
ModelType.AUDIO_GENERATION -> stagedRecording != null && !isMicRecording + } // More Options overlay state var showMoreOptions by remember { mutableStateOf(false) } @@ -266,6 +394,28 @@ internal fun BottomBar( onWebSearchChipClick = { pluginViewModel.toggleWebSearch(false) } ) + AnimatedVisibility( + visible = chatState.generationType == ModelType.AUDIO_GENERATION && + audioStatusMessage != null + ) { + Row( + modifier = Modifier + .fillMaxWidth() + .padding(top = Standards.SpacingXs, start = Standards.SpacingMd), + verticalAlignment = Alignment.CenterVertically + ) { + Text( + text = audioStatusMessage.orEmpty(), + color = if (micErrorMessage != null) { + MaterialTheme.colorScheme.error + } else { + MaterialTheme.colorScheme.onSurfaceVariant + }, + style = MaterialTheme.typography.bodySmall + ) + } + } + // More Options overlay (above action row, like model list) MoreOptionsOverlay( show = showMoreOptions, @@ -349,6 +499,58 @@ internal fun BottomBar( Spacer(Modifier.weight(1f)) + if (chatState.generationType == ModelType.AUDIO_GENERATION) { + ActionButton( + onClickListener = { + micErrorMessage = null + audioLauncher.launch("audio/*") + }, + enabled = !chatState.isGenerating && !isMicRecording, + icon = TnIcons.FileUpload, + modifier = Modifier.padding(end = Standards.SpacingXs) + ) + + ActionButton( + onClickListener = { + if (isMicRecording) { + stopMicrophoneRecording() + } else if ( + ContextCompat.checkSelfPermission( + context, + Manifest.permission.RECORD_AUDIO + ) == PackageManager.PERMISSION_GRANTED + ) { + startMicrophoneRecording() + } else { + microphonePermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) + } + }, + enabled = !chatState.isGenerating, + icon = if (isMicRecording) TnIcons.PlayerStop else TnIcons.Microphone, + modifier = Modifier.padding(end = Standards.SpacingXs), + colors = if (isMicRecording) { + IconButtonDefaults.filledIconButtonColors( + containerColor = MaterialTheme.colorScheme.error.copy(0.18f), + contentColor = 
MaterialTheme.colorScheme.error + ) + } else { + IconButtonDefaults.filledIconButtonColors( + containerColor = MaterialTheme.colorScheme.primary.copy(0.06f), + contentColor = MaterialTheme.colorScheme.primary + ) + } + ) + + if (isMicRecording || stagedRecording != null) { + ActionButton( + onClickListener = { clearAudioCaptureState() }, + enabled = !chatState.isGenerating, + icon = TnIcons.X, + modifier = Modifier.padding(end = Standards.SpacingXs) + ) + } + } + // 6. Send/Stop when (chatState.isGenerating) { true -> { @@ -367,39 +569,54 @@ internal fun BottomBar( false -> { ActionButton( onClickListener = { - if (value.isNotBlank()) { - // Close overlays on send - showMoreOptions = false - when (chatState.generationType) { - ModelType.TEXT_GENERATION -> { - val hasRags = loadedRags.isNotEmpty() && isRagEnabledForChat - - if (hasRags) { - val userQuery = value - value = "" - scope.launch { - val ragContext = ragViewModel.queryAndStoreResults(userQuery) - chatViewModel.setRagContext( - ragContext.ifBlank { null }, - ragViewModel.lastRagResults.value - ) - chatViewModel.sendTextMessage(userQuery) - } - } else { - chatViewModel.clearRagContext() - chatViewModel.sendTextMessage(value) - value = "" + // Close overlays on send + showMoreOptions = false + when (chatState.generationType) { + ModelType.TEXT_GENERATION -> { + val hasRags = loadedRags.isNotEmpty() && isRagEnabledForChat + + if (hasRags) { + val userQuery = value + value = "" + scope.launch { + val ragContext = ragViewModel.queryAndStoreResults(userQuery) + chatViewModel.setRagContext( + ragContext.ifBlank { null }, + ragViewModel.lastRagResults.value + ) + chatViewModel.sendTextMessage(userQuery) } + } else { + chatViewModel.clearRagContext() + chatViewModel.sendTextMessage(value) + value = "" } + } - ModelType.IMAGE_GENERATION -> { - chatViewModel.sendImageRequest(value) - value = "" + ModelType.IMAGE_GENERATION -> { + chatViewModel.sendImageRequest(value) + value = "" + } + + ModelType.AUDIO_GENERATION -> 
/**
 * Formats a duration as zero-padded `MM:SS` text.
 *
 * Sub-second remainders are truncated. Negative input is clamped to `00:00` instead of
 * producing malformed text such as `00:-5`. Minutes are not wrapped at 60, so one hour is
 * rendered as `60:00`.
 *
 * @param durationMillis duration in milliseconds.
 */
private fun formatAudioDuration(durationMillis: Long): String {
    // Stay in Long arithmetic; narrowing to Int is unnecessary for formatting.
    val totalSeconds = durationMillis.coerceAtLeast(0L) / 1000L
    val minutes = totalSeconds / 60
    val seconds = totalSeconds % 60
    return String.format(Locale.US, "%02d:%02d", minutes, seconds)
}
/**
 * Sends a chat message whose audio comes from a recorded temporary file.
 *
 * The file bytes are read through [sendChatWithLoadedAudio], which is also handed a cleanup
 * callback that deletes [audioFile] and logs a warning if the deletion fails.
 *
 * @param prompt text prompt that accompanies the audio.
 * @param audioFile recorded clip; loading fails with [IllegalStateException] if it is missing.
 */
fun sendChatWithAudio(prompt: String, audioFile: File) {
    val deleteRecording: suspend () -> Unit = {
        val stillPresent = audioFile.exists()
        if (stillPresent && !audioFile.delete()) {
            Log.w(TAG, "Failed to delete temporary recorded audio: ${audioFile.absolutePath}")
        }
    }

    sendChatWithLoadedAudio(
        prompt = prompt,
        sourceLabel = "recorded audio input",
        cleanup = deleteRecording,
        loadAudio = {
            check(audioFile.exists()) { "Recorded audio file is missing" }
            audioFile.readBytes()
        }
    )
}
/**
 * Loads audio bytes off the main thread and forwards them to the byte-array
 * `sendChatWithAudio` overload.
 *
 * [cleanup] always runs once this function has been called: after a successful hand-off,
 * after a load failure, when the request is skipped because a generation is already running,
 * and when the coroutine is cancelled.
 *
 * @param prompt text prompt that accompanies the audio.
 * @param sourceLabel human-readable description of the audio source, used in log messages.
 * @param cleanup releases resources owned by the audio source (for example a temp file).
 * @param loadAudio produces the raw audio bytes; invoked on [Dispatchers.IO].
 */
private fun sendChatWithLoadedAudio(
    prompt: String,
    sourceLabel: String,
    cleanup: suspend () -> Unit = {},
    loadAudio: suspend () -> ByteArray
) {
    viewModelScope.launch {
        try {
            // Checked inside the try so the source is still cleaned up when the request is
            // dropped because another generation is in progress.
            if (_isGenerating.value) return@launch

            val audioBytes = withContext(Dispatchers.IO) { loadAudio() }
            sendChatWithAudio(prompt, audioBytes)
        } catch (e: kotlinx.coroutines.CancellationException) {
            // CancellationException is an IllegalStateException; rethrow it before the
            // generic handler below can mistake cancellation for a load failure.
            throw e
        } catch (e: IOException) {
            Log.e(TAG, "Failed to load $sourceLabel", e)
            reportError(e.message)
        } catch (e: SecurityException) {
            Log.e(TAG, "Permission denied while loading $sourceLabel", e)
            reportError(e.message)
        } catch (e: IllegalStateException) {
            Log.e(TAG, "Failed to load $sourceLabel", e)
            reportError(e.message)
        } finally {
            // NonCancellable keeps the cleanup running even if this coroutine was cancelled;
            // a plain withContext would throw immediately and skip it.
            withContext(kotlinx.coroutines.NonCancellable + Dispatchers.IO) { cleanup() }
        }
    }
}
val marker = LlmModelWorker.getVlmDefaultMarker() val vlmPrompt = if (prompt.contains(marker)) prompt - else marker.repeat(imageData.size) + "\n" + prompt + else marker.repeat(mediaData.size) + "\n" + prompt val conversationMessages = buildConversationMessages(vlmPrompt) val jsonArray = JSONArray(conversationMessages) @@ -504,7 +571,7 @@ class ChatViewModel @Inject constructor( var lastEmitTime = 0L LlmModelWorker.vlmGenerateStreaming( - jsonArray.toString(), imageData, maxTokens + jsonArray.toString(), mediaData, maxTokens ).collect { event -> when (event) { is GenerationEvent.Token -> { @@ -564,6 +631,19 @@ class ChatViewModel @Inject constructor( } } + private fun getMediaGenerationReadinessError(): String? { + if (!LlmModelWorker.isGgufModelLoaded.value) { + return "Please load a text generation model first" + } + if (!LlmModelWorker.isVlmLoaded.value) { + return "Please load a compatible projector sidecar (mmproj/mmjproj) first" + } + if (_isGenerating.value) { + return "Please wait for the current generation to finish" + } + return null + } + /** * Regenerate the last assistant response. * Removes the last assistant message and re-sends the last user prompt. @@ -1920,11 +2000,11 @@ class ChatViewModel @Inject constructor( // 2. Stop native generation (synchronous signal to engine) when (_currentGenerationType.value) { - ModelType.TEXT_GENERATION -> { + ModelType.TEXT_GENERATION, + ModelType.AUDIO_GENERATION -> { LlmModelWorker.ggufStopGeneration() } ModelType.IMAGE_GENERATION -> LlmModelWorker.stopDiffusionGeneration() - ModelType.AUDIO_GENERATION -> stopTTS() } // 3. 
Cancel the coroutine job (triggers finally → resetStreamingState) diff --git a/app/src/main/java/com/dark/tool_neuron/viewmodel/LLMModelViewModel.kt b/app/src/main/java/com/dark/tool_neuron/viewmodel/LLMModelViewModel.kt index d7825897..0e2a7d9a 100644 --- a/app/src/main/java/com/dark/tool_neuron/viewmodel/LLMModelViewModel.kt +++ b/app/src/main/java/com/dark/tool_neuron/viewmodel/LLMModelViewModel.kt @@ -204,6 +204,7 @@ class LLMModelViewModel @Inject constructor( // TODO: re-enable once native engine position tracking is fixed LlmModelWorker.setSpeculativeDecodingGguf(false) LlmModelWorker.warmUpGguf() + maybeLoadProjectorSidecar(model) } catch (e: Exception) { android.util.Log.w("LLMModelVM", "Optimization wiring failed: ${e.message}") } @@ -217,6 +218,19 @@ class LLMModelViewModel @Inject constructor( } } + private fun maybeLoadProjectorSidecar(model: Model) { + val projectorFile = AppPaths.modelProjectorFile(getApplication(), model.id) + if (!projectorFile.exists()) { + LlmModelWorker.releaseVlmProjector() + return + } + + val loaded = LlmModelWorker.loadVlmProjector(projectorFile.absolutePath) + if (!loaded) { + AppStateManager.setError("Loaded ${model.modelName}, but failed to load its projector sidecar") + } + } + private suspend fun loadDiffusionModel(model: Model, config: ModelConfig) { val diffusionConfig = DiffusionConfig.fromJson(config.modelLoadingParams) @@ -250,6 +264,7 @@ class LLMModelViewModel @Inject constructor( when (_currentModelType.value) { ProviderType.GGUF -> { LlmModelWorker.unloadGgufModel() + LlmModelWorker.releaseVlmProjector() LlmModelWorker.setCurrentGgufModelId(null) } ProviderType.DIFFUSION -> { @@ -274,6 +289,7 @@ class LLMModelViewModel @Inject constructor( when (_currentModelType.value) { ProviderType.GGUF -> { LlmModelWorker.unloadGgufModel() + LlmModelWorker.releaseVlmProjector() LlmModelWorker.setCurrentGgufModelId(null) } ProviderType.DIFFUSION -> { @@ -359,4 +375,4 @@ class LLMModelViewModel @Inject constructor( } } } -} \ 
No newline at end of file +} diff --git a/app/src/main/java/com/dark/tool_neuron/viewmodel/ModelStoreViewModel.kt b/app/src/main/java/com/dark/tool_neuron/viewmodel/ModelStoreViewModel.kt index c99dbc82..5366afa2 100644 --- a/app/src/main/java/com/dark/tool_neuron/viewmodel/ModelStoreViewModel.kt +++ b/app/src/main/java/com/dark/tool_neuron/viewmodel/ModelStoreViewModel.kt @@ -11,6 +11,7 @@ import com.dark.tool_neuron.models.data.HFModelRepository import com.dark.tool_neuron.models.data.HuggingFaceModel import com.dark.tool_neuron.models.data.ModelCategory import com.dark.tool_neuron.models.data.ModelType +import com.dark.tool_neuron.models.enums.ProviderType import com.dark.tool_neuron.models.table_schema.Model import com.dark.tool_neuron.models.table_schema.ModelConfig import com.dark.tool_neuron.repo.HuggingFaceExplorerRepo @@ -474,6 +475,14 @@ class ModelStoreViewModel @Inject constructor( } } + if (model.providerType == ProviderType.GGUF) { + val projectorFile = AppPaths.modelProjectorFile(getApplication(), model.id) + if (projectorFile.exists()) { + val deleted = projectorFile.delete() + Log.d("ModelStoreViewModel", "Projector sidecar deleted: $deleted - ${projectorFile.absolutePath}") + } + } + // Delete config from database val config = systemRepo.getConfigByModelId(model.id) if (config != null) { diff --git a/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt b/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt new file mode 100644 index 00000000..f44f005e --- /dev/null +++ b/app/src/test/java/com/dark/tool_neuron/repo/ModelStoreRepositoryTest.kt @@ -0,0 +1,65 @@ +package com.dark.tool_neuron.repo + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertTrue +import org.junit.Test + +class ModelStoreRepositoryTest { + + @Test + fun supportedGgufFileAcceptsUppercaseExtension() { + 
assertTrue(ModelStoreRepository.isSupportedGgufFile("models/Whisper-EN-Small.Q5_K_M.GGUF")) + } + + @Test + fun supportedGgufFileAcceptsMixedCaseExtension() { + assertTrue(ModelStoreRepository.isSupportedGgufFile("models/Whisper-EN-Small.Q5_K_M.GgUf")) + assertTrue(ModelStoreRepository.isSupportedGgufFile("models/whisper-en-small.q5_k_m.gguf")) + } + + @Test + fun supportedGgufFileRejectsProjectionArtifacts() { + assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-mmproj.Q4_K_M.GGUF")) + assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-mmjproj.Q4_K_M.GGUF")) + assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-vision-adapter.gguf")) + assertFalse(ModelStoreRepository.isSupportedGgufFile("models/whisper-projector.gguf")) + } + + @Test + fun projectorGgufFileAcceptsMmjprojAliasCaseInsensitively() { + assertTrue(ModelStoreRepository.isProjectorGgufFile("models/whisper-mmjproj.q4_k_m.gguf")) + assertTrue(ModelStoreRepository.isProjectorGgufFile("models/Whisper-MMJPROJ.Q4_K_M.GGUF")) + } + + @Test + fun stripGgufSuffixRemovesExtensionCaseInsensitively() { + assertEquals( + "Whisper-EN-Small.Q5_K_M", + ModelStoreRepository.stripGgufSuffix("Whisper-EN-Small.Q5_K_M.GGUF") + ) + } + + @Test + fun extractQuantTypeStripsSuffixBeforeReadingLastSegment() { + assertEquals("Q5_K_M", ModelStoreRepository.extractQuantType("Whisper-EN-Small.Q5_K_M.GGUF")) + assertEquals("Q5_K_M", ModelStoreRepository.extractQuantType("whisper-en-small.q5_k_m.gguf")) + assertEquals("MODEL", ModelStoreRepository.extractQuantType("model.GGUF")) + } + + @Test + fun extractQuantTypeIgnoresTrailingDescriptorSuffixes() { + assertEquals( + "Q4_K_M", + ModelStoreRepository.extractQuantType("Whisper-EN-Small-Q4_K_M-hip-optimized.gguf") + ) + } + + @Test + fun extractModelFamilyKeyRemovesTrailingQuantizationDetails() { + assertEquals( + "whisper-en-small", + ModelStoreRepository.extractModelFamilyKey("Whisper-EN-Small-Q4_K_M-hip-optimized.gguf") + ) + } +}