Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-and-notarize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
- name: Install Linux build and packaging dependencies
run: |
sudo apt-get update
sudo apt-get install -y rpm libx11-dev libxtst-dev
sudo apt-get install -y rpm libx11-dev libxtst-dev libatspi2.0-dev libglib2.0-dev

- name: Build Application
run: npm run build:linux -- --publish never
Expand Down
72 changes: 72 additions & 0 deletions .github/workflows/build-windows-text-monitor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Compiles resources/windows-text-monitor.c with MSVC on a Windows runner,
# zips the resulting executable, uploads it as a workflow artifact and
# attaches it to a GitHub release.
name: Build Windows Text Monitor

# Triggers: pushes to main that touch the C source or this workflow file,
# plus manual dispatch with an optional version string.
on:
push:
paths:
- 'resources/windows-text-monitor.c'
- '.github/workflows/build-windows-text-monitor.yml'
branches:
- main
workflow_dispatch:
inputs:
version:
description: 'Version tag (e.g., 1.0.0)'
required: false
default: '1.0.0'
type: string

# NOTE(review): write access to repository contents is presumably what lets the
# release step at the end create or update the release - confirm.
permissions:
contents: write

jobs:
build:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4

- name: Setup MSVC
uses: microsoft/setup-msbuild@v2

- name: Setup MSVC environment
uses: ilammy/msvc-dev-cmd@v1

# Single-file build with cl; links against ole32 and oleaut32.
- name: Compile Windows Text Monitor
run: |
cl /O2 /nologo resources/windows-text-monitor.c /Fe:windows-text-monitor.exe ole32.lib oleaut32.lib
shell: cmd

# Fails the job (exit 1) when the executable was not produced.
- name: Verify binary
run: |
if (Test-Path "windows-text-monitor.exe") {
Write-Host "Binary built successfully"
Get-Item "windows-text-monitor.exe" | Select-Object Name, Length
} else {
Write-Error "Binary not found"
exit 1
}
shell: pwsh

- name: Create zip archive
run: |
Compress-Archive -Path windows-text-monitor.exe -DestinationPath windows-text-monitor-win32-x64.zip
shell: pwsh

- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: windows-text-monitor
path: windows-text-monitor-win32-x64.zip

# The tag and release name use the dispatch input when present and fall back
# to 1.0.0 otherwise (push-triggered runs have no inputs).
- name: Create or update release
uses: softprops/action-gh-release@v2
with:
tag_name: windows-text-monitor-v${{ inputs.version || '1.0.0' }}
name: Windows Text Monitor v${{ inputs.version || '1.0.0' }}
body: |
Prebuilt Windows text monitor binary for auto-learn correction monitoring.

This binary uses Windows UI Automation to detect text field changes after paste.
files: windows-text-monitor-win32-x64.zip
make_latest: false
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 2 additions & 0 deletions electron-builder.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
"resources/bin/macos-globe-listener",
"resources/bin/macos-fast-paste",
"resources/bin/linux-fast-paste",
"resources/bin/linux-text-monitor",
{
"from": "resources/bin/",
"to": "bin/",
Expand All @@ -97,6 +98,7 @@
"llama-server-*",
"sherpa-onnx-*",
"windows-key-listener*",
"windows-text-monitor*",
"windows-fast-paste*",
"*.dylib",
"*.dll",
Expand Down
16 changes: 15 additions & 1 deletion main.js
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ const UpdateManager = require("./src/updater");
const GlobeKeyManager = require("./src/helpers/globeKeyManager");
const DevServerManager = require("./src/helpers/devServerManager");
const WindowsKeyManager = require("./src/helpers/windowsKeyManager");
const TextEditMonitor = require("./src/helpers/textEditMonitor");
const WhisperCudaManager = require("./src/helpers/whisperCudaManager");
const { i18nMain, changeLanguage } = require("./src/helpers/i18nMain");

Expand All @@ -180,7 +181,9 @@ let trayManager = null;
let updateManager = null;
let globeKeyManager = null;
let windowsKeyManager = null;
let textEditMonitor = null;
let whisperCudaManager = null;
let ipcHandlers = null;
let globeKeyAlertShown = false;
let authBridgeServer = null;

Expand Down Expand Up @@ -245,9 +248,11 @@ function initializeCoreManagers() {
parakeetManager = new ParakeetManager();
updateManager = new UpdateManager();
windowsKeyManager = new WindowsKeyManager();
textEditMonitor = new TextEditMonitor();
windowManager.textEditMonitor = textEditMonitor;

// IPC handlers must be registered before window content loads
new IPCHandlers({
ipcHandlers = new IPCHandlers({
environmentManager,
databaseManager,
clipboardManager,
Expand All @@ -256,6 +261,7 @@ function initializeCoreManagers() {
windowManager,
updateManager,
windowsKeyManager,
textEditMonitor,
whisperCudaManager,
getTrayManager: () => trayManager,
});
Expand Down Expand Up @@ -551,6 +557,8 @@ async function startApp() {
// Handle dictation if Globe is the current hotkey
if (currentHotkey === "GLOBE") {
if (mainWindowLive) {
// Capture target app PID BEFORE showing the overlay
if (textEditMonitor) textEditMonitor.captureTargetPid();
const activationMode = windowManager.getActivationMode();
if (activationMode === "push") {
const now = Date.now();
Expand Down Expand Up @@ -916,6 +924,12 @@ if (gotSingleInstanceLock) {
if (windowsKeyManager) {
windowsKeyManager.stop();
}
if (ipcHandlers) {
ipcHandlers._cleanupTextEditMonitor();
}
if (textEditMonitor) {
textEditMonitor.stopMonitoring();
}
if (updateManager) {
updateManager.cleanup();
}
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
"compile:globe": "node scripts/build-globe-listener.js",
"compile:fast-paste": "node scripts/build-macos-fast-paste.js",
"compile:winkeys": "node scripts/build-windows-key-listener.js",
"compile:text-monitor": "node scripts/build-text-monitor.js",
"compile:winpaste": "node scripts/build-windows-fast-paste.js",
"compile:linux-paste": "node scripts/build-linux-fast-paste.js",
"compile:native": "npm run compile:globe && npm run compile:fast-paste && npm run compile:winkeys && npm run compile:winpaste && npm run compile:linux-paste",
"compile:native": "npm run compile:globe && npm run compile:fast-paste && npm run compile:winkeys && npm run compile:winpaste && npm run compile:linux-paste && npm run compile:text-monitor",
"prestart": "npm run compile:native",
"start": "electron .",
"predev": "npm run compile:native",
Expand Down
11 changes: 11 additions & 0 deletions preload.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ contextBridge.exposeInMainWorld("electronAPI", {
// Dictionary functions
getDictionary: () => ipcRenderer.invoke("db-get-dictionary"),
setDictionary: (words) => ipcRenderer.invoke("db-set-dictionary", words),
onDictionaryUpdated: (callback) => {
const listener = (_event, words) => callback?.(words);
ipcRenderer.on("dictionary-updated", listener);
return () => ipcRenderer.removeListener("dictionary-updated", listener);
},
setAutoLearnEnabled: (enabled) => ipcRenderer.send("auto-learn-changed", enabled),
onCorrectionsLearned: (callback) => {
const listener = (_event, words) => callback?.(words);
ipcRenderer.on("corrections-learned", listener);
return () => ipcRenderer.removeListener("corrections-learned", listener);
},

// Note functions
saveNote: (title, content, noteType, sourceFile, audioDuration, folderId) =>
Expand Down
203 changes: 203 additions & 0 deletions resources/linux-text-monitor.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
/**
* Linux Text Edit Monitor
*
* Uses AT-SPI2 to monitor the focused text field for value changes.
* Outputs "CHANGED:<value>" to stdout when the text changes.
* Exits after a timeout or on receiving a termination signal.
*
* Protocol (stdout):
* INITIAL_VALUE:<text> - Initial text field value
* CHANGED:<text> - Text field value after a change
* NO_ELEMENT - Could not get focused element
* NO_VALUE - Focused element has no text value
*
* Input (stdin):
* First line: original pasted text (informational)
*
* Compile:
* gcc -O2 linux-text-monitor.c -o linux-text-monitor $(pkg-config --cflags --libs atspi-2)
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <atspi/atspi.h>

#define TIMEOUT_SECONDS 30
#define POLL_INTERVAL_MS 500
#define MAX_OUTPUT_CHARS 10240

/* Keep-running flag: starts at 1 and is set to 0 by signal_handler(). */
static volatile sig_atomic_t running = 1;

/*
 * Signal handler: ignores which signal arrived and just clears `running`.
 */
static void signal_handler(int signum) {
    (void)signum;
    running = 0;
}

/*
 * Depth-first search for the first accessible whose state set contains
 * ATSPI_STATE_FOCUSED, starting with `accessible` itself and then visiting
 * its children in index order.
 *
 * Returns a new reference to the match (release with g_object_unref()), or
 * NULL when nothing in the subtree is focused or the child count cannot be
 * read. Children that fail to load are skipped.
 */
static AtspiAccessible *find_focused(AtspiAccessible *accessible) {
    AtspiStateSet *state_set = atspi_accessible_get_state_set(accessible);
    if (state_set) {
        gboolean has_focus = atspi_state_set_contains(state_set, ATSPI_STATE_FOCUSED);
        g_object_unref(state_set);
        if (has_focus) {
            /* Hand the caller its own reference to the node. */
            return g_object_ref(accessible);
        }
    }

    GError *err = NULL;
    int n_children = atspi_accessible_get_child_count(accessible, &err);
    if (err) {
        g_error_free(err);
        return NULL;
    }

    AtspiAccessible *match = NULL;
    int idx = 0;
    while (match == NULL && idx < n_children) {
        AtspiAccessible *child = atspi_accessible_get_child_at_index(accessible, idx, &err);
        idx++;

        if (err) {
            /* Reset so the next lookup starts with a clean error slot. */
            g_error_free(err);
            err = NULL;
            continue;
        }
        if (child) {
            match = find_focused(child);
            g_object_unref(child);
        }
    }

    return match;
}

/*
 * Read the current contents of a text field through its AT-SPI Text interface.
 *
 * Characters are requested from offset 0 up to the field's character count,
 * capped at MAX_OUTPUT_CHARS.
 *
 * Returns a newly allocated string that the caller releases with g_free(),
 * or NULL when the character count cannot be read, the count is zero or
 * negative, or fetching the text reports an error.
 */
static char *read_text_value(AtspiText *text_iface) {
    GError *error = NULL;

    int char_count = atspi_text_get_character_count(text_iface, &error);
    if (error) {
        g_error_free(error);
        return NULL;
    }
    if (char_count <= 0) return NULL;

    int limit = char_count < MAX_OUTPUT_CHARS ? char_count : MAX_OUTPUT_CHARS;
    char *value = atspi_text_get_text(text_iface, 0, limit, &error);
    if (error) {
        /*
         * atspi_text_get_text() can still hand back an allocated (empty)
         * string when it sets an error, so release it here instead of
         * dropping the pointer. g_free(NULL) is a no-op.
         */
        g_error_free(error);
        g_free(value);
        return NULL;
    }

    return value;
}

/* Write one protocol line ("<tag><payload>\n") to stdout and flush it. */
static void emit(const char *tag, const char *payload) {
    printf("%s%s\n", tag, payload);
    fflush(stdout);
}

/*
 * Walk the applications under desktop 0 in index order and return a new
 * reference to the first focused accessible found, or NULL if there is none
 * (or the desktop itself is unavailable).
 */
static AtspiAccessible *focused_on_desktop(void) {
    GError *err = NULL;
    AtspiAccessible *desktop = atspi_get_desktop(0);
    if (desktop == NULL) return NULL;

    int n_apps = atspi_accessible_get_child_count(desktop, &err);
    if (err) {
        /* Treat an unreadable child count as "no applications". */
        g_error_free(err);
        err = NULL;
        n_apps = 0;
    }

    AtspiAccessible *hit = NULL;
    int idx = 0;
    while (hit == NULL && idx < n_apps) {
        AtspiAccessible *app = atspi_accessible_get_child_at_index(desktop, idx, &err);
        idx++;

        if (err) {
            g_error_free(err);
            err = NULL;
            continue;
        }
        if (app == NULL) continue;

        hit = find_focused(app);
        g_object_unref(app);
    }

    g_object_unref(desktop);
    return hit;
}

/* Milliseconds elapsed between two CLOCK_MONOTONIC samples. */
static long elapsed_ms(const struct timespec *from, const struct timespec *to) {
    return (to->tv_sec - from->tv_sec) * 1000 +
           (to->tv_nsec - from->tv_nsec) / 1000000;
}

/*
 * Entry point.
 *
 * 1. Installs SIGTERM/SIGINT handlers and consumes one line from stdin.
 * 2. Initializes AT-SPI and locates the focused accessible; prints
 *    NO_ELEMENT and exits with status 1 if that fails.
 * 3. Prints NO_VALUE and exits with status 0 if the element has no Text
 *    interface or no readable text.
 * 4. Prints INITIAL_VALUE:<text>, then polls every POLL_INTERVAL_MS and
 *    prints CHANGED:<text> whenever the value differs from the last one
 *    reported, until TIMEOUT_SECONDS have elapsed or a signal clears
 *    `running`.
 */
int main(void) {
    signal(SIGTERM, signal_handler);
    signal(SIGINT, signal_handler);

    /* The first stdin line is read and discarded. */
    char first_line[4096];
    if (fgets(first_line, sizeof(first_line), stdin) == NULL) {
        /* nothing to consume */
    }

    /* 0 and 1 are the only accepted results; anything else aborts. */
    int rc = atspi_init();
    if (!(rc == 0 || rc == 1)) {
        emit("NO_ELEMENT", "");
        return 1;
    }

    AtspiAccessible *focused = focused_on_desktop();
    if (focused == NULL) {
        emit("NO_ELEMENT", "");
        return 1;
    }

    AtspiText *text_iface = atspi_accessible_get_text_iface(focused);
    if (text_iface == NULL) {
        emit("NO_VALUE", "");
        g_object_unref(focused);
        return 0;
    }

    char *last_value = read_text_value(text_iface);
    if (last_value == NULL) {
        emit("NO_VALUE", "");
        g_object_unref(text_iface);
        g_object_unref(focused);
        return 0;
    }

    emit("INITIAL_VALUE:", last_value);

    struct timespec started;
    clock_gettime(CLOCK_MONOTONIC, &started);

    while (running) {
        struct timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);
        if (elapsed_ms(&started, &now) >= TIMEOUT_SECONDS * 1000) break;

        usleep(POLL_INTERVAL_MS * 1000);

        char *latest = read_text_value(text_iface);
        if (latest == NULL) continue;

        if (strcmp(latest, last_value) == 0) {
            /* Unchanged since the last report. */
            g_free(latest);
            continue;
        }

        emit("CHANGED:", latest);
        g_free(last_value);
        last_value = latest;
    }

    g_free(last_value);
    g_object_unref(text_iface);
    g_object_unref(focused);

    return 0;
}
Loading