Skip to content

Commit 5ff5063

Browse files
committed
Address capture review documentation
1 parent f259b47 commit 5ff5063

4 files changed

Lines changed: 62 additions & 12 deletions

File tree

server/scripts/capture_events_schema.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,13 @@ CREATE INDEX IF NOT EXISTS events_data_gin_idx
153153

154154
COMMENT ON TABLE public.events IS
155155
'CodeChat dissertation capture events. Course, group, assignment, condition, and task context are joined during analysis from participant/date mappings.';
156+
COMMENT ON COLUMN public.events.event_id IS 'Opaque stable per-event ID for correlation and future deduplication; not used for event ordering.';
157+
COMMENT ON COLUMN public.events.sequence_number IS 'Client-local event order within one VS Code extension session, useful for ordering and detecting gaps.';
156158
COMMENT ON COLUMN public.events.user_id IS 'Pseudonymous participant UUID generated or supplied by the VS Code extension.';
157159
COMMENT ON COLUMN public.events.session_id IS 'Capture session UUID emitted by the VS Code extension.';
158160
COMMENT ON COLUMN public.events.file_hash IS 'SHA-256 hash of the local file path; raw local paths are not stored.';
159161
COMMENT ON COLUMN public.events."timestamp" IS 'Server receive/record timestamp in UTC.';
162+
COMMENT ON COLUMN public.events.client_tz_offset_min IS 'Client timezone offset in minutes, used with timestamp to derive local time of day without storing location or full timezone name.';
160163
COMMENT ON COLUMN public.events.data IS 'Event-specific JSON payload. Known telemetry metadata lives in typed columns.';
161164

162165
-- Least-privilege deployment guidance:
@@ -165,11 +168,13 @@ COMMENT ON COLUMN public.events.data IS 'Event-specific JSON payload. Known tele
165168
-- password/database/user names, a database administrator can grant only the
166169
-- permissions needed for capture inserts:
167170
--
171+
-- ```sql
168172
-- CREATE ROLE codechat_capture_writer LOGIN PASSWORD 'replace-with-secret';
169173
-- GRANT CONNECT ON DATABASE codechat_capture TO codechat_capture_writer;
170174
-- GRANT USAGE ON SCHEMA public TO codechat_capture_writer;
171175
-- GRANT INSERT ON public.events TO codechat_capture_writer;
172176
-- GRANT USAGE ON SEQUENCE public.events_id_seq TO codechat_capture_writer;
177+
-- ```
173178
--
174179
-- Do not grant SELECT, UPDATE, DELETE, CREATE, or ownership privileges to the
175180
-- writer account used in `capture_config.json`.

server/src/capture.rs

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,17 @@
4747
// * `user_id` – pseudonymous participant UUID. Course, group, assignment, and
4848
// study condition are intentionally joined later from researcher-managed
4949
// participant/date mappings instead of being configured by students.
50-
// * `session_id`, `event_id`, `sequence_number`, `schema_version` – event
51-
// integrity and versioning metadata.
50+
// * `event_id` – opaque stable per-event ID for correlation and future
51+
// deduplication across capture transports or retries.
52+
// * `sequence_number` – ordered, session-local event counter for reconstructing
53+
// event order and detecting gaps.
54+
// * `session_id`, `schema_version` – session grouping and payload versioning
55+
// metadata.
5256
// * `file_hash` – privacy-preserving SHA-256 hash of the local file path.
5357
// * `event_type` – coarse event type (see `CaptureEventType` below).
5458
// * `timestamp` – server receive/record timestamp (in UTC).
59+
// * `client_tz_offset_min` – browser/VS Code timezone offset used to derive
60+
// local time-of-day without storing location or full timezone identity.
5561
// * `data` – JSONB payload with event-specific details.
5662

5763
use std::{
@@ -396,8 +402,16 @@ impl CaptureStatus {
396402
#[derive(Debug, Clone)]
397403
pub struct CaptureEvent {
398404
/// Globally unique event identifier, generated by the client or server.
405+
///
406+
/// This is an opaque stable ID for correlation and possible future
407+
/// deduplication. It is not ordered; use `sequence_number` for event order
408+
/// within one extension session.
399409
pub event_id: Option<String>,
400410
/// Client-local event order for one extension session.
411+
///
412+
/// This is intentionally ordered so analysis can reconstruct event order and
413+
/// detect missing events. It is not globally unique; use `event_id` for
414+
/// stable event identity.
401415
pub sequence_number: Option<i64>,
402416
/// Capture payload schema version.
403417
pub schema_version: Option<i32>,
@@ -416,8 +430,29 @@ pub struct CaptureEvent {
416430
/// Server receive/record timestamp, in UTC.
417431
pub timestamp: DateTime<Utc>,
418432
/// Client timezone offset in minutes.
433+
///
434+
/// Combined with the server UTC timestamp, this supports local time-of-day
435+
/// analysis without collecting student location or a full timezone name.
419436
pub client_tz_offset_min: Option<i32>,
420-
/// Event-specific payload, stored as JSON text in the DB.
437+
/// Event-specific payload, stored as JSONB in the DB.
438+
///
439+
/// Known keys include:
440+
///
441+
/// * Capture/settings control: `capture_active`, `capture_control_only`,
442+
/// `changed_by`, `changed_settings`, previous/new consent and recording
443+
/// booleans.
444+
/// * Activity/session summaries: `mode`, `closed_by`, `duration_ms`,
445+
/// `duration_seconds`, `from`, `to`.
446+
/// * Save/run/compile metadata: `reason`, `lineCount`, `sessionName`,
447+
/// `sessionType`, `taskName`, `taskSource`, `processId`, `exitCode`.
448+
/// * Write classification: `source`, `classification_basis`, `diff`,
449+
/// `doc_block_diff`, `doc_block_count_before`, `doc_block_count_after`,
450+
/// `block_kind`, `basis`, `confidence`, `size_band`.
451+
/// * Paste evidence: `operation`, `pending_code_paste`.
452+
///
453+
/// Future keys should be documented here, tied to an analysis question, and
454+
/// privacy-reviewed before capture. Do not store raw source text or raw
455+
/// local file paths in this payload.
421456
pub data: serde_json::Value,
422457
}
423458

server/src/translation.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -510,10 +510,6 @@ impl CaptureContext {
510510
{
511511
self.active = active;
512512
}
513-
// Support older wire payloads that stored the session in `data`.
514-
if let Some(session_id) = data.get("session_id").and_then(serde_json::Value::as_str) {
515-
self.session_id = Some(session_id.to_string());
516-
}
517513
if wire.event_type == CaptureEventType::CodePaste
518514
&& data
519515
.get("pending_code_paste")
@@ -1922,7 +1918,7 @@ mod tests {
19221918
sequence_number: None,
19231919
schema_version: Some(2),
19241920
user_id: "participant".to_string(),
1925-
session_id: None,
1921+
session_id: Some("session".to_string()),
19261922
event_source: Some("vscode_extension".to_string()),
19271923
language_id: None,
19281924
file_hash: None,
@@ -1945,7 +1941,6 @@ mod tests {
19451941
context.update_from_wire(&capture_wire(
19461942
CaptureEventType::SessionStart,
19471943
serde_json::json!({
1948-
"session_id": "session",
19491944
"capture_active": true,
19501945
}),
19511946
));

server/src/webserver.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -437,10 +437,14 @@ pub struct Credentials {
437437
#[derive(Debug, Serialize, Deserialize, PartialEq, TS)]
438438
#[ts(export, optional_fields)]
439439
pub struct CaptureEventWire {
440-
/// Client-generated unique event identifier.
440+
/// Client-generated unique event identifier. Unlike `sequence_number`, this
441+
/// is an opaque stable ID for correlation and possible future deduplication
442+
/// across capture transports or retries.
441443
#[serde(skip_serializing_if = "Option::is_none")]
442444
pub event_id: Option<String>,
443-
/// Client-local event order for one extension session.
445+
/// Client-local event order for one extension session. Unlike `event_id`,
446+
/// this is intentionally ordered so analysis can reconstruct event order and
447+
/// detect gaps within a session.
444448
#[serde(skip_serializing_if = "Option::is_none")]
445449
pub sequence_number: Option<i64>,
446450
/// Capture payload schema version.
@@ -465,10 +469,21 @@ pub struct CaptureEventWire {
465469
pub event_type: CaptureEventType,
466470

467471
/// Optional client timezone offset in minutes (JS `new Date().getTimezoneOffset()`, i.e. UTC minus local time).
472+
/// Combined with the server UTC timestamp, this allows local time-of-day
473+
/// analysis without storing the student's location or full timezone name.
468474
#[serde(skip_serializing_if = "Option::is_none")]
469475
pub client_tz_offset_min: Option<i32>,
470476

471-
/// Arbitrary event-specific data stored as JSON (optional).
477+
/// Event-specific data stored as JSON. Known keys include capture controls
478+
/// (`capture_active`, `capture_control_only`), activity/session details
479+
/// (`mode`, `closed_by`, `duration_ms`, `duration_seconds`, `from`, `to`),
480+
/// tool/run/build details (`reason`, `lineCount`, `sessionName`,
481+
/// `sessionType`, `taskName`, `taskSource`, `processId`, `exitCode`), write
482+
/// classification details (`source`, `classification_basis`, `diff`,
483+
/// `doc_block_diff`, `doc_block_count_before`, `doc_block_count_after`, `block_kind`, `basis`, `confidence`, `size_band`), and
484+
/// paste markers (`operation`, `pending_code_paste`). Add future keys only
485+
/// when they support a specific analysis question and do not store source
486+
/// text or raw local paths.
472487
#[serde(skip_serializing_if = "Option::is_none")]
473488
#[ts(type = "unknown")]
474489
pub data: Option<serde_json::Value>,

0 commit comments

Comments
 (0)