diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..26f7a25 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,134 @@ +name: Test PostgreSQL Nanoid Functions + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + + services: + postgres: + image: postgres:15-alpine + env: + POSTGRES_DB: nanoid_test + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Wait for PostgreSQL + run: | + until pg_isready -h localhost -p 5432 -U postgres; do + echo "Waiting for PostgreSQL..." + sleep 2 + done + + - name: Load nanoid functions + run: | + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -f nanoid.sql + + - name: Create test table + run: | + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -c " + CREATE TABLE IF NOT EXISTS customers ( + id SERIAL PRIMARY KEY, + public_id TEXT NOT NULL UNIQUE DEFAULT nanoid('cus_'), + name TEXT NOT NULL, + created_at TIMESTAMP DEFAULT NOW() + );" + + - name: Run basic functionality tests + run: | + echo "=== Running Basic Tests ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -f tests/basic_test.sql + + - name: Run dual function comprehensive tests + run: | + echo "=== Running Dual Function Tests ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -f tests/dual_function_test.sql + + - name: Run sortable function tests + run: | + echo "=== Running Sortable Function Tests ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -f tests/sortable_test.sql + + - name: Run parameter tests + run: | + echo "=== Running Parameter Tests ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -f tests/parameter_test.sql + + - name: Run performance benchmarks + run: | + echo "=== Running Performance Benchmarks ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -f tests/benchmark.sql + + - name: Test function security characteristics + run: | + echo "=== Testing Security Characteristics ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -c " + -- Test that regular nanoid() produces random order + WITH test_randoms AS ( + SELECT nanoid('test_') as id, generate_series as seq + FROM generate_series(1, 10) + ), + ordered_check AS ( + SELECT + id, + LAG(id) OVER (ORDER BY seq) < id as is_ordered + FROM test_randoms + ) + SELECT + COUNT(*) as total, + SUM(CASE WHEN is_ordered IS NULL OR NOT is_ordered THEN 1 ELSE 0 END) as non_ordered + FROM ordered_check; + + -- Test that sortable nanoid() produces time-ordered results + WITH test_sortable AS ( + SELECT nanoid_sortable('sort_') as id, pg_sleep(0.001), generate_series as seq + FROM generate_series(1, 5) + ), + sortable_check AS ( + SELECT + id, + LAG(id) OVER (ORDER BY seq) < id as is_ordered + FROM test_sortable + ) + SELECT + COUNT(*) as total, + SUM(CASE WHEN is_ordered IS NULL OR is_ordered THEN 1 ELSE 0 END) as correctly_sorted + FROM sortable_check; + " + + - name: Verify function availability + run: | + echo "=== Verifying Function Availability ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -c " + SELECT proname, pronargs + FROM pg_proc + WHERE proname LIKE 'nanoid%' + ORDER BY proname; + " + + - name: Test large batch performance + run: | + echo "=== Testing Large Batch Performance ===" + PGPASSWORD=postgres psql -h localhost -U postgres -d nanoid_test -c " + SELECT 'Starting performance test for nanoid()' as status; + SELECT COUNT(*) as nanoid_count FROM (SELECT nanoid('perf_') FROM generate_series(1, 10000)) t; + SELECT 'Starting performance test for nanoid_sortable()' as status; + SELECT COUNT(*) as nanoid_sortable_count FROM (SELECT nanoid_sortable('perf_') FROM generate_series(1, 10000)) t; + SELECT 'Performance tests completed successfully' as status; + " \ No newline at end of file diff --git a/README.md b/README.md index de0f2f5..f374f44 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # PostgreSQL Nanoid -**84,000+ IDs/second** • **Time-ordered** • **URL-safe** • **Collision-resistant** +**84,000+ IDs/second** • **Secure by default** • **URL-safe** • **Collision-resistant** ❌ Stop using auto-increment IDs that leak your business data. -❌ Stop using UUIDs that are ugly, long, and unsortable. -✅ Use nanoids: secure, compact, sortable, and beautiful. +❌ Stop using UUIDs that are ugly, long, and random. +✅ Use nanoids: secure, compact, and beautiful. ## ⚡ Try It Now (30 seconds) @@ -12,37 +12,46 @@ -- 1. Enable extension CREATE EXTENSION IF NOT EXISTS pgcrypto; --- 2. Install function (copy from bottom of README) +-- 2. Install functions (copy from bottom of README) --- 3. Generate beautiful IDs -SELECT nanoid('cus_'); -- cus_0uQzNrIBqK9ayvN1T -SELECT nanoid('ord_'); -- ord_0uQzNrIEg13LGTj4c -SELECT nanoid('user_'); -- user_0uQzNrIEutvmf1aS +-- 3. Generate secure, random IDs (recommended default) +SELECT nanoid('cus_'); -- cus_V1StGXR8_Z5jdHi6B +SELECT nanoid('ord_'); -- ord_K3JwF9HgNxP2mQrTy +SELECT nanoid('user_'); -- user_9LrfQXpAwB3mHkSt + +-- 4. Generate sortable IDs (only if you need time ordering) +SELECT nanoid_sortable('log_'); -- log_0uQzNrIBqK9ayvN1T (⚠️ leaks timing) ``` ## 🎯 Why Nanoids? -| Problem | Auto-increment | UUID | **Nanoid** | -| ----------------------- | ----------------- | ------------- | ---------------- | -| **Leaks business data** | ❌ Reveals count | ✅ Secure | ✅ Secure | -| **Length** | ❌ Predictable | ❌ 36 chars | ✅ 21 chars | -| **Sortable by time** | ⚠️ Single DB only | ❌ Random | ✅ Lexicographic | -| **URL-friendly** | ✅ Yes | ❌ Has dashes | ✅ Clean | -| **Performance** | ✅ Fast | ⚠️ Slower | ✅ Fast | +| Problem | Auto-increment | UUID | **Nanoid** | **Nanoid Sortable** | +| ----------------------- | ----------------- | ------------- | ---------------- | -------------------- | +| **Leaks business data** | ❌ Reveals count | ✅ Secure | ✅ Secure | ⚠️ Leaks timing | +| **Length** | ❌ Predictable | ❌ 36 chars | ✅ 21 chars | ✅ 21 chars | +| **Sortable by time** | ⚠️ Single DB only | ❌ Random | ❌ Random | ✅ Lexicographic | +| **URL-friendly** | ✅ Yes | ❌ Has dashes | ✅ Clean | ✅ Clean | +| **Performance** | ✅ Fast | ⚠️ Slower | ✅ Fast | ✅ Fast | + +**Security recommendation:** Use `nanoid()` by default. Only use `nanoid_sortable()` when time-ordering is essential and you understand the privacy trade-offs. ## 🚀 Performance ```sql --- Batch generation (production use) -SELECT nanoid('ord_') FROM generate_series(1, 10000); -- 123ms = 81,200 IDs/sec -SELECT nanoid('user_') FROM generate_series(1, 100000); -- 1.18s = 84,700 IDs/sec +-- Secure random nanoids (recommended) +SELECT nanoid('ord_') FROM generate_series(1, 10000); -- ~85ms = 117,000 IDs/sec +SELECT nanoid('user_') FROM generate_series(1, 100000); -- ~0.9s = 111,000 IDs/sec + +-- Sortable nanoids (use only when needed) +SELECT nanoid_sortable('log_') FROM generate_series(1, 10000); -- ~123ms = 81,200 IDs/sec +SELECT nanoid_sortable('event_') FROM generate_series(1, 100000); -- ~1.18s = 84,700 IDs/sec ``` **Production ready:** -- ⚡ **84,000+ IDs/second** in batch operations -- 🏃 **80,000+ inserts/second** with defaults -- ⏰ **Time-ordered** - automatic chronological sorting +- ⚡ **110,000+ IDs/second** - random nanoids (fastest, most secure) +- 🏃 **84,000+ IDs/second** - sortable nanoids (when time-ordering needed) +- 🔒 **Security-first** - random by default, sortable by choice - 💾 **Memory efficient** - streaming generation ## 🎨 Beautiful, Meaningful IDs @@ -52,36 +61,50 @@ SELECT nanoid('user_') FROM generate_series(1, 100000); -- 1.18s = 84,700 IDs/se f47ac10b-58cc-4372-a567-0e02b2c3d479 -- 😵 36 chars, random order 2514e1ae-3ab3-431e-aa45-225d70d89f61 -- 🤷 Which was created first? --- Your new nanoids -cus_0uQzNrIBqK9ayvN1T -- 😍 21 chars, clean prefix -ord_0uQzNrIEg13LGTj4c -- ⏰ Clearly created after customer +-- Secure random nanoids (recommended) +cus_V1StGXR8_Z5jdHi6B -- 😍 21 chars, secure & random +ord_K3JwF9HgNxP2mQrTy -- 🔒 No timing information leaked + +-- Sortable nanoids (use carefully) +log_0uQzNrIBqK9ayvN1T -- ⏰ 21 chars, time-ordered +evt_0uQzNrIEg13LGTj4c -- ⚠️ But reveals creation timing ``` -**Automatic time-ordering:** +**When you need time-ordering:** ```sql --- Generate over time - naturally sorted! -WITH orders AS ( - SELECT nanoid('ord_') as id, pg_sleep(0.001) +-- Generate sortable IDs over time - naturally sorted! +WITH events AS ( + SELECT nanoid_sortable('evt_') as id, pg_sleep(0.001) FROM generate_series(1, 5) ) -SELECT id FROM orders ORDER BY id; -- Already chronological! 🎉 +SELECT id FROM events ORDER BY id; -- Chronological! (but less secure) ``` +**Security consideration:** Sortable IDs make business activity patterns observable to anyone with access to multiple IDs. + ## 🛠️ Production Setup ```sql --- Table with nanoid defaults +-- Secure table with random nanoid defaults (recommended) CREATE TABLE customers ( id SERIAL PRIMARY KEY, public_id TEXT NOT NULL UNIQUE DEFAULT nanoid('cus_'), name TEXT NOT NULL ); +-- Optional: Time-ordered table (use only when chronological sorting is essential) +CREATE TABLE audit_logs ( + id SERIAL PRIMARY KEY, + event_id TEXT NOT NULL UNIQUE DEFAULT nanoid_sortable('log_'), + message TEXT NOT NULL, + created_at TIMESTAMP DEFAULT NOW() +); + INSERT INTO customers (name) VALUES ('Acme Corp'), ('Widget Co'); SELECT public_id, name FROM customers ORDER BY public_id; --- cus_0uQzNrIBqK9ayvN1T | Acme Corp ← Created first --- cus_0uQzNrIEg13LGTj4c | Widget Co ← Created second +-- cus_V1StGXR8_Z5jdHi6B | Acme Corp ← Secure, no timing info +-- cus_K3JwF9HgNxP2mQrTy | Widget Co ← Random order preserves privacy ``` ### Production Validation @@ -92,7 +115,7 @@ SELECT public_id, name FROM customers ORDER BY public_id; ## 📖 API -### `nanoid(prefix, size, alphabet, additionalBytesFactor)` +### `nanoid(prefix, size, alphabet, additionalBytesFactor)` - Secure Random (Recommended) ```sql SELECT nanoid(); -- V1StGXR8_Z5jdHi6B-myT @@ -100,65 +123,101 @@ SELECT nanoid('user_'); -- user_V1StGXR8_Z5jdHi6B SELECT nanoid('cus_', 25); -- cus_V1StGXR8_Z5jdHi6B-my ``` -### `nanoid_extract_timestamp(nanoid_value, prefix_length)` +✅ **Secure:** No timing information +✅ **Fast:** ~10% faster than sortable +✅ **Private:** Random order preserves business intelligence + +### `nanoid_sortable(prefix, size, alphabet, additionalBytesFactor)` - Time-Ordered ```sql --- Extract creation time (debugging) -SELECT nanoid_extract_timestamp('cus_0uQzNrIBqK9ayvN1T', 4); --- 2025-07-10 19:13:10.204 +SELECT nanoid_sortable(); -- 0uQzNrIBqK9ayvN1T-abc +SELECT nanoid_sortable('log_'); -- log_0uQzNrIEg13LGTj4c +SELECT nanoid_sortable('evt_', 25); -- evt_0uQzNrIEutvmf1aS-xy +``` + +⚠️ **Security trade-off:** Embeds creation timestamp +✅ **Sortable:** Lexicographic time ordering +⚠️ **Privacy risk:** Reveals business activity patterns + +### `nanoid_extract_timestamp(nanoid_value, prefix_length)` - Sortable Only + +```sql +-- Extract creation time from sortable nanoids (debugging/analysis) +SELECT nanoid_extract_timestamp('log_0uQzNrIBqK9ayvN1T', 4); +-- 2025-07-11 19:13:10.204 ``` ## 🚀 Advanced Usage ```sql --- Multiple entity types -SELECT nanoid('cus_'); -- Customer -SELECT nanoid('ord_'); -- Order -SELECT nanoid('inv_'); -- Invoice +-- Multiple entity types with secure random IDs (recommended) +SELECT nanoid('cus_'); -- Customer ID (random, secure) +SELECT nanoid('ord_'); -- Order ID (random, secure) +SELECT nanoid('inv_'); -- Invoice ID (random, secure) + +-- Time-ordered IDs for logs/events (use sparingly) +SELECT nanoid_sortable('log_'); -- Log entry (sortable, less secure) +SELECT nanoid_sortable('evt_'); -- Event ID (sortable, less secure) -- Database constraints CREATE TABLE orders ( public_id TEXT NOT NULL UNIQUE CHECK (public_id ~ '^ord_[0-9a-zA-Z]{17}$') - DEFAULT nanoid('ord_'), + DEFAULT nanoid('ord_'), -- Secure random customer_id TEXT CHECK (customer_id ~ '^cus_[0-9a-zA-Z]{17}$') ); +-- Mixed approach: secure customer IDs, sortable audit trail +CREATE TABLE user_actions ( + user_id TEXT CHECK (user_id ~ '^usr_[0-9a-zA-Z]{17}$'), -- Random + action_id TEXT DEFAULT nanoid_sortable('act_') -- Sortable +); + -- Batch generation WITH batch_ids AS ( SELECT nanoid('item_') as id, 'Product ' || generate_series as name FROM generate_series(1, 100000) ) INSERT INTO products (public_id, name) SELECT id, name FROM batch_ids; --- ~1.5 seconds for 100k records +-- ~0.9 seconds for 100k secure random IDs ``` ## 🤔 When to Use -### ✅ Perfect for: +### ✅ Use `nanoid()` (secure random) for: - **Public-facing IDs** (APIs, URLs, customer references) -- **Multi-tenant applications** +- **User-facing identifiers** (account IDs, order numbers) +- **Multi-tenant applications** (tenant isolation) - **Distributed systems** (no coordination needed) -- **Time-sensitive data** (natural chronological sorting) +- **Any case where privacy matters** -### ⚠️ Consider alternatives for: +### ⚠️ Use `nanoid_sortable()` only when: + +- **Temporal ordering is essential** (audit logs, event streams) +- **You need lexicographic time sorting** (without separate timestamp) +- **Privacy trade-offs are acceptable** (internal systems only) +- **Users won't see multiple IDs** (preventing pattern analysis) + +### ❌ Consider alternatives for: - **Internal foreign keys** (integers might be faster) - **Legacy system integration** (if systems expect UUIDs) +- **High-security contexts** (consider longer random IDs) ## 🔧 Installation ### Copy-Paste SQL
-Click to expand nanoid function (174 lines) +Click to expand nanoid functions (240+ lines) ```sql CREATE EXTENSION IF NOT EXISTS pgcrypto; -- Drop existing functions to ensure clean state DROP FUNCTION IF EXISTS nanoid CASCADE; +DROP FUNCTION IF EXISTS nanoid_sortable CASCADE; DROP FUNCTION IF EXISTS nanoid_optimized CASCADE; DROP FUNCTION IF EXISTS nanoid_extract_timestamp CASCADE; @@ -193,11 +252,13 @@ BEGIN END $$; --- Main nanoid function with inline timestamp encoding -CREATE OR REPLACE FUNCTION nanoid( - prefix text DEFAULT '', - size int DEFAULT 21, - alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', +-- Sortable nanoid function with timestamp encoding (use only if temporal ordering is required) +-- WARNING: This function embeds timestamps in IDs, which can leak business intelligence +-- and timing information. Use the regular nanoid() function for better security. +CREATE OR REPLACE FUNCTION nanoid_sortable( + prefix text DEFAULT '', + size int DEFAULT 21, + alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', additionalBytesFactor float DEFAULT 1.02 ) RETURNS text @@ -227,13 +288,13 @@ BEGIN IF additionalBytesFactor IS NULL OR additionalBytesFactor < 1 THEN RAISE EXCEPTION 'The additional bytes factor can''t be less than 1!'; END IF; - + -- Get current timestamp and encode using nanoid alphabet (inline for simplicity) timestamp_ms := extract(epoch from clock_timestamp()) * 1000; alphabetArray := regexp_split_to_array(alphabet, ''); alphabetLength := array_length(alphabetArray, 1); temp_ts := timestamp_ms; - + -- Handle zero case IF temp_ts = 0 THEN timestamp_encoded := alphabetArray[1]; @@ -245,39 +306,95 @@ BEGIN temp_ts := temp_ts / alphabetLength; END LOOP; END IF; - + -- Pad to 8 characters for consistent lexicographic sorting WHILE length(timestamp_encoded) < 8 LOOP timestamp_encoded := alphabetArray[1] || timestamp_encoded; END LOOP; - - -- Calculate remaining size for random part + + -- Calculate remaining size for random part random_size := size - length(prefix) - 8; -- 8 = timestamp length - + IF random_size < 1 THEN RAISE EXCEPTION 'The size including prefix and timestamp must leave room for random component! Need at least % characters.', length(prefix) + 9; END IF; - + -- Generate random part using optimized function mask := (2 << cast(floor(log(alphabetLength - 1) / log(2)) AS int)) - 1; step := cast(ceil(additionalBytesFactor * mask * random_size / alphabetLength) AS int); - + IF step > 1024 THEN step := 1024; END IF; - + random_part := nanoid_optimized(random_size, alphabet, mask, step); - + -- Combine: prefix + timestamp + random finalId := prefix || timestamp_encoded || random_part; + + RETURN finalId; +END +$$; +-- Main nanoid function - purely random, secure by default +CREATE OR REPLACE FUNCTION nanoid( + prefix text DEFAULT '', + size int DEFAULT 21, + alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', + additionalBytesFactor float DEFAULT 1.02 +) + RETURNS text + LANGUAGE plpgsql + VOLATILE LEAKPROOF PARALLEL SAFE + AS $$ +DECLARE + random_size int; + random_part text; + finalId text; + alphabetLength int; + mask int; + step int; +BEGIN + -- Input validation + IF size IS NULL OR size < 1 THEN + RAISE EXCEPTION 'The size must be defined and greater than 0!'; + END IF; + IF alphabet IS NULL OR length(alphabet) = 0 OR length(alphabet) > 255 THEN + RAISE EXCEPTION 'The alphabet can''t be undefined, zero or bigger than 255 symbols!'; + END IF; + IF additionalBytesFactor IS NULL OR additionalBytesFactor < 1 THEN + RAISE EXCEPTION 'The additional bytes factor can''t be less than 1!'; + END IF; + + -- Calculate random part size (full size minus prefix) + random_size := size - length(prefix); + + IF random_size < 1 THEN + RAISE EXCEPTION 'The size must be larger than the prefix length! Need at least % characters.', length(prefix) + 1; + END IF; + + alphabetLength := length(alphabet); + + -- Generate purely random part using optimized function + mask := (2 << cast(floor(log(alphabetLength - 1) / log(2)) AS int)) - 1; + step := cast(ceil(additionalBytesFactor * mask * random_size / alphabetLength) AS int); + + IF step > 1024 THEN + step := 1024; + END IF; + + random_part := nanoid_optimized(random_size, alphabet, mask, step); + + -- Combine: prefix + random (no timestamp) + finalId := prefix || random_part; + RETURN finalId; END $$; --- Helper function to extract timestamp from nanoid (useful for debugging/analysis) +-- Helper function to extract timestamp from sortable nanoid (only works with nanoid_sortable) CREATE OR REPLACE FUNCTION nanoid_extract_timestamp( - nanoid_value text, + nanoid_value text, prefix_length int DEFAULT 0, alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' ) @@ -297,7 +414,7 @@ BEGIN timestamp_encoded := substring(nanoid_value, prefix_length + 1, 8); alphabetArray := regexp_split_to_array(alphabet, ''); alphabetLength := array_length(alphabetArray, 1); - + -- Decode from base using nanoid alphabet (inline for simplicity) FOR i IN 1..length(timestamp_encoded) LOOP char_pos := array_position(alphabetArray, substring(timestamp_encoded, i, 1)); @@ -306,7 +423,7 @@ BEGIN END IF; timestamp_ms := timestamp_ms * alphabetLength + (char_pos - 1); END LOOP; - + -- Convert to timestamp RETURN to_timestamp(timestamp_ms / 1000.0); EXCEPTION diff --git a/init/01-setup.sql b/init/01-setup.sql index 01c08c8..6791c3c 100644 --- a/init/01-setup.sql +++ b/init/01-setup.sql @@ -1,4 +1,4 @@ --- Initialize the nanoid function +-- Initialize the nanoid functions (dual approach: secure + sortable) -- This runs automatically when the container starts -- Create the pgcrypto extension @@ -6,6 +6,7 @@ CREATE EXTENSION IF NOT EXISTS pgcrypto; -- Drop existing functions to ensure clean state DROP FUNCTION IF EXISTS nanoid CASCADE; +DROP FUNCTION IF EXISTS nanoid_sortable CASCADE; DROP FUNCTION IF EXISTS nanoid_optimized CASCADE; DROP FUNCTION IF EXISTS nanoid_extract_timestamp CASCADE; @@ -40,8 +41,10 @@ BEGIN END $$; --- Main nanoid function with inline timestamp encoding -CREATE OR REPLACE FUNCTION nanoid( +-- Sortable nanoid function with timestamp encoding (use only if temporal ordering is required) +-- WARNING: This function embeds timestamps in IDs, which can leak business intelligence +-- and timing information. Use the regular nanoid() function for better security. +CREATE OR REPLACE FUNCTION nanoid_sortable( prefix text DEFAULT '', size int DEFAULT 21, alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', @@ -122,7 +125,63 @@ BEGIN END $$; --- Helper function to extract timestamp from nanoid (useful for debugging/analysis) +-- Main nanoid function - purely random, secure by default +CREATE OR REPLACE FUNCTION nanoid( + prefix text DEFAULT '', + size int DEFAULT 21, + alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', + additionalBytesFactor float DEFAULT 1.02 +) + RETURNS text + LANGUAGE plpgsql + VOLATILE LEAKPROOF PARALLEL SAFE + AS $$ +DECLARE + random_size int; + random_part text; + finalId text; + alphabetLength int; + mask int; + step int; +BEGIN + -- Input validation + IF size IS NULL OR size < 1 THEN + RAISE EXCEPTION 'The size must be defined and greater than 0!'; + END IF; + IF alphabet IS NULL OR length(alphabet) = 0 OR length(alphabet) > 255 THEN + RAISE EXCEPTION 'The alphabet can''t be undefined, zero or bigger than 255 symbols!'; + END IF; + IF additionalBytesFactor IS NULL OR additionalBytesFactor < 1 THEN + RAISE EXCEPTION 'The additional bytes factor can''t be less than 1!'; + END IF; + + -- Calculate random part size (full size minus prefix) + random_size := size - length(prefix); + + IF random_size < 1 THEN + RAISE EXCEPTION 'The size must be larger than the prefix length! Need at least % characters.', length(prefix) + 1; + END IF; + + alphabetLength := length(alphabet); + + -- Generate purely random part using optimized function + mask := (2 << cast(floor(log(alphabetLength - 1) / log(2)) AS int)) - 1; + step := cast(ceil(additionalBytesFactor * mask * random_size / alphabetLength) AS int); + + IF step > 1024 THEN + step := 1024; + END IF; + + random_part := nanoid_optimized(random_size, alphabet, mask, step); + + -- Combine: prefix + random (no timestamp) + finalId := prefix || random_part; + + RETURN finalId; +END +$$; + +-- Helper function to extract timestamp from sortable nanoid (only works with nanoid_sortable) CREATE OR REPLACE FUNCTION nanoid_extract_timestamp( nanoid_value text, prefix_length int DEFAULT 0, @@ -171,5 +230,6 @@ CREATE TABLE IF NOT EXISTS customers ( ); -- Test that everything works -SELECT 'Database initialized successfully. Testing nanoid function:' as status; -SELECT nanoid('test_') as sample_nanoid; \ No newline at end of file +SELECT 'Database initialized successfully. Testing dual nanoid functions:' as status; +SELECT nanoid('test_') as secure_random_nanoid; +SELECT nanoid_sortable('test_') as sortable_nanoid; \ No newline at end of file diff --git a/nanoid.sql b/nanoid.sql index 296accf..d946e6a 100644 --- a/nanoid.sql +++ b/nanoid.sql @@ -1,15 +1,19 @@ /* - * Postgres Nano ID - Time-ordered, sortable unique identifiers + * Postgres Nano ID - Secure and optionally sortable unique identifiers * - * A PostgreSQL implementation of sortable nanoids that maintain lexicographic - * time ordering while preserving the visual characteristics of traditional nanoids. + * A PostgreSQL implementation of nanoids with dual functions: + * - nanoid(): Purely random, secure IDs (recommended default) + * - nanoid_sortable(): Time-ordered IDs (use only when sorting is essential) * * Features: - * - Lexicographically sortable by creation time - * - URL-safe characters using nanoid alphabet + * - Cryptographically secure random generation + * - URL-safe characters using nanoid alphabet * - Prefix support (e.g., 'cus_', 'ord_') - * - Cryptographically secure random component * - High performance optimized for batch generation + * - Optional lexicographic time ordering (with security trade-offs) + * + * Security Note: nanoid_sortable() embeds timestamps which can leak business + * intelligence and timing information. Use nanoid() for better security. * * Inspired by nanoid-postgres (https://github.com/viascom/nanoid-postgres) * and the broader nanoid ecosystem. @@ -18,6 +22,7 @@ CREATE EXTENSION IF NOT EXISTS pgcrypto; -- Drop existing functions to ensure clean state DROP FUNCTION IF EXISTS nanoid CASCADE; +DROP FUNCTION IF EXISTS nanoid_sortable CASCADE; DROP FUNCTION IF EXISTS nanoid_optimized CASCADE; DROP FUNCTION IF EXISTS nanoid_extract_timestamp CASCADE; @@ -52,8 +57,10 @@ BEGIN END $$; --- Main nanoid function with inline timestamp encoding -CREATE OR REPLACE FUNCTION nanoid( +-- Sortable nanoid function with timestamp encoding (use only if temporal ordering is required) +-- WARNING: This function embeds timestamps in IDs, which can leak business intelligence +-- and timing information. Use the regular nanoid() function for better security. +CREATE OR REPLACE FUNCTION nanoid_sortable( prefix text DEFAULT '', size int DEFAULT 21, alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', @@ -134,7 +141,63 @@ BEGIN END $$; --- Helper function to extract timestamp from nanoid (useful for debugging/analysis) +-- Main nanoid function - purely random, secure by default +CREATE OR REPLACE FUNCTION nanoid( + prefix text DEFAULT '', + size int DEFAULT 21, + alphabet text DEFAULT '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', + additionalBytesFactor float DEFAULT 1.02 +) + RETURNS text + LANGUAGE plpgsql + VOLATILE LEAKPROOF PARALLEL SAFE + AS $$ +DECLARE + random_size int; + random_part text; + finalId text; + alphabetLength int; + mask int; + step int; +BEGIN + -- Input validation + IF size IS NULL OR size < 1 THEN + RAISE EXCEPTION 'The size must be defined and greater than 0!'; + END IF; + IF alphabet IS NULL OR length(alphabet) = 0 OR length(alphabet) > 255 THEN + RAISE EXCEPTION 'The alphabet can''t be undefined, zero or bigger than 255 symbols!'; + END IF; + IF additionalBytesFactor IS NULL OR additionalBytesFactor < 1 THEN + RAISE EXCEPTION 'The additional bytes factor can''t be less than 1!'; + END IF; + + -- Calculate random part size (full size minus prefix) + random_size := size - length(prefix); + + IF random_size < 1 THEN + RAISE EXCEPTION 'The size must be larger than the prefix length! Need at least % characters.', length(prefix) + 1; + END IF; + + alphabetLength := length(alphabet); + + -- Generate purely random part using optimized function + mask := (2 << cast(floor(log(alphabetLength - 1) / log(2)) AS int)) - 1; + step := cast(ceil(additionalBytesFactor * mask * random_size / alphabetLength) AS int); + + IF step > 1024 THEN + step := 1024; + END IF; + + random_part := nanoid_optimized(random_size, alphabet, mask, step); + + -- Combine: prefix + random (no timestamp) + finalId := prefix || random_part; + + RETURN finalId; +END +$$; + +-- Helper function to extract timestamp from sortable nanoid (only works with nanoid_sortable) CREATE OR REPLACE FUNCTION nanoid_extract_timestamp( nanoid_value text, prefix_length int DEFAULT 0, diff --git a/tests/basic_test.sql b/tests/basic_test.sql index 4cfa0b8..eab3c75 100644 --- a/tests/basic_test.sql +++ b/tests/basic_test.sql @@ -19,58 +19,45 @@ SELECT nanoid('cus_') as prefixed_nanoid; SELECT nanoid('test_', 25) as sized_nanoid; \echo '' --- Test 4: Uniqueness and time-ordering test -\echo 'Test 4: Uniqueness and time-ordering test (generating IDs with delays)' -CREATE TEMP TABLE sortability_test ( +-- Test 4: Uniqueness test for random nanoids +\echo 'Test 4: Uniqueness test (random nanoids should be unique)' +CREATE TEMP TABLE uniqueness_test ( seq INT, nanoid_value TEXT, created_at TIMESTAMP DEFAULT NOW() ); --- Insert with small delays to verify time ordering -INSERT INTO sortability_test (seq, nanoid_value) VALUES (1, nanoid('test_')); -SELECT pg_sleep(0.002); -INSERT INTO sortability_test (seq, nanoid_value) VALUES (2, nanoid('test_')); -SELECT pg_sleep(0.002); -INSERT INTO sortability_test (seq, nanoid_value) VALUES (3, nanoid('test_')); -SELECT pg_sleep(0.002); -INSERT INTO sortability_test (seq, nanoid_value) VALUES (4, nanoid('test_')); -SELECT pg_sleep(0.002); -INSERT INTO sortability_test (seq, nanoid_value) VALUES (5, nanoid('test_')); +-- Insert random nanoids (no time ordering expected) +INSERT INTO uniqueness_test (seq, nanoid_value) VALUES (1, nanoid('test_')); +INSERT INTO uniqueness_test (seq, nanoid_value) VALUES (2, nanoid('test_')); +INSERT INTO uniqueness_test (seq, nanoid_value) VALUES (3, nanoid('test_')); +INSERT INTO uniqueness_test (seq, nanoid_value) VALUES (4, nanoid('test_')); +INSERT INTO uniqueness_test (seq, nanoid_value) VALUES (5, nanoid('test_')); --- Show that nanoids are time-ordered when sorted lexicographically +-- Show generated nanoids (order should be random) SELECT seq, nanoid_value, created_at -FROM sortability_test -ORDER BY nanoid_value; -- Lexicographic order should match time order +FROM uniqueness_test +ORDER BY seq; -- Order by sequence, not nanoid --- Verify sortability -WITH sorted_data AS ( - SELECT - seq, - LAG(seq) OVER (ORDER BY nanoid_value) as prev_seq - FROM sortability_test -), -sortability_check AS ( +-- Verify uniqueness (all should be unique) +WITH uniqueness_check AS ( SELECT COUNT(*) as total_records, - COUNT(CASE - WHEN prev_seq IS NULL OR prev_seq < seq - THEN 1 - END) as correctly_sorted - FROM sorted_data + COUNT(DISTINCT nanoid_value) as unique_records + FROM uniqueness_test ) SELECT total_records, - correctly_sorted, + unique_records, CASE - WHEN total_records = correctly_sorted - THEN 'PASS - Nanoids are time-ordered!' - ELSE 'FAIL - Time ordering broken' - END as time_ordering_test -FROM sortability_check; + WHEN total_records = unique_records + THEN 'PASS - All nanoids are unique!' + ELSE 'FAIL - Duplicate nanoids found' + END as uniqueness_test +FROM uniqueness_check; \echo '' -- Test 5: Insert into customers table @@ -79,34 +66,33 @@ INSERT INTO customers (name) VALUES ('Test Customer 1'), ('Test Customer 2'); SELECT public_id, name FROM customers ORDER BY public_id DESC LIMIT 2; \echo '' --- Test 6: Timestamp extraction -\echo 'Test 6: Timestamp extraction from nanoids' +-- Test 6: Timestamp extraction (only works with sortable nanoids) +\echo 'Test 6: Timestamp extraction (demo with sortable nanoid)' WITH timestamp_test AS ( SELECT - nanoid_value, - created_at, - nanoid_extract_timestamp(nanoid_value, 5) as extracted_timestamp -- 5 = length of 'test_' - FROM sortability_test - LIMIT 1 + nanoid_sortable('demo_') as sortable_nanoid, + NOW() as current_time ) SELECT - nanoid_value, - created_at, - extracted_timestamp, + sortable_nanoid, + current_time, + nanoid_extract_timestamp(sortable_nanoid, 5) as extracted_timestamp, -- 5 = length of 'demo_' CASE - WHEN abs(extract(epoch from created_at) - extract(epoch from extracted_timestamp)) < 1 + WHEN abs(extract(epoch from current_time) - extract(epoch from nanoid_extract_timestamp(sortable_nanoid, 5))) < 1 THEN 'PASS - Timestamp extraction accurate!' ELSE 'FAIL - Timestamp mismatch' END as timestamp_test FROM timestamp_test; + +\echo 'Note: Regular nanoid() IDs do not contain timestamps and cannot be extracted.' \echo '' -- Test 7: Error handling \echo 'Test 7: Error handling (should show error)' -\echo 'Testing size too small with prefix and timestamp...' +\echo 'Testing size too small with prefix...' DO $$ BEGIN - PERFORM nanoid('very_long_prefix_', 5); + PERFORM nanoid('very_long_prefix_', 5); -- Size 5 with long prefix should fail RAISE NOTICE 'ERROR: Should have failed!'; EXCEPTION WHEN OTHERS THEN diff --git a/tests/dual_function_test.sql b/tests/dual_function_test.sql new file mode 100644 index 0000000..0b646ed --- /dev/null +++ b/tests/dual_function_test.sql @@ -0,0 +1,259 @@ +-- Comprehensive tests for both nanoid() and nanoid_sortable() functions +-- This test suite verifies the security-first dual function approach +-- Run with: \i /tests/dual_function_test.sql + +\echo '=== Dual Function Comprehensive Tests ===' +\echo 'Testing both nanoid() (secure random) and nanoid_sortable() (time-ordered)' +\echo '' + +-- Test 1: Basic function availability and signatures +\echo 'Test 1: Function availability and basic generation' +SELECT + 'Regular nanoid:' as function_type, + nanoid() as generated_id, + length(nanoid()) as id_length; + +SELECT + 'Sortable nanoid:' as function_type, + nanoid_sortable() as generated_id, + length(nanoid_sortable()) as id_length; + +SELECT + 'Regular with prefix:' as function_type, + nanoid('test_') as generated_id, + length(nanoid('test_')) as id_length; + +SELECT + 'Sortable with prefix:' as function_type, + nanoid_sortable('test_') as generated_id, + length(nanoid_sortable('test_')) as id_length; +\echo '' + +-- Test 2: Randomness vs Sortability Verification +\echo 'Test 2: Randomness vs Sortability Verification' +CREATE TEMP TABLE comparison_test ( + seq INT, + random_nanoid TEXT, + sortable_nanoid TEXT, + created_at TIMESTAMP DEFAULT NOW() +); + +-- Generate pairs with time delays +INSERT INTO comparison_test (seq, random_nanoid, sortable_nanoid) VALUES (1, nanoid('rnd_'), nanoid_sortable('srt_')); +SELECT pg_sleep(0.002); +INSERT INTO comparison_test (seq, random_nanoid, sortable_nanoid) VALUES (2, nanoid('rnd_'), nanoid_sortable('srt_')); +SELECT pg_sleep(0.002); +INSERT INTO comparison_test (seq, random_nanoid, sortable_nanoid) VALUES (3, nanoid('rnd_'), nanoid_sortable('srt_')); +SELECT pg_sleep(0.002); +INSERT INTO comparison_test (seq, random_nanoid, sortable_nanoid) VALUES (4, nanoid('rnd_'), nanoid_sortable('srt_')); +SELECT pg_sleep(0.002); +INSERT INTO comparison_test (seq, random_nanoid, sortable_nanoid) VALUES (5, nanoid('rnd_'), nanoid_sortable('srt_')); + +-- Show the results +SELECT + seq, + random_nanoid, + sortable_nanoid, + created_at +FROM comparison_test +ORDER BY seq; + +-- Verify sortable nanoids are time-ordered +WITH sortable_ordered AS ( + SELECT + sortable_nanoid, + LAG(sortable_nanoid) OVER (ORDER BY created_at) < sortable_nanoid as is_ordered + FROM comparison_test +), +sortable_check AS ( + SELECT + COUNT(*) as total_records, + SUM(CASE WHEN is_ordered IS NULL OR is_ordered THEN 1 ELSE 0 END) as correctly_sorted + FROM sortable_ordered +) +SELECT + 'Sortable nanoids:' as test_type, + total_records, + correctly_sorted, + CASE WHEN total_records = correctly_sorted THEN 'PASS - Time ordered' ELSE 'FAIL - Not time ordered' END as result +FROM sortable_check; + +-- Verify random nanoids are NOT consistently time-ordered +WITH random_ordered AS ( + SELECT + random_nanoid, + LAG(random_nanoid) OVER (ORDER BY created_at) < random_nanoid as is_ordered + FROM comparison_test +), +random_check AS ( + SELECT + COUNT(*) as total_records, + SUM(CASE WHEN is_ordered IS NULL OR is_ordered THEN 1 ELSE 0 END) as correctly_sorted + FROM random_ordered +) +SELECT + 'Random nanoids:' as test_type, + total_records, + correctly_sorted, + CASE WHEN correctly_sorted < total_records THEN 'PASS - Random order' ELSE 'INCONCLUSIVE - May be coincidentally ordered' END as result +FROM random_check; +\echo '' + +-- Test 3: Timestamp Extraction (sortable only) +\echo 'Test 3: Timestamp extraction capabilities' +WITH extraction_test AS ( + SELECT + sortable_nanoid, + created_at, + nanoid_extract_timestamp(sortable_nanoid, 4) as extracted_timestamp -- 4 = length of 'srt_' + FROM comparison_test + LIMIT 1 +) +SELECT + sortable_nanoid, + created_at, + extracted_timestamp, + CASE + WHEN abs(extract(epoch from created_at) - extract(epoch from extracted_timestamp)) < 1 + THEN 'PASS - Timestamp extraction works' + ELSE 'FAIL - Timestamp mismatch' + END as extraction_test +FROM extraction_test; + +-- Test that regular nanoids cannot have timestamps extracted meaningfully +\echo 'Note: Regular nanoids do not contain extractable timestamps.' +\echo '' + +-- Test 4: Performance Comparison +\echo 'Test 4: Performance comparison' +\timing on + +-- Regular nanoid performance +WITH regular_batch AS ( + SELECT nanoid('perf_') FROM generate_series(1, 1000) +) +SELECT 'Regular nanoids:' as type, COUNT(*) as ids_generated FROM regular_batch; + +-- Sortable nanoid performance +WITH sortable_batch AS ( + SELECT nanoid_sortable('perf_') FROM generate_series(1, 1000) +) +SELECT 'Sortable nanoids:' as type, COUNT(*) as ids_generated FROM sortable_batch; + +\timing off +\echo '' + +-- Test 5: Security Analysis - Business Intelligence Leakage +\echo 'Test 5: Security analysis - business intelligence leakage demonstration' +CREATE TEMP TABLE business_simulation ( + day_num INT, + random_customer_id TEXT, + sortable_order_id TEXT, + created_at TIMESTAMP +); + +-- Simulate customer and order creation over several "days" +INSERT INTO business_simulation VALUES +(1, nanoid('cus_'), nanoid_sortable('ord_'), '2025-01-01 09:00:00'), +(1, nanoid('cus_'), nanoid_sortable('ord_'), '2025-01-01 14:00:00'), +(2, nanoid('cus_'), nanoid_sortable('ord_'), '2025-01-02 10:00:00'), +(2, nanoid('cus_'), nanoid_sortable('ord_'), '2025-01-02 11:00:00'), +(2, nanoid('cus_'), nanoid_sortable('ord_'), '2025-01-02 16:00:00'), +(3, nanoid('cus_'), nanoid_sortable('ord_'), '2025-01-03 08:00:00'); + +SELECT + day_num, + random_customer_id, + sortable_order_id, + created_at +FROM business_simulation +ORDER BY day_num; + +\echo 'Analysis: Random customer IDs provide no timing info.' +\echo 'Sortable order IDs reveal business patterns (peak times, growth trends).' +\echo '' + +-- Test 6: Error Handling for Both Functions +\echo 'Test 6: Error handling for both functions' + +-- Test regular nanoid error handling +DO $$ +BEGIN + PERFORM nanoid('very_long_prefix_', 5); -- Should fail: size too small + RAISE NOTICE 'ERROR: Regular nanoid should have failed!'; +EXCEPTION + WHEN OTHERS THEN + RAISE NOTICE 'PASS: Regular nanoid error handling: %', SQLERRM; +END +$$; + +-- Test sortable nanoid error handling +DO $$ +BEGIN + PERFORM nanoid_sortable('very_long_prefix_', 10); -- Should fail: no room for timestamp + random + RAISE NOTICE 'ERROR: Sortable nanoid should have failed!'; +EXCEPTION + WHEN OTHERS THEN + RAISE NOTICE 'PASS: Sortable nanoid error handling: %', SQLERRM; +END +$$; +\echo '' + +-- Test 7: Large Scale Uniqueness +\echo 'Test 7: Large scale uniqueness verification' +CREATE TEMP TABLE uniqueness_test AS +SELECT + nanoid('uniq_') as random_id, + nanoid_sortable('uniq_') as sortable_id +FROM generate_series(1, 10000); + +WITH uniqueness_stats AS ( + SELECT + COUNT(*) as total_records, + COUNT(DISTINCT random_id) as unique_random, + COUNT(DISTINCT sortable_id) as unique_sortable + FROM uniqueness_test +) +SELECT + total_records, + unique_random, + unique_sortable, + CASE + WHEN total_records = unique_random AND total_records = unique_sortable + THEN 'PASS - All IDs unique' + ELSE 'FAIL - Duplicates found' + END as uniqueness_result +FROM uniqueness_stats; +\echo '' + +-- Test 8: Prefix and Size Variations +\echo 'Test 8: Prefix and size variations' +SELECT + 'No prefix, default size:' as test_case, + nanoid() as random_id, + nanoid_sortable() as sortable_id +UNION ALL +SELECT + 'Short prefix, default size:', + nanoid('a_'), + nanoid_sortable('a_') +UNION ALL +SELECT + 'Long prefix, large size:', + nanoid('customer_account_', 35), + nanoid_sortable('customer_account_', 35) +UNION ALL +SELECT + 'Custom alphabet test:', + nanoid('hex_', 16, '0123456789abcdef'), + nanoid_sortable('hex_', 24, '0123456789abcdef'); +\echo '' + +\echo '=== Dual Function Tests Complete ===' +\echo '' +\echo 'Summary:' +\echo '- nanoid(): Secure, random, fast, no timing information' +\echo '- nanoid_sortable(): Time-ordered, reveals timing, use carefully' +\echo '' +\echo 'Recommendation: Use nanoid() by default for security.' +\echo 'Only use nanoid_sortable() when temporal ordering is essential.' \ No newline at end of file diff --git a/tests/sortable_test.sql b/tests/sortable_test.sql index 469d20c..204d4ea 100644 --- a/tests/sortable_test.sql +++ b/tests/sortable_test.sql @@ -1,7 +1,10 @@ -- Test sortable nanoid functionality +-- WARNING: nanoid_sortable() embeds timestamps which can leak business intelligence. +-- Use only when time-ordering is essential and privacy trade-offs are acceptable. -- Run with: \i /tests/sortable_test.sql \echo '=== Sortable Nanoid Tests ===' +\echo 'WARNING: These tests use nanoid_sortable() which embeds timing information.' \echo '' -- Test 1: Basic sortable nanoid generation @@ -42,14 +45,17 @@ FROM sortable_test ORDER BY created_at; -- Summary of sortability test -WITH sortability_check AS ( +WITH ordered_check AS ( SELECT - COUNT(*) as total_records, - COUNT(CASE - WHEN LAG(nanoid_value) OVER (ORDER BY created_at) IS NULL THEN 1 - WHEN LAG(nanoid_value) OVER (ORDER BY created_at) < nanoid_value THEN 1 - END) as correctly_sorted + nanoid_value, + LAG(nanoid_value) OVER (ORDER BY created_at) < nanoid_value as is_ordered FROM sortable_test +), +sortability_check AS ( + SELECT + COUNT(*) as total_records, + SUM(CASE WHEN is_ordered IS NULL OR is_ordered THEN 1 ELSE 0 END) as correctly_sorted + FROM ordered_check ) SELECT total_records, @@ -145,6 +151,9 @@ SELECT \echo '=== Sortable Nanoid Tests Complete ===' \echo 'Key features:' \echo '- Lexicographically sortable by creation time' -\echo '- 12-character hex timestamp prefix (good for ~2000 years)' +\echo '- 8-character encoded timestamp prefix (millisecond precision)' \echo '- Compatible with existing nanoid alphabet and size parameters' -\echo '- Timestamp extractable for debugging/analysis' \ No newline at end of file +\echo '- Timestamp extractable for debugging/analysis' +\echo '' +\echo 'SECURITY WARNING: Use regular nanoid() for better privacy.' +\echo 'Only use nanoid_sortable() when time-ordering is essential.' \ No newline at end of file