diff --git a/README.md b/README.md index 1f0533a..4fdfc8c 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,51 @@ -CouchDB Dump/Restore Tools -========================== +###PHP-based CouchDB Dump/Restore Utility -PHP scripts to dump and restore CouchDB databases. +This tool was originally authored by [Anton Bondar](https://github.com/zebooka). Additional work to support inline base64 attachments was sponsored by [CloudPBX Inc.](http://cloudpbx.ca) and authored by [Miralem Mehic](https://github.com/mickeyze). +The original dump tool authored by zebooka included support for incremental backups. This made it much more feasible to add support to dump inline base64 attachments. -Reason, why I written them --------------------------- +Although CouchDB's `/_all_docs` function is more popular with DB backups that don't include attachments, this function doesn't support attachments. To download attachments, individual documents must be accessed. -Because I found no usable and available tools to dump Couch databases, including history revisions, if needed. +Reference on CouchDB's `/_all_docs` function [here](http://docs.couchdb.org/en/latest/api/database/bulk-api.html) +###Usage for BACKUP with `couchdb-dump.php` -Why on PHP? ------------ +#####Basic Example: -Why not? This is just scripting language. I know it. If you would like, you can write your one scripts, for example on Python, with all bells and whistles, with blackjack and hookers. +`couchdb-dump.php -H localhost -p 5984 -d test > dump.json` +#####Attachment Example: -Usage ------ +`couchdb-dump.php -X -a -H localhost -p 5984 -d test > dump.json` + +OPTIONS: + +* `-h` Display this help message. +* `-e` Turn php error reporting ON. +* `-H ` Hostname or IP of CouchDB server (default: 'localhost'). +* `-p ` Port of CouchDB server (default: 5984). +* `-d ` Database to dump. +* `-a` Fetch attachments inline (capture them in base64 encoded format). +* `-X` No revisions history in dump. 
+* `-A` Fetch attachments binary (Download them to current folder). +* `-y ` Include this PHP script that returns callback/function to check if document/revision needs to be dumped. + +###Usage for RESTORE with `couchdb-restore.php` + +#####Basic Example: + +`couchdb-restore.php -H localhost -p 5984 -d test -f dump.json` + +OPTIONS: + +* `-h` Display this help message. +* `-e` Turn php error reporting ON. +* `-H ` Hostname or IP of CouchDB server (default: 'localhost'). +* `-p ` Port of CouchDB server (default: 5984). +* `-d ` Database to restore. +* `-f ` JSON file to restore. +* `-D` Drop and create database, if needed +(default: create db, only if it does not exist). +* `-F` Force restore on existing DB with documents. +* `-a` Restore inline attachments (from base64 encoded format). -Just run each script with -h flag and read help. diff --git a/bin/couchdb-dump.php b/bin/couchdb-dump.php index eac1ad2..1583541 100755 --- a/bin/couchdb-dump.php +++ b/bin/couchdb-dump.php @@ -1,8 +1,10 @@ #!/usr/bin/env php http://zebooka.com/soft/LICENSE/" . PHP_EOL . PHP_EOL); +fwrite(STDERR, "(c) Copyright 2014, Updated by Miralem Mehic . Sponsored by CloudPBX Inc. " . PHP_EOL . PHP_EOL); $help = << Hostname or IP of CouchDB server (default: 'localhost'). -p Port of CouchDB server (default: 5984). -d Database to dump. + -g Download all databases from the server. + -z Compress output group directory in .tar.gz archive + -a Fetch attachments inline (capture them in base64 encoded format). -X No revisions history in dump. + -A Fetch attachments binary (Download them to current folder). + -s Outputs each document to separate file inside database directory in current folder (title of directory is the same as the title of database) + -t Used with -s to add timestamp mark to the folder + -m Allowing multiprocessing (works only on UNIX/LINUX platform) + -P Pretty JSON output -y Include this PHP script that returns callback/function to check if document/revision needs to be dumped. 
USAGE: {$_SERVER['argv'][0]} -H localhost -p 5984 -d test > dump.json HELP; -$params = parseParameters($_SERVER['argv'], array('H', 'p', 'd', 'y')); + + +class Dumper{ + + private $host; + private $port; + private $database; + private $noHistory; + private $callbackFile; + private $inlineAttachment; + private $binaryAttachments; + private $prettyJsonOutput; + private $separateFiles; + private $callbackFilter; + private $fp; + private $backupFolder; + + function Dumper( + $host, + $port, + $database, + $noHistory, + $callbackFile, + $inlineAttachment, + $binaryAttachments, + $prettyJsonOutput, + $separateFiles, + $timestamp, + $callbackFilter, + $backupFolder = "" + ){ + + if (!isset($database) || '' === $database) { + fwrite(STDERR, "ERROR: Please specify database name (-d )." . PHP_EOL); + exit(1); + } + + $this->host = $host; + $this->port = $port; + $this->database = urlencode($database); + $this->noHistory = $noHistory; + $this->callbackFile = $callbackFile; + $this->inlineAttachment = $inlineAttachment; + $this->binaryAttachments = $binaryAttachments; + $this->prettyJsonOutput = $prettyJsonOutput; + $this->separateFiles = $separateFiles; + $this->callbackFilter = $callbackFilter; + $this->backupFolder = $backupFolder; + + $this->databaseName = $backupFolder . "/" . urlencode(($timestamp) ? $database . '-' . date('Y-m-d_H-i-s') . '_UTC' : $database); + $this->databaseName = $this->databaseName; + + $fileName = $backupFolder . "/" . $this->database . '.json'; + + if(!$this->separateFiles) + $this->fp = fopen($fileName,"w"); + } + + public function download(){ + + // get all docs IDs + $url = "http://{$this->host}:{$this->port}/" . $this->database . "/_all_docs"; + fwrite(STDERR, "Fetching all documents info from db '{$this->database}' at {$this->host}:{$this->port} ..." . 
PHP_EOL); + $curl = getCommonCurl($url); + $result = trim(curl_exec($curl)); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + if (200 == $statusCode) { + $all_docs = json_decode($result, true); + } else { + // unknown status + fwrite(STDERR, "ERROR: Unsupported response when fetching all documents info from db '{$this->database}' (http status code = {$this->statusCode}) " . PHP_EOL); + return; //exit(2); + } + + + if (!isset($all_docs['rows']) || !count($all_docs['rows']) || !is_array($all_docs['rows'])) { + + //if we want to save each document in separate file + if($this->separateFiles){ + if (!file_exists('./' . $this->databaseName)) + mkdir('./' . $this->databaseName , 0777, true); + + if(!count($all_docs['rows'])) { + $dummy = fopen( './' . $this->databaseName . '/' . 'dummy', "a+"); + fwrite($dummy, "1", 1); + fclose($dummy); + } + } + fwrite(STDERR, "ERROR: No documents found in db '{$this->database}'." . PHP_EOL); + } + + + if(!$this->separateFiles){ + // first part of dump + if (!$this->noHistory) { + fwrite($this->fp, '{"new_edits":false,"docs":[' . PHP_EOL); + } else { + fwrite($this->fp, '{"docs":[' . PHP_EOL); + } + } + + + $first = true; + $count = count($all_docs['rows']); + fwrite(STDERR, "Found {$count} documents..." . PHP_EOL); + + $i = 1; + foreach ($all_docs['rows'] as $doc) { + + // foreach DOC get all revs + if (!$this->noHistory) { + $url = "http://{$this->host}:{$this->port}/{$this->database}/" . urlencode($doc['id']) . "?revs=true&revs_info=true" . (($this->inlineAttachment) ? "&attachments=true" : ""); + } else { + $url = "http://{$this->host}:{$this->port}/{$this->database}/" . urlencode($doc['id']) . (($this->inlineAttachment || $this->binaryAttachments) ? 
"?attachments=true" : ""); + } + + //fwrite(STDERR, "[{$doc['id']}]"); + $percentage = round( ($i++/sizeof($all_docs['rows']))*100 , 2) ; + fwrite(STDERR, "Processing database \"$this->database\": $percentage%\n"); + + $curl = getCommonCurl($url); + + curl_setopt($curl, CURLOPT_HTTPHEADER, array( + 'Content-type: application/json', + 'Accept: *\/*' + )); + + $result = $wholeDocument = curl_exec($curl); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + + if (200 == $statusCode) { + + $doc_revs = json_decode($result); + $doc_revs = (array)$doc_revs; + + } else { + // unknown status + fwrite(STDERR, "ERROR: Unsupported response when fetching document [{$doc['id']}] from db '{$this->database}' (http status code = {$statusCode}) " . PHP_EOL); + return; //exit(2); + } + + //REVISIONS + if (isset($doc_revs['_revs_info']) && count($doc_revs['_revs_info']) > 1) { + + $revs_info = toArray($doc_revs["_revs_info"]); + $revs_info = clearEmptyKey($revs_info); + + fwrite(STDERR, "" . PHP_EOL); + // we have more than one revision + $revs_info = array_reverse( $revs_info ); + $lastRev = end($revs_info); + $lastRev = $lastRev['rev']; + reset($revs_info); + + foreach ($revs_info as $rev) { + + // foreach rev fetch DB/ID?rev=REV&revs=true + //fwrite(STDERR, "[{$doc['id']}] @ {$rev['rev']}"); + if ('available' === $rev['status']) { + $url = "http://{$this->host}:{$this->port}/{$this->database}/" . urlencode($doc['id']) . "?revs=true&rev=" . urlencode($rev['rev']); + $curl = getCommonCurl($url); + $result = curl_exec($curl); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + if (200 == $statusCode) { + $full_doc = trim($result); + } else { + // unknown status + fwrite(STDERR, "ERROR: Unsupported response when fetching document [{$doc['id']}] revision [{$rev['rev']}] from db '{$this->database}' (http status code = {$statusCode}) " . 
PHP_EOL); + return; //exit(2); + } + if (is_callable($this->callbackFilter) && !call_user_func($this->callbackFilter, json_decode($full_doc, true), $lastRev)) { + fwrite(STDERR, " = skipped" . PHP_EOL); + continue; // skip that doc version because callback returned false + } else { + //fwrite(STDERR, "" . PHP_EOL); + } + } elseif ('missing' === $rev['status']) { + //fwrite(STDERR, " = missing" . PHP_EOL); + continue; // missing docs are not available anyhow + } elseif ('deleted' === $rev['status']) { + //fwrite(STDERR, " = deleted" . PHP_EOL); + continue; // we will never get deleted docs as we do not have them in _all_docs list + } else { + //fwrite(STDERR, " = unsupported revision status" . PHP_EOL); + continue; // who knows :) + } + + if($this->prettyJsonOutput) + $full_doc = indent($full_doc); + + //if we want to save each document in separate file + if($this->separateFiles){ + + if (!file_exists('./' . $this->databaseName)) + mkdir('./' . $this->databaseName , 0777, true); + + $myfile = fopen("./" . $this->databaseName . "/" . $doc['id'] . '_rev' . $rev['rev'] . ".json", "w"); + fwrite($myfile, $full_doc); + fclose($myfile); + + //Or if we want to join them together + }else{ + // add document to dump + if (!$first) { + fwrite($this->fp, ', ' . PHP_EOL . $full_doc); + } else { + fwrite($this->fp, $full_doc); + } + $first = false; + } + } + + //NO REVISIONS + } else { + + // we have only one revision + unset($doc_revs['_revs_info']); + $lastRev = $doc_revs['_rev']; + if (is_callable($this->callbackFilter) && !call_user_func($this->callbackFilter, $doc_revs, $lastRev)) { + fwrite(STDERR, " = skipped" . PHP_EOL); + continue; // skip that doc version because callback returned false + } else { + fwrite(STDERR, "" . 
PHP_EOL); + } + if ($this->noHistory) { + unset($doc_revs['_rev']); + } + + if((!$this->inlineAttachment && !$this->binaryAttachments)) + unset($doc_revs["_attachments"]); + + $doc_revs = clearEmptyKey($doc_revs); + $full_doc = json_encode($doc_revs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); + + $doc_revs = toArray($doc_revs); + + if($this->binaryAttachments && !$this->inlineAttachment && isset($doc_revs["_attachments"]) && $doc_revs["_attachments"]){ + foreach($doc_revs["_attachments"] as $key=>$value){ + $doc_revs["_attachments"][$key]["length"] = strlen($value["data"]); + $doc_revs["_attachments"][$key]["stub"] = true; + unset($doc_revs["_attachments"][$key]["data"]); + } + } + + if($this->prettyJsonOutput) + $full_doc = indent($full_doc); + + //IF we want to save each document in separate file + if($this->separateFiles){ + + if (!file_exists('./' . $this->databaseName)) + mkdir('./' . $this->databaseName, 0777, true); + + $myfile = fopen("./" . $this->databaseName . "/" . $doc['id']. ".json", "wb"); + + if($myfile != false){ + fwrite($myfile, $full_doc); + fclose($myfile); + } + + //Or if we want to join them together.. + }else{ + + if ($full_doc !== null && $full_doc !== false) { + if (!$first) { + fwrite($this->fp, ', ' . PHP_EOL . $full_doc); + } else { + fwrite($this->fp, $full_doc); + } + $first = false; + } + } + + /* + * Binary attachments + */ + if($this->binaryAttachments && $doc_revs["_attachments"]){ + + foreach($doc_revs["_attachments"] as $attachment_id => $content){ + + $tempUrl = "http://{$this->host}:{$this->port}/{$this->database}/" . urlencode($doc['id']) . "/" . urlencode($attachment_id); + $folder = $this->databaseName . '/' . $doc['id']; + + if (!file_exists('./' . $folder)) + mkdir('./' . $folder, 0777, true); + + $ch = getCommonCurl( $tempUrl ); + $fp = fopen( './' . $folder . '/' . 
$attachment_id, 'wb'); //download attachment to current folder + curl_setopt($ch, CURLOPT_FILE, $fp); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_exec($ch); + curl_close($ch); + fclose($fp); + } + } + } + + } + + // end of dump + if(!$this->separateFiles) + fwrite($this->fp, PHP_EOL . ']}' . PHP_EOL); + + if($this->fp) + fclose($this->fp); + + return; //exit(0); + } +}/* END OF CLASS */ + + + +$params = parseParameters($_SERVER['argv'], array('H', 'p', 'd', 'y','m' )); error_reporting(!empty($params['e']) ? -1 : 0); defined('JSON_UNESCAPED_SLASHES') || define('JSON_UNESCAPED_SLASHES', '0'); defined('JSON_UNESCAPED_UNICODE') || define('JSON_UNESCAPED_UNICODE', '0'); @@ -31,12 +356,21 @@ exit(1); } +$groupDownload = isset($params['g']) ? strval($params['g']) : false; $host = isset($params['H']) ? trim($params['H']) : 'localhost'; $port = isset($params['p']) ? intval($params['p']) : 5984; $database = isset($params['d']) ? strval($params['d']) : null; $noHistory = isset($params['X']) ? $params['X'] : false; $callbackFile = isset($params['y']) ? $params['y'] : null; +$inlineAttachment = isset($params['a']) ? $params['a'] : false; +$binaryAttachments = (isset($params['A']) && $noHistory) ? $params['A'] : false; +$prettyJsonOutput = (isset($params['P'])) ? $params['P'] : false; +$separateFiles = (isset($params['s'])) ? $params['s'] : false; +$timeStamp = (isset($params['t'])) ? $params['t'] : false; +$multiprocessing = (isset($params['m'])) ? intval($params['m']) : 0; +$compressData = (isset($params['z'])) ? $params['z'] : false; $callbackFilter = null; + if (null !== $callbackFile) { $callbackFilter = include $callbackFile; if (!is_callable($callbackFilter)) { @@ -50,132 +384,176 @@ exit(1); } -if (!isset($database) || '' === $database) { - fwrite(STDERR, "ERROR: Please specify database name (-d )." . PHP_EOL); +if (isset($params['A']) && !$noHistory) { + fwrite(STDERR, "ERROR: In order to fetch attachments binary, you must use -X option." . 
PHP_EOL); exit(1); } -// get all docs IDs -$url = "http://{$host}:{$port}/{$database}/_all_docs"; -fwrite(STDERR, "Fetching all documents info from db '{$database}' at {$host}:{$port} ..." . PHP_EOL); -$curl = getCommonCurl($url); -$result = trim(curl_exec($curl)); -$statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); -curl_close($curl); -if (200 == $statusCode) { - $all_docs = json_decode($result, true); -} else { - // unknown status - fwrite(STDERR, "ERROR: Unsupported response when fetching all documents info from db '{$database}' (http status code = {$statusCode}) " . PHP_EOL); - exit(2); -} -if (!isset($all_docs['rows']) || !count($all_docs['rows']) || !is_array($all_docs['rows'])) { - fwrite(STDERR, "ERROR: No documents found in db '{$database}'." . PHP_EOL); - exit(2); -} -// first part of dump -if (!$noHistory) { - fwrite(STDOUT, '{"new_edits":false,"docs":[' . PHP_EOL); -} else { - fwrite(STDOUT, '{"docs":[' . PHP_EOL); -} -$first = true; -$count = count($all_docs['rows']); -fwrite(STDERR, "Found {$count} documents..." . PHP_EOL); -foreach ($all_docs['rows'] as $doc) { - // foreach DOC get all revs - if (!$noHistory) { - $url = "http://{$host}:{$port}/{$database}/" . urlencode($doc['id']) . "?revs=true&revs_info=true"; - } else { - $url = "http://{$host}:{$port}/{$database}/" . urlencode($doc['id']); - } - fwrite(STDERR, "[{$doc['id']}]"); +if($groupDownload){ + + //Separate files is included for all databases automatically + $separateFiles = 1; + + fwrite(STDERR, "GROUP DOWNLOAD STARTED" . PHP_EOL); + + // get all docs IDs + $url = "http://{$host}:{$port}/_all_dbs"; + fwrite(STDERR, "Fetching all databases from {$host}:{$port} ..." . 
PHP_EOL); + + $curl = getCommonCurl($url); - $result = curl_exec($curl); + $result = trim(curl_exec($curl)); $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); curl_close($curl); + + if (200 == $statusCode) { - $doc_revs = json_decode($result, true); + $all_docs = json_decode($result, true); } else { // unknown status - fwrite(STDERR, "ERROR: Unsupported response when fetching document [{$doc['id']}] from db '{$database}' (http status code = {$statusCode}) " . PHP_EOL); - exit(2); + fwrite(STDERR, "ERROR: Unsupported response when fetching all documents info from db '{$database}' (http status code = {$statusCode}) " . PHP_EOL); + return; //exit(2); } - if (isset($doc_revs['_revs_info']) && count($doc_revs['_revs_info']) > 1) { - fwrite(STDERR, "" . PHP_EOL); - // we have more than one revision - $revs_info = array_reverse($doc_revs['_revs_info']); - $lastRev = end($revs_info); - $lastRev = $lastRev['rev']; - reset($revs_info); - foreach ($revs_info as $rev) { - // foreach rev fetch DB/ID?rev=REV&revs=true - fwrite(STDERR, "[{$doc['id']}] @ {$rev['rev']}"); - if ('available' === $rev['status']) { - $url = "http://{$host}:{$port}/{$database}/" . urlencode($doc['id']) . "?revs=true&rev=" . urlencode($rev['rev']); - $curl = getCommonCurl($url); - $result = curl_exec($curl); - $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); - curl_close($curl); - if (200 == $statusCode) { - $full_doc = trim($result); - } else { - // unknown status - fwrite(STDERR, "ERROR: Unsupported response when fetching document [{$doc['id']}] revision [{$rev['rev']}] from db '{$database}' (http status code = {$statusCode}) " . PHP_EOL); - exit(2); - } - if (is_callable($callbackFilter) && !call_user_func($callbackFilter, json_decode($full_doc, true), $lastRev)) { - fwrite(STDERR, " = skipped" . PHP_EOL); - continue; // skip that doc version because callback returned false - } else { - fwrite(STDERR, "" . 
PHP_EOL); - } - } elseif ('missing' === $rev['status']) { - fwrite(STDERR, " = missing" . PHP_EOL); - continue; // missing docs are not available anyhow - } elseif ('deleted' === $rev['status']) { - fwrite(STDERR, " = deleted" . PHP_EOL); - continue; // we will never get deleted docs as we do not have them in _all_docs list - } else { - fwrite(STDERR, " = unsupported revision status" . PHP_EOL); - continue; // who knows :) + + try{ + + $i = 1; + $processes = array(); + $backupFolder = "backup_" . strtolower(gmdate("l")) . gmdate("_j-m-Y_H_i_s_e"); + foreach($all_docs as $db){ + + if(substr($db, 0, 1) != '_'){ + + $allowMultiprocessing = false; + + if($multiprocessing || $i < $multiprocessing){ + + $processes[] = $pid = pcntl_fork(); + + if(!$pid){ + $allowMultiprocessing = true; + $i++; + }else + $pid = 0; + + }else + $pid = 0; + + if (!$pid) { + + $dumper = new Dumper( + $host, + $port, + $db, + $noHistory, + $callbackFile, + $inlineAttachment, + $binaryAttachments, + $prettyJsonOutput, + $separateFiles, + $timeStamp, + $callbackFilter, + $backupFolder + ); + $dumper->download(); + + if($allowMultiprocessing){ + $i--; + exit; + } + } } - // add document to dump - if (!$first) { - fwrite(STDOUT, ', ' . PHP_EOL . $full_doc); - } else { - fwrite(STDOUT, $full_doc); + } + + if($multiprocessing){ + + fwrite(STDERR, "Removing daemon processes!" . PHP_EOL); + + foreach($processes as $temp){ + pcntl_wait($temp, $status, WUNTRACED); + } + + fwrite(STDERR, "Daemon processes removed!" . PHP_EOL); + + /* + while (pcntl_waitpid(0, $status) != -1) { + $status = pcntl_wexitstatus($status); + echo "Child $status completed\n"; } - $first = false; + */ } - } else { - // we have only one revision - unset($doc_revs['_revs_info']); - $lastRev = $doc_revs['_rev']; - if (is_callable($callbackFilter) && !call_user_func($callbackFilter, $doc_revs, $lastRev)) { - fwrite(STDERR, " = skipped" . 
PHP_EOL); - continue; // skip that doc version because callback returned false + + if($compressData){ + fwrite(STDERR, "Compresing files and files.." . PHP_EOL); + + //Compres file + $a = new PharData( $backupFolder . '.tar'); + $a->buildFromDirectory(dirname(__FILE__) . '/' . $backupFolder); + + file_put_contents( $backupFolder . '.tar.gz' , gzencode(file_get_contents( $backupFolder . '.tar'), 9)); + + fwrite(STDERR, "Compresion complete!" . PHP_EOL); + fwrite(STDERR, "Removing temp folders and files.." . PHP_EOL); + + + //remove other files + unlink( realpath($backupFolder . '.tar') ); + deleteDir( realpath($backupFolder) ); + + fwrite(STDERR, "Temp folders and files removed!" . PHP_EOL); + } + + }catch(Exception $e){ + fwrite(STDERR, "$e" . PHP_EOL); + } + + return; //exit(1); + +}else{ + + $dumper = new Dumper( + $host, + $port, + $database, + $noHistory, + $callbackFile, + $inlineAttachment, + $binaryAttachments, + $prettyJsonOutput, + $separateFiles, + $timeStamp, + $callbackFilter + ); + $dumper->download(); +} + + + + + + + + + +function deleteDir($dirPath) { + if (! is_dir($dirPath)) { + throw new InvalidArgumentException("$dirPath must be a directory"); + } + if (substr($dirPath, strlen($dirPath) - 1, 1) != '/') { + $dirPath .= '/'; + } + $files = glob($dirPath . '*', GLOB_MARK); + foreach ($files as $file) { + if (is_dir($file)) { + deleteDir($file); } else { - fwrite(STDERR, "" . PHP_EOL); - } - if ($noHistory) { - unset($doc_revs['_rev']); - } - $full_doc = json_encode($doc_revs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); - if ($full_doc !== null && $full_doc !== false) { - if (!$first) { - fwrite(STDOUT, ', ' . PHP_EOL . $full_doc); - } else { - fwrite(STDOUT, $full_doc); - } - $first = false; + unlink($file); } } + rmdir($dirPath); } -// end of dump -fwrite(STDOUT, PHP_EOL . ']}' . 
PHP_EOL); -exit(0); + //////////////////////////////////////////////////////////////////////////////// @@ -194,6 +572,29 @@ function getCommonCurl($url) //////////////////////////////////////////////////////////////////////////////// +/** + * Convert incoming object to array (deep inspection, recursive function) + * @author Miralem Mehic + * @param array $obj Incoming object + * @return array + */ +function toArray($obj) +{ + if (is_object($obj)) $obj = (array)$obj; + if (is_array($obj)) { + $new = array(); + foreach ($obj as $key => $val) { + $new[$key] = toArray($val); + } + } else { + $new = $obj; + } + + return $new; +} + +//////////////////////////////////////////////////////////////////////////////// + /** * Parse incoming parameters like from $_SERVER['argv'] array. * @author Anton Bondar @@ -248,3 +649,78 @@ function parseParameters(array $params, array $reqs = array(), array $multiple = } return $result; } + +/** + * Indents a flat JSON string to make it more human-readable. + * + * @param string $json The original JSON string to process. + * + * @return string Indented version of the original JSON string. + */ +function indent(&$json) { + + $result = ''; + $pos = 0; + $strLen = strlen($json); + $indentStr = ' '; + $newLine = "\n"; + $prevChar = ''; + $outOfQuotes = true; + + for ($i=0; $i<=$strLen; $i++) { + + // Grab the next character in the string. + $char = substr($json, $i, 1); + + // Are we inside a quoted string? + if ($char == '"' && $prevChar != '\\') { + $outOfQuotes = !$outOfQuotes; + + // If this character is the end of an element, + // output a new line and indent the next line. + } else if(($char == '}' || $char == ']') && $outOfQuotes) { + $result .= $newLine; + $pos --; + for ($j=0; $j<$pos; $j++) { + $result .= $indentStr; + } + } + + // Add the character to the result string. + $result .= $char; + + // If the last character was the beginning of an element, + // output a new line and indent the next line. 
+ if (($char == ',' || $char == '{' || $char == '[') && $outOfQuotes) { + $result .= $newLine; + if ($char == '{' || $char == '[') { + $pos ++; + } + + for ($j = 0; $j < $pos; $j++) { + $result .= $indentStr; + } + } + $prevChar = $char; + } + + return $result; +} + +function clearEmptyKey($input){ + + if(!is_array($input)) + $input = toArray($input); + + foreach($input as $key=>$val){ + + if(is_array($val)) + $val = clearEmptyKey($val); + + if($key == "_empty_"){ + $input[""] = $val; + unset($input[$key]); + } + } + return $input; +} \ No newline at end of file diff --git a/bin/couchdb-restore.php b/bin/couchdb-restore.php index 39b63d6..816e9b9 100755 --- a/bin/couchdb-restore.php +++ b/bin/couchdb-restore.php @@ -1,8 +1,9 @@ #!/usr/bin/env php http://zebooka.com/soft/LICENSE/" . PHP_EOL . PHP_EOL); +ini_set('memory_limit', '-1'); +fwrite(STDOUT, "COUCH DB RESTORER | version: 1.0.0" . PHP_EOL); +fwrite(STDOUT, "(c) Copyright 2013, Anton Bondar http://zebooka.com/soft/LICENSE/" . PHP_EOL . PHP_EOL); +fwrite(STDOUT, "(c) Copyright 2014, Updated by Miralem Mehic . Sponsored by CloudPBX Inc. " . PHP_EOL . PHP_EOL); $help = << JSON file to restore. -D Drop and create database, if needed (default: create db, only if it does not exist). -F Force restore on existing db with documents. + -a Restore inline attachments (from base64 encoded format). + -s Specify directory from which documnest should be restored + -g Group upload of all databases to the server. + -z Decompress input group directory from .tar.gz archive + -r Delete folder after group upload + -au Admin Username + -ap Admin Password WARNING: Please note, that it is not a good idea to restore dump on existing database with documents. 
@@ -23,136 +31,427 @@ USAGE: {$_SERVER['argv'][0]} -H localhost -p 5984 -d test -f dump.json HELP; + + +class Restorer{ + + private $host; + private $port; + private $database; + private $filename; + private $inlineAttachment; + private $drop; + private $forceRestore; + private $separateFiles; + private $adminUsername; + private $adminPassword; + private $adminUrl; + + function Restorer( + $host, + $port, + $database, + $filename, + $inlineAttachment, + $drop, + $forceRestore, + $separateFiles, + $adminUsername, + $adminPassword + ) + { + $this->host = $host; + $this->port = $port; + $this->database = $database; + $this->filename = $filename; + $this->inlineAttachment = $inlineAttachment; + $this->drop = $drop; + $this->forceRestore = $forceRestore; + $this->separateFiles = $separateFiles; + $this->adminUsername = $adminUsername; + $this->adminPassword = $adminPassword; + + if(!empty($adminUsername) && !empty($adminPassword)){ + $this->adminUrl = $adminUsername . ':' . $adminPassword . '@'; + }else{ + $this->adminUrl = ''; + } + + if ('' === $this->host || $this->port < 1 || 65535 < $this->port) { + fwrite(STDOUT, "ERROR: Please specify valid hostname and port (-H and -p )." . PHP_EOL); + exit(1); + } + + if (!isset($this->database) || '' === $this->database) { + fwrite(STDOUT, "ERROR: Please specify database name (-d )." . PHP_EOL); + exit(1); + } + + if (!$this->separateFiles && (!isset($this->filename) || !is_file($this->filename) || !is_readable($this->filename))) { + fwrite(STDOUT, "ERROR: Please specify JSON file to restore (-f )." . PHP_EOL); + exit(1); + } + + if($this->separateFiles) { + if(!file_exists("./$this->separateFiles")){ + fwrite(STDOUT, "ERROR: There is no folder named same as database $this->separateFiles" . PHP_EOL); + exit(1); + } + } + + + + + } + + + public function restore(){ + + // check db + $url = "http://{$this->adminUrl}{$this->host}:{$this->port}/". urlencode($this->database) . 
"/"; + fwrite(STDOUT, "Checking db '{$this->database}' at {$this->host}:{$this->port} ..." . PHP_EOL); + $curl = getCommonCurl($url); + $result = trim(curl_exec($curl)); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + if (200 == $statusCode) { + // $this->database exists + $exists = true; + $db_info = json_decode($result, true); + $docCount = (isset($db_info['doc_count']) ? $db_info['doc_count'] : 0); + fwrite(STDOUT, "$this->database '{$this->database}' has {$docCount} documents." . PHP_EOL); + } elseif (404 == $statusCode) { + // $this->database not found + $exists = false; + $docCount = 0; + } else { + // unknown status + fwrite(STDOUT, "ERROR: Unsupported response when checking db '{$this->database}' status (http status code = {$statusCode}) " . $result . PHP_EOL); + return; + } + if ($this->drop && $exists) { + // drop $this->database + fwrite(STDOUT, "Deleting $this->database '{$this->database}'..." . PHP_EOL); + $curl = getCommonCurl($url); + curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'DELETE'); + $result = trim(curl_exec($curl)); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + if (200 != $statusCode) { + fwrite(STDOUT, "ERROR: Unsupported response when deleting db '{$this->database}' (http status code = {$statusCode}) " . $result . PHP_EOL); + return; + } + $exists = false; + $docCount = 0; + } + if ($docCount && !$this->forceRestore) { + // has documents, but no force + fwrite(STDOUT, "ERROR: $this->database '{$this->database}' has {$docCount} documents. Refusing to restore without -F force flag." . PHP_EOL); + return; + } + + if (!$exists) { + // create db + fwrite(STDOUT, "Creating $this->database '{$this->database}'..." . 
PHP_EOL); + $curl = getCommonCurl($url); + curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'PUT'); + $result = trim(curl_exec($curl)); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + if (201 != $statusCode) { + fwrite(STDOUT, "ERROR: Unsupported response when creating db '{$this->database}' (http status code = {$statusCode}) " . $result . PHP_EOL); + return; + } + } + + if($this->separateFiles){ + + $files = array(); + foreach(glob("$this->separateFiles/*") as $file) { + if($file != '.' && $file != '..' && $file != 'dummy'){ + $files[] = json_decode(file_get_contents($file), true); + } + } + + $decodedContent = new stdClass(); + $decodedContent->new_edits = false; + $decodedContent->docs = $files; + + + } else { + // post dump + $fileContent = file_get_contents($filename); + $decodedContent = json_decode($fileContent); + } + + fwrite(STDOUT, ">>>>>>>>>>>>>>>>> RESTORING STARTED <<<<<<<<<<<<<<<<<<<<<" . PHP_EOL); + + foreach($decodedContent->docs as $documentTemp){ + + if(!is_array($documentTemp)) + $documentTemp = (array)$documentTemp; + + //we need to fetch the latest revision of the document, because in order to upload a new version of document we MUST know latest rev ID + $url = "http://{$this->adminUrl}{$this->host}:{$this->port}/" . urlencode($this->database) . "/" . urlencode($documentTemp["_id"]); + $curl = getCommonCurl($url); + $result = trim(curl_exec($curl)); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + + + if($statusCode == 200){ + $result = json_decode($result,true); + if(isset($result["_rev"]) && $result["_rev"]) + $documentTemp["_rev"] = $result["_rev"]; + } + + if(isset($documentTemp["_revisions"])) + unset($documentTemp["_revisions"]); + + $url = "http://{$this->adminUrl}{$this->host}:{$this->port}/" . urlencode($this->database) . "/" . 
urlencode($documentTemp["_id"]); + + fwrite(STDOUT, "Restoring '{$documentTemp['_id']}|rev:{$documentTemp['_rev']}' into db '{$this->database}' at {$this->host}:{$this->port}.." . PHP_EOL); + + //If we don't wont to upload attachments then we need to remove content from the file used for upload + if(!$this->inlineAttachment && isset($documentTemp["_attachments"]) && $documentTemp["_attachments"]){ + unset($documentTemp["_attachments"]); + unset($documentTemp["unnamed"]); + } + + $documentTemp = clearEmptyKey($documentTemp); + + $curl = getCommonCurl($url); + curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'PUT'); /* or PUT */ + curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($documentTemp)); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curl, CURLOPT_HTTPHEADER, array( + 'Content-type: application/json', + 'Accept: */*' + )); -$params = parseParameters($_SERVER['argv'], array('H', 'p', 'd', 'f')); + // TODO: use next string when get ideas why it is not working and how to fix it. + //curl_setopt($curl, CURLOPT_INFILE, $filehandle); // strange, but this does not work + $result = trim(curl_exec($curl)); + + //fclose($filehandle); + $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + /* + if ($statusCode < 200 || 299 < $statusCode) { + fwrite(STDOUT, "ERROR: Unable to post data to \"{$url}\" (http status code = {$statusCode}) " . $result . PHP_EOL); + } + */ + + $messages = json_decode($result, true); + + $errors = 0; + if (is_array($messages)) { + if (isset($messages['error'])) { + $doc_id = isset($messages['id']) ? $messages['id'] : $documentTemp["_id"]; + $reason = isset($messages['reason']) ? $messages['reason'] : $messages['error']; + fwrite(STDOUT, "ERROR: [{$doc_id}] = {$reason}" . PHP_EOL); + $errors++; + } else if (isset($messages['ok'])) { + $doc_id = isset($messages['id']) ? $messages['id'] : '?'; + fwrite(STDOUT, "SUCCESS: [{$doc_id}] restored!" . 
PHP_EOL); + } + } + } + fwrite(STDOUT, ">>>>>>>>>>>>>>>>> RESTORING FINISHED! <<<<<<<<<<<<<<<<<<<<<" . PHP_EOL); + } +} + + + + + + +$params = parseParameters($_SERVER['argv'], array('H', 'p', 'd', 'f', 'a', 'D', 's' , 'au', 'ap')); error_reporting(!empty($params['e']) ? -1 : 0); if (isset($params['h'])) { - fwrite(STDERR, $help . PHP_EOL); + fwrite(STDOUT, $help . PHP_EOL); exit(1); } - + +$groupDownload = isset($params['g']) ? strval($params['g']) : false; $host = isset($params['H']) ? trim($params['H']) : 'localhost'; $port = isset($params['p']) ? intval($params['p']) : 5984; $database = isset($params['d']) ? strval($params['d']) : null; $filename = isset($params['f']) ? strval($params['f']) : null; +$inlineAttachment = isset($params['a']) ? $params['a'] : false; $drop = isset($params['D']) ? strval($params['D']) : false; $forceRestore = isset($params['F']) ? $params['F'] : false; +$separateFiles = isset($params['s']) ? strval($params['s']) : null; +$decompressData = (isset($params['z'])) ? $params['z'] : false; +$deleteAfterGroupUpload = (isset($params['r'])) ? $params['r'] : false; +$multiprocessing = (isset($params['m'])) ? intval($params['m']) : 0; + +$adminUsername = isset($params['au']) ? trim($params['au']) : ''; +$adminPassword = isset($params['ap']) ? trim($params['ap']) : ''; if ('' === $host || $port < 1 || 65535 < $port) { fwrite(STDERR, "ERROR: Please specify valid hostname and port (-H and -p )." . PHP_EOL); exit(1); } -if (!isset($database) || '' === $database) { - fwrite(STDERR, "ERROR: Please specify database name (-d )." . PHP_EOL); - exit(1); -} -if (!isset($filename) || !is_file($filename) || !is_readable($filename)) { - fwrite(STDERR, "ERROR: Please specify JSON file to restore (-f )." . PHP_EOL); - exit(1); -} -//$filehandle = fopen($filename, 'rb'); -//if (!$filehandle) { -// fwrite(STDERR, "ERROR: Unable to open '{$filename}'." . 
PHP_EOL); -// exit(2); -//} - -// check db -$url = "http://{$host}:{$port}/{$database}/"; -fwrite(STDERR, "Checking db '{$database}' at {$host}:{$port} ..." . PHP_EOL); -$curl = getCommonCurl($url); -$result = trim(curl_exec($curl)); -$statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); -curl_close($curl); -if (200 == $statusCode) { - // database exists - $exists = true; - $db_info = json_decode($result, true); - $docCount = (isset($db_info['doc_count']) ? $db_info['doc_count'] : 0); - fwrite(STDERR, "Database '{$database}' has {$docCount} documents." . PHP_EOL); -} elseif (404 == $statusCode) { - // database not found - $exists = false; - $docCount = 0; -} else { - // unknown status - fwrite(STDERR, "ERROR: Unsupported response when checking db '{$database}' status (http status code = {$statusCode}) " . $result . PHP_EOL); - exit(2); -} -if ($drop && $exists) { - // drop database - fwrite(STDERR, "Deleting database '{$database}'..." . PHP_EOL); - $curl = getCommonCurl($url); - curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'DELETE'); - $result = trim(curl_exec($curl)); - $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); - curl_close($curl); - if (200 != $statusCode) { - fwrite(STDERR, "ERROR: Unsupported response when deleting db '{$database}' (http status code = {$statusCode}) " . $result . PHP_EOL); - exit(2); +if($groupDownload){ + + if($decompressData){ + + fwrite(STDERR, "Decompresing files." . PHP_EOL); + + $clearFolderName = pathinfo($filename, PATHINFO_FILENAME); + $clearFolderName = pathinfo($clearFolderName, PATHINFO_FILENAME); + + if(!file_exists($clearFolderName . '.tar')){ + + fwrite(STDERR, "Extracting to $clearFolderName.tar" . PHP_EOL); + // decompress from gz + $p = new PharData($clearFolderName . '.tar.gz'); + $p->decompress(); // creates files.tar + } + + if(!file_exists($clearFolderName)){ + + fwrite(STDERR, "Extracting to $clearFolderName" . PHP_EOL); + // unarchive from the tar + $phar = new PharData($clearFolderName . 
'.tar'); + $phar->extractTo( $clearFolderName ); + } + + $filename = $clearFolderName; + + fwrite(STDERR, "Decompresing complete!" . PHP_EOL); } - $exists = false; - $docCount = 0; -} -if ($docCount && !$forceRestore) { - // has documents, but no force - fwrite(STDERR, "ERROR: Database '{$database}' has {$docCount} documents. Refusing to restore without -F force flag." . PHP_EOL); - exit(2); -} -if (!$exists) { - // create db - fwrite(STDERR, "Creating database '{$database}'..." . PHP_EOL); - $curl = getCommonCurl($url); - curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'PUT'); - $result = trim(curl_exec($curl)); - $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); - curl_close($curl); - if (201 != $statusCode) { - fwrite(STDERR, "ERROR: Unsupported response when creating db '{$database}' (http status code = {$statusCode}) " . $result . PHP_EOL); - exit(2); + + try{ + + $files = scandir($filename, 1); + $i = 1; + $processes = array(); + foreach($files as $file){ + + if( $file != '.' && $file != '..' && is_dir($filename . '/' . $file) ) { + $allowMultiprocessing = false; + + if($multiprocessing || $i < $multiprocessing){ + + $processes[] = $pid = pcntl_fork(); + + if(!$pid){ + $allowMultiprocessing = true; + $i++; + }else + $pid = 0; + + }else + $pid = 0; + + + if (!$pid) { + + $tempRestorer = new Restorer( + $host, + $port, + urldecode($file), + $file, + $inlineAttachment, + $drop, + $forceRestore, + $filename . '/' . $file, + $adminUsername, + $adminPassword + ); + $tempRestorer->restore(); + + if($allowMultiprocessing){ + $i--; + exit; + } + + } + } + } + + if($multiprocessing){ + + fwrite(STDERR, "Removing daemon processes!" . PHP_EOL); + foreach($processes as $temp){ + pcntl_wait($temp, $status, WUNTRACED); + } + fwrite(STDERR, "Daemon processes removed!" . PHP_EOL); + } + + if($deleteAfterGroupUpload){ + fwrite(STDERR, "Removing temp folders and files.." . PHP_EOL); + if(file_exists(realpath($filename . '.tar'))){ + unlink( realpath($filename . 
'.tar') ); + } + deleteDir( realpath($filename) ); + fwrite(STDERR, "Temp folders and files removed!" . PHP_EOL); + } + + }catch(Exception $e){ + fwrite(STDERR, "$e" . PHP_EOL); } -} + +}else{ + + $tempRestorer = new Restorer( + $host, + $port, + $database, + $filename, + $inlineAttachment, + $drop, + $forceRestore, + $separateFiles, + $adminUsername, + $adminPassword + ); + $tempRestorer->restore(); -// post dump -$url = "http://{$host}:{$port}/{$database}/_bulk_docs"; -fwrite(STDERR, "Restoring '{$filename}' into db '{$database}' at {$host}:{$port} ..." . PHP_EOL); -$curl = getCommonCurl($url); -curl_setopt($curl, CURLOPT_HTTPHEADER, array('Content-Type: application/json')); -curl_setopt($curl, CURLOPT_POST, true); -curl_setopt($curl, CURLOPT_POSTFIELDS, file_get_contents($filename)); -// TODO: use next string when get ideas why it is not working and how to fix it. -//curl_setopt($curl, CURLOPT_INFILE, $filehandle); // strange, but this does not work -$result = trim(curl_exec($curl)); -//fclose($filehandle); -$statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); -curl_close($curl); -if ($statusCode < 200 || 299 < $statusCode) { - fwrite(STDERR, "ERROR: Unable to post data to \"{$url}\" (http status code = {$statusCode}) " . $result . PHP_EOL); - exit(2); } -$messages = json_decode($result, true); -$errors = 0; -if (is_array($messages)) { - foreach ($messages as $message) { - if (isset($message['error'])) { - $doc_id = isset($message['id']) ? $message['id'] : '?'; - $reason = isset($message['reason']) ? $message['reason'] : $message['error']; - fwrite(STDERR, "WARNING: [{$doc_id}] = {$reason}" . PHP_EOL); - $errors++; + + + + + + + + + + + + + + + + + + + +//////////////////////////////////////////////////////////////////////////////// + +function deleteDir($dirPath) { + if (! 
is_dir($dirPath)) { + throw new InvalidArgumentException("$dirPath must be a directory"); + } + if (substr($dirPath, strlen($dirPath) - 1, 1) != '/') { + $dirPath .= '/'; + } + $files = glob($dirPath . '*', GLOB_MARK); + foreach ($files as $file) { + if (is_dir($file)) { + deleteDir($file); + } else { + unlink($file); } } + rmdir($dirPath); } -if ($errors) { - fwrite(STDERR, "ERROR: There were {$errors} errors while restoring documents." . PHP_EOL); - exit(2); -} else { - fwrite(STDERR, "DONE!" . PHP_EOL); -} -exit(0); -//////////////////////////////////////////////////////////////////////////////// function getCommonCurl($url) { @@ -223,3 +522,22 @@ function parseParameters(array $params, array $reqs = array(), array $multiple = } return $result; } + + +function clearEmptyKey($input){ + + if(!is_array($input)) + $input = toArray($input); + + foreach($input as $key=>$val){ + + if(is_array($val)) + $val = clearEmptyKey($val); + + if($key == "_empty_"){ + $input[""] = $val; + unset($input[$key]); + } + } + return $input; +} \ No newline at end of file