diff --git a/.gitignore b/.gitignore index 4994ef15e..22e112755 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,19 @@ http_memprof_out # custom dc sqlite database sqlite + +# C++ client + +# CMake +cpp-client/build/ +cpp-client/CMakeCache.txt +cpp-client/CMakeFiles/ +cpp-client/cmake_install.cmake + +# Compiled files +*.o +*.so +*.a +*.dll +*.exe +*.out diff --git a/cpp-client/.gitignore b/cpp-client/.gitignore new file mode 100644 index 000000000..52ba4718d --- /dev/null +++ b/cpp-client/.gitignore @@ -0,0 +1,13 @@ +# CMake +build/ +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake + +# Compiled files +*.o +*.so +*.a +*.dll +*.exe +*.out diff --git a/cpp-client/CMakeLists.txt b/cpp-client/CMakeLists.txt new file mode 100644 index 000000000..7ad9a9c74 --- /dev/null +++ b/cpp-client/CMakeLists.txt @@ -0,0 +1,33 @@ +# FetchContent_MakeAvailable() requires CMake 3.14 or newer. +cmake_minimum_required(VERSION 3.14) +project(datacommons-cpp) + +# The sources use std::variant, structured bindings and `if constexpr`, so C++17 is mandatory. +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +include(FetchContent) + +# Third-party dependencies are fetched at configure time and pinned to release tags. +FetchContent_Declare( + cpr + GIT_REPOSITORY https://github.com/libcpr/cpr.git + GIT_TAG 1.8.3 +) + +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG v3.10.5 +) + +FetchContent_MakeAvailable(cpr nlohmann_json) + +add_library(datacommons src/DataCommons.cpp) + +target_include_directories(datacommons PUBLIC include) + +target_link_libraries(datacommons PUBLIC cpr::cpr nlohmann_json::nlohmann_json) + +add_executable(example examples/main.cpp) +target_link_libraries(example PRIVATE datacommons) diff --git a/cpp-client/README.md b/cpp-client/README.md new file mode 100644 index 000000000..55020544d --- /dev/null +++ b/cpp-client/README.md @@ -0,0 +1,58 @@ +# Data Commons C++ Client Library + +A C++ client library for accessing the Data Commons API. + +## Authentication + +The recommended way to provide your API key is by setting the `DC_API_KEY` environment variable. The client will automatically detect and use it. 
+ +```bash +export DC_API_KEY="YOUR_API_KEY" +``` + +Alternatively, you can pass the key directly to the constructor: + +```cpp +#include "DataCommons.h" + +int main() { + datacommons::DataCommons dc("YOUR_API_KEY"); + // ... + return 0; +} +``` + +## Building and Running the Example + +### Prerequisites + +- C++17 compiler (g++ or Clang) +- CMake (3.14+) +- Git +- OpenSSL development libraries (`libssl-dev` on Debian/Ubuntu) + +### Steps + +1. **Clone the repository and navigate to the client directory.** +2. **Create a build directory:** + ```bash + mkdir build + cd build + ``` +3. **Configure and build the project:** + ```bash + cmake .. + make + ``` +4. **Set your API key:** + ```bash + export DC_API_KEY="YOUR_API_KEY" + ``` +5. **Run the example:** + ```bash + ./example + ``` + +## Usage + +See the `USAGE.md` file for a detailed guide to the library's functions. diff --git a/cpp-client/USAGE.md b/cpp-client/USAGE.md new file mode 100644 index 000000000..0c20af795 --- /dev/null +++ b/cpp-client/USAGE.md @@ -0,0 +1,212 @@ +# Data Commons C++ Client Usage Guide + +This guide provides a summary of the available endpoints in the Data Commons C++ client library and examples of how to use them. + +## Getting Started + +First, ensure you have set your Data Commons API key as an environment variable: + +```bash +export DC_API_KEY="YOUR_API_KEY" +``` + +Then, you can create a `DataCommons` client object in your C++ code: + +```cpp +#include "DataCommons.h" +#include <iostream> +#include <stdexcept> + +int main() { + try { + datacommons::DataCommons dc; + // Your code here... + } catch (const std::runtime_error& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + return 0; +} +``` + +## Core V2 API Endpoints + +The C++ client provides access to the four core V2 endpoints of the Data Commons REST API. + +### 1. GetPropertyValues + +Fetches property values for one or more nodes. 
This method returns the raw JSON response from the API, giving you the flexibility to parse it as needed. + +**Use Case:** Find the name and type of a place, like a state or city. + +**Example:** Get the `name` and `typeOf` for California (`geoId/06`) and Colorado (`geoId/08`). + +```cpp +std::vector<std::string> dcids = {"geoId/06", "geoId/08"}; +std::vector<std::string> properties = {"name", "typeOf"}; +auto result = dc.GetPropertyValues(dcids, "->", properties); + +std::cout << result.dump(2) << std::endl; +``` + +### 2. GetObservations + +Fetches statistical observations. This endpoint provides a flexible way to query for data by specifying variables, entities, and dates in various combinations. Note: the method declared in `include/DataCommons.h` takes a leading `select` list and returns the raw JSON response (`nlohmann::json`), so the snippets below need adapting; `examples/main.cpp` shows a call that matches the declaration. + +**Use Case:** Get the total, male, and female population counts for California and Colorado in the year 2020. + +**Example:** + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; + +datacommons::ObservationEntity entities; +entities.dcids = {"geoId/06", "geoId/08"}; + +datacommons::ObservationDate date = "2020"; + +auto result = dc.GetObservations(variables, entities, date); + +for (const auto& [variable, entity_map] : result) { + std::cout << "Variable: " << variable << std::endl; + for (const auto& [entity, observations] : entity_map) { + std::cout << " Entity: " << entity << std::endl; + for (const auto& obs : observations) { + std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl; + } + } +} +``` + +You can also use expressions to select entities, for example, to get the population of all counties in California: + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person"}; + +datacommons::ObservationEntity entities; +entities.expression = "<-containedInPlace{typeOf:County, dcid:geoId/06}"; + +datacommons::ObservationDate date = "LATEST"; + +auto result = dc.GetObservations(variables, entities, date); +// ... (process results as above) +``` + +### 3. 
Resolve + +Resolves human-readable identifiers (like names or coordinates) to Data Commons IDs (DCIDs). + +**Use Case:** Find the unique DCID for a place when you only know its name. + +**Example:** Find the DCIDs for "California" and "Colorado". + +```cpp +std::vector<std::string> nodes = {"California", "Colorado"}; +std::string property = "<-description->dcid"; +auto result = dc.Resolve(nodes, property); + +for (const auto& [node, candidates] : result) { + std::cout << "Node: " << node << std::endl; + for (const auto& candidate : candidates) { + std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; + } +} +``` + +### 2. GetObservations + +Fetches statistical observations. This endpoint provides a flexible way to query for data by specifying variables, entities, and dates in various combinations. Note: the method declared in `include/DataCommons.h` takes a leading `select` list and returns the raw JSON response (`nlohmann::json`), so the snippets below need adapting; `examples/main.cpp` shows a call that matches the declaration. + +**Use Case:** Get the total, male, and female population counts for California and Colorado in the year 2020. + +**Example:** + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; + +datacommons::ObservationEntity entities; +entities.dcids = {"geoId/06", "geoId/08"}; + +datacommons::ObservationDate date = "2020"; + +auto result = dc.GetObservations(variables, entities, date); + +for (const auto& [variable, entity_map] : result) { + std::cout << "Variable: " << variable << std::endl; + for (const auto& [entity, observations] : entity_map) { + std::cout << " Entity: " << entity << std::endl; + for (const auto& obs : observations) { + std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl; + } + } +} +``` + +You can also use expressions to select entities, for example, to get the population of all counties in California: + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person"}; + +datacommons::ObservationEntity entities; +entities.expression = "<-containedInPlace{typeOf:County, dcid:geoId/06}"; + 
+datacommons::ObservationDate date = "LATEST"; + +auto result = dc.GetObservations(variables, entities, date); +// ... (process results as above) +``` + + +### 3. Resolve + +Resolves human-readable identifiers (like names or coordinates) to Data Commons IDs (DCIDs). + +**Use Case:** Find the unique DCID for a place when you only know its name. + +**Example:** Find the DCIDs for "California" and "Colorado". + +```cpp +std::vector<std::string> nodes = {"California", "Colorado"}; +// `Resolve` takes one property expression (see `include/DataCommons.h`). +std::string property = "<-description->dcid"; +auto result = dc.Resolve(nodes, property); + +for (const auto& [node, candidates] : result) { + std::cout << "Node: " << node << std::endl; + for (const auto& candidate : candidates) { + std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; + } +} +``` + +### 4. Query + +Executes a SPARQL query directly against the Data Commons knowledge graph for advanced use cases. + +**Use Case:** Retrieve a custom table of data, such as the names and DCIDs of the first 10 states found in the graph. + +**Example:** + +```cpp +std::string query = "SELECT ?name ?dcid WHERE { ?place typeOf State . ?place name ?name . ?place dcid ?dcid . 
} LIMIT 10";
+auto result = dc.Query(query);
+
+// Print header
+for (const auto& header : result.header) {
+    std::cout << header << "\t";
+}
+std::cout << std::endl;
+
+// Print rows
+for (const auto& row : result.rows) {
+    for (const auto& header : result.header) {
+        std::cout << row.at(header) << "\t";
+    }
+    std::cout << std::endl;
+}
+```
+
diff --git a/cpp-client/examples/main.cpp b/cpp-client/examples/main.cpp
new file mode 100644
index 000000000..c8ab1dc1b
--- /dev/null
+++ b/cpp-client/examples/main.cpp
@@ -0,0 +1,77 @@
+#include "DataCommons.h"
+#include <iostream>
+
+// Prints the raw JSON for the `name` and `typeOf` properties of two places.
+void TestGetPropertyValues(datacommons::DataCommons& dc) {
+    std::cout << "--- Testing GetPropertyValues ---" << std::endl;
+    std::vector<std::string> dcids = {"geoId/06", "geoId/08"};
+    std::vector<std::string> properties = {"name", "typeOf"};
+    auto result = dc.GetPropertyValues(dcids, "->", properties);
+    std::cout << result.dump(2) << std::endl;
+    std::cout << std::endl;
+}
+
+// Prints the raw JSON of a variable/entity selection for `country/TGO`.
+void TestGetObservations(datacommons::DataCommons& dc) {
+    std::cout << "--- Testing GetObservations ---" << std::endl;
+    std::vector<std::string> select = {"variable", "entity"};
+    datacommons::ObservationVariable variables;
+    datacommons::ObservationEntity entities;
+    entities.dcids = {"country/TGO"};
+    datacommons::ObservationDate date = ""; // Empty to get all dates
+    auto result = dc.GetObservations(select, variables, entities, date);
+    std::cout << result.dump(2) << std::endl;
+    std::cout << std::endl;
+}
+
+// Resolves two place names to DCIDs and prints every candidate.
+void TestResolve(datacommons::DataCommons& dc) {
+    std::cout << "--- Testing Resolve ---" << std::endl;
+    std::vector<std::string> nodes = {"California", "Mountain View"};
+    std::string property = "<-description->dcid";
+    auto result = dc.Resolve(nodes, property);
+    for (const auto& [node, candidates] : result) {
+        std::cout << "Node: " << node << std::endl;
+        for (const auto& candidate : candidates) {
+            std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl;
+        }
+    }
+    std::cout << std::endl;
+}
+
+// Runs a SPARQL query limited to five states and prints the result as a table.
+void TestQuery(datacommons::DataCommons& dc) {
+    std::cout << "--- Testing Query ---" << std::endl;
+    std::string query = "SELECT ?name ?dcid WHERE { ?place typeOf State . ?place name ?name . ?place dcid ?dcid . } LIMIT 5";
+    auto result = dc.Query(query);
+    for (const auto& header : result.header) {
+        std::cout << header << "\t\t";
+    }
+    std::cout << std::endl;
+    for (const auto& row : result.rows) {
+        for (const auto& header : result.header) {
+            std::cout << row.at(header) << "\t";
+        }
+        std::cout << std::endl;
+    }
+    std::cout << std::endl;
+}
+
+// Runs every example in turn; the API key is read from DC_API_KEY.
+int main() {
+    try {
+        datacommons::DataCommons dc;
+        TestGetPropertyValues(dc);
+        TestGetObservations(dc);
+        TestResolve(dc);
+        TestQuery(dc);
+    } catch (const datacommons::DataCommonsException& e) {
+        std::cerr << "Data Commons Error: " << e.what() << std::endl;
+        return 1;
+    } catch (const std::exception& e) {
+        // Also covers nlohmann::json exceptions, which are not runtime_errors.
+        std::cerr << "Runtime Error: " << e.what() << std::endl;
+        return 1;
+    }
+    return 0;
+}
diff --git a/cpp-client/include/DataCommons.h b/cpp-client/include/DataCommons.h
new file mode 100644
index 000000000..17014514c
--- /dev/null
+++ b/cpp-client/include/DataCommons.h
@@ -0,0 +1,102 @@
+#ifndef DATACOMMONS_H
+#define DATACOMMONS_H
+
+#include <map>
+#include <nlohmann/json.hpp>
+#include <stdexcept>
+#include <string>
+#include <variant>
+#include <vector>
+
+namespace datacommons {
+
+// Thrown by the client for a missing API key, failed requests and unparsable responses.
+class DataCommonsException : public std::runtime_error {
+public:
+    DataCommonsException(const std::string& message) : std::runtime_error(message) {}
+};
+
+// A single dated observation value.
+struct Observation {
+    std::string date;
+    double value = 0.0;  // Initialised so a default Observation never reads garbage.
+    std::string provenance_id;
+};
+
+// One candidate returned by Resolve().
+struct ResolvedId {
+    std::string dcid;
+    std::string dominant_type;
+};
+
+// Result of Query(): column names plus one column-name -> value map per row.
+struct QueryResult {
+    std::vector<std::string> header;
+    std::vector<std::map<std::string, std::string>> rows;
+};
+
+// Variable selector for GetObservations(); empty members are not sent.
+struct ObservationVariable {
+    std::vector<std::string> dcids;
+    std::string expression;
+};
+
+// Entity selector for GetObservations(); empty members are not sent.
+struct ObservationEntity {
+    std::vector<std::string> dcids;
+    std::string expression;
+};
+
+// Either a single date string (empty string = omit the field) or a list of dates.
+using ObservationDate = std::variant<std::string, std::vector<std::string>>;
+
+// Optional filter for GetObservations(); sent only when a member is non-empty.
+struct ObservationFilter {
+    std::vector<std::string> facet_ids;
+    std::vector<std::string> domains;
+};
+
+// Client for the Data Commons V2 REST API.
+class DataCommons {
+public:
+    // Reads the API key from the DC_API_KEY environment variable.
+    // Throws DataCommonsException when it is unset or empty.
+    DataCommons();
+    // Uses `api_key` as given.
+    DataCommons(const std::string& api_key);
+
+    // V2 Endpoints
+    // POST /v2/node. `properties` is appended to `prop_direction` as `p` or
+    // `[p1,p2,...]`; with no properties the bare direction is sent. Returns the
+    // parsed response, or a null JSON value when the response body is empty.
+    nlohmann::json GetPropertyValues(
+        const std::vector<std::string>& dcids,
+        const std::string& prop_direction,
+        const std::vector<std::string>& properties);
+    // POST /v2/observation. Returns the parsed response, or a null JSON value
+    // when the response body is empty.
+    nlohmann::json GetObservations(
+        const std::vector<std::string>& select,
+        const ObservationVariable& variable,
+        const ObservationEntity& entity,
+        const ObservationDate& date,
+        const ObservationFilter& filter = {});
+    // POST /v2/resolve. Maps each `node` in the response to its candidates that have a `dcid`.
+    std::map<std::string, std::vector<ResolvedId>> Resolve(
+        const std::vector<std::string>& nodes,
+        const std::string& property);
+    // POST /v2/sparql. Returns the response header and rows.
+    QueryResult Query(const std::string& query);
+
+private:
+    std::string api_key_;
+    std::string base_url_ = "https://api.datacommons.org";
+
+    // POSTs `body` as JSON to base_url_ + endpoint and returns the response text.
+    // Throws DataCommonsException for any status other than 200.
+    std::string Post(const std::string& endpoint, const std::string& body);
+};
+
+} // namespace datacommons
+
+#endif // DATACOMMONS_H
diff --git a/cpp-client/src/DataCommons.cpp b/cpp-client/src/DataCommons.cpp
new file mode 100644
index 000000000..6dc492765
--- /dev/null
+++ b/cpp-client/src/DataCommons.cpp
@@ -0,0 +1,218 @@
+#include "DataCommons.h"
+#include <cpr/cpr.h>
+#include <cstdlib>
+#include <nlohmann/json.hpp>
+#include <sstream>
+#include <type_traits>
+#include <utility>
+
+namespace datacommons {
+
+namespace {
+
+// Parses a response body; throws DataCommonsException when it is not valid JSON.
+nlohmann::json ParseResponse(const std::string& response) {
+    // allow_exceptions=false makes parse() return a "discarded" value on failure.
+    auto json = nlohmann::json::parse(response, nullptr, false);
+    if (json.is_discarded()) {
+        throw DataCommonsException("Failed to parse JSON response.");
+    }
+    return json;
+}
+
+}  // namespace
+
+DataCommons::DataCommons() {
+    const char* api_key_env = std::getenv("DC_API_KEY");
+    if (api_key_env == nullptr || api_key_env[0] == '\0') {
+        // DataCommonsException derives from std::runtime_error, so handlers
+        // written for either type still catch this.
+        throw DataCommonsException("API key not found. Please set the DC_API_KEY environment variable.");
+    }
+    api_key_ = api_key_env;
+}
+
+DataCommons::DataCommons(const std::string& api_key) : api_key_(api_key) {}
+
+nlohmann::json DataCommons::GetPropertyValues(
+        const std::vector<std::string>& dcids,
+        const std::string& prop_direction,
+        const std::vector<std::string>& properties) {
+    // Build the property expression: "->name" for one property,
+    // "->[name,typeOf]" for several, and the bare direction for none.
+    std::stringstream ss;
+    ss << prop_direction;
+    if (properties.size() > 1) {
+        ss << "[";
+    }
+    for (size_t i = 0; i < properties.size(); ++i) {
+        if (i != 0) {
+            ss << ",";
+        }
+        ss << properties[i];
+    }
+    if (properties.size() > 1) {
+        ss << "]";
+    }
+
+    nlohmann::json body = {
+        {"nodes", dcids},
+        {"property", ss.str()}
+    };
+
+    std::string response = Post("/v2/node", body.dump());
+    if (response.empty()) {
+        return {};
+    }
+    return ParseResponse(response);
+}
+
+nlohmann::json DataCommons::GetObservations(
+        const std::vector<std::string>& select,
+        const ObservationVariable& variable,
+        const ObservationEntity& entity,
+        const ObservationDate& date,
+        const ObservationFilter& filter) {
+    nlohmann::json body;
+    body["select"] = select;
+
+    // "variable" and "entity" are always sent as objects, even when empty.
+    nlohmann::json variable_json = nlohmann::json::object();
+    if (!variable.dcids.empty()) {
+        variable_json["dcids"] = variable.dcids;
+    }
+    if (!variable.expression.empty()) {
+        variable_json["expression"] = variable.expression;
+    }
+    body["variable"] = variable_json;
+
+    nlohmann::json entity_json = nlohmann::json::object();
+    if (!entity.dcids.empty()) {
+        entity_json["dcids"] = entity.dcids;
+    }
+    if (!entity.expression.empty()) {
+        entity_json["expression"] = entity.expression;
+    }
+    body["entity"] = entity_json;
+
+    // A single empty date string means "no date field"; a list is sent as-is.
+    std::visit([&](auto&& arg) {
+        using T = std::decay_t<decltype(arg)>;
+        if constexpr (std::is_same_v<T, std::string>) {
+            if (!arg.empty()) {
+                body["date"] = arg;
+            }
+        } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {
+            body["date"] = arg;
+        }
+    }, date);
+
+    if (!filter.facet_ids.empty() || !filter.domains.empty()) {
+        nlohmann::json filter_json = nlohmann::json::object();
+        if (!filter.facet_ids.empty()) {
+            filter_json["facet_ids"] = filter.facet_ids;
+        }
+        if (!filter.domains.empty()) {
+            filter_json["domains"] = filter.domains;
+        }
+        body["filter"] = filter_json;
+    }
+
+    std::string response = Post("/v2/observation", body.dump());
+    if (response.empty()) {
+        return {};
+    }
+    return ParseResponse(response);
+}
+
+std::map<std::string, std::vector<ResolvedId>> DataCommons::Resolve(
+        const std::vector<std::string>& nodes,
+        const std::string& property) {
+    nlohmann::json body = {
+        {"nodes", nodes},
+        {"property", property}
+    };
+
+    std::string response = Post("/v2/resolve", body.dump());
+    if (response.empty()) {
+        return {};
+    }
+    const nlohmann::json json = ParseResponse(response);
+
+    std::map<std::string, std::vector<ResolvedId>> result;
+    if (!json.contains("entities")) {
+        return result;
+    }
+    for (const auto& entity : json["entities"]) {
+        if (!entity.contains("node") || !entity.contains("candidates")) {
+            continue;
+        }
+        // Explicit get<>() instead of relying on nlohmann's implicit conversions.
+        const std::string node = entity["node"].get<std::string>();
+        for (const auto& candidate : entity["candidates"]) {
+            // Candidates without a DCID are skipped.
+            if (candidate.contains("dcid")) {
+                result[node].push_back({
+                    candidate["dcid"].get<std::string>(),
+                    candidate.value("dominantType", "")
+                });
+            }
+        }
+    }
+
+    return result;
+}
+
+QueryResult DataCommons::Query(const std::string& query) {
+    nlohmann::json body = {
+        {"query", query}
+    };
+
+    std::string response = Post("/v2/sparql", body.dump());
+    if (response.empty()) {
+        return {};
+    }
+    const nlohmann::json json = ParseResponse(response);
+
+    QueryResult result;
+    if (json.contains("header")) {
+        for (const auto& header : json["header"]) {
+            result.header.push_back(header.get<std::string>());
+        }
+    }
+    if (json.contains("rows")) {
+        for (const auto& row : json["rows"]) {
+            std::map<std::string, std::string> row_map;
+            // Look up "cells" once per row rather than once per column.
+            const auto cells = row.find("cells");
+            if (cells != row.end()) {
+                for (size_t i = 0; i < result.header.size() && i < cells->size(); ++i) {
+                    const auto& cell = (*cells)[i];
+                    // Cells without a "value" leave the column absent from the row map.
+                    if (cell.contains("value")) {
+                        row_map[result.header[i]] = cell["value"].get<std::string>();
+                    }
+                }
+            }
+            result.rows.push_back(std::move(row_map));
+        }
+    }
+
+    return result;
+}
+
+std::string DataCommons::Post(const std::string& endpoint, const std::string& body) {
+    cpr::Session session;
+    session.SetUrl(cpr::Url{base_url_ + endpoint});
+    session.SetHeader({{"X-API-Key", api_key_}, {"Content-Type", "application/json"}});
+    session.SetBody(body);
+
+    cpr::Response r = session.Post();
+    if (r.status_code != 200) {
+        // r.error.message carries cpr's transport-level error text, if any.
+        throw DataCommonsException("Error: " + std::to_string(r.status_code) + " - " + r.error.message + "\n" + r.text);
+    }
+    return r.text;
+}
+
+} // namespace datacommons