From 70b79ccc2b92c89c5e06eaed834806c09ee78217 Mon Sep 17 00:00:00 2001 From: Alyssa Guo Date: Wed, 13 Aug 2025 15:24:00 -0400 Subject: [PATCH 1/5] Started c++ client library --- cpp-client/.gitignore | 13 ++++ cpp-client/CMakeLists.txt | 29 ++++++++ cpp-client/README.md | 3 + cpp-client/examples/main.cpp | 35 ++++++++++ cpp-client/include/DataCommons.h | 41 +++++++++++ cpp-client/src/DataCommons.cpp | 114 +++++++++++++++++++++++++++++++ 6 files changed, 235 insertions(+) create mode 100644 cpp-client/.gitignore create mode 100644 cpp-client/CMakeLists.txt create mode 100644 cpp-client/README.md create mode 100644 cpp-client/examples/main.cpp create mode 100644 cpp-client/include/DataCommons.h create mode 100644 cpp-client/src/DataCommons.cpp diff --git a/cpp-client/.gitignore b/cpp-client/.gitignore new file mode 100644 index 000000000..52ba4718d --- /dev/null +++ b/cpp-client/.gitignore @@ -0,0 +1,13 @@ +# CMake +build/ +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake + +# Compiled files +*.o +*.so +*.a +*.dll +*.exe +*.out diff --git a/cpp-client/CMakeLists.txt b/cpp-client/CMakeLists.txt new file mode 100644 index 000000000..7ad9a9c74 --- /dev/null +++ b/cpp-client/CMakeLists.txt @@ -0,0 +1,29 @@ +cmake_minimum_required(VERSION 3.11) +project(datacommons-cpp) + +set(CMAKE_CXX_STANDARD 17) + +include(FetchContent) + +FetchContent_Declare( + cpr + GIT_REPOSITORY https://github.com/libcpr/cpr.git + GIT_TAG 1.8.3 +) + +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG v3.10.5 +) + +FetchContent_MakeAvailable(cpr nlohmann_json) + +add_library(datacommons src/DataCommons.cpp) + +target_include_directories(datacommons PUBLIC include) + +target_link_libraries(datacommons PUBLIC cpr::cpr nlohmann_json::nlohmann_json) + +add_executable(example examples/main.cpp) +target_link_libraries(example PRIVATE datacommons) diff --git a/cpp-client/README.md b/cpp-client/README.md new file mode 100644 index 000000000..c2dc789a9 --- /dev/null +++ b/cpp-client/README.md @@ -0,0 +1,3 @@ +# Data Commons C++ Client Library + +A C++ client library for accessing the Data Commons API. diff --git a/cpp-client/examples/main.cpp b/cpp-client/examples/main.cpp new file mode 100644 index 000000000..a0ac82436 --- /dev/null +++ b/cpp-client/examples/main.cpp @@ -0,0 +1,35 @@ +#include "DataCommons.h" +#include + +int main() { + // NOTE: Please provide a valid API key. + datacommons::DataCommons dc("AIzaSyCTI4Xz-UW_G2Q2RfknhcfdAnTHq5X5XuI"); + + // GetPropertyValues example + // std::vector dcids = {"geoId/06", "geoId/08"}; + // std::string prop = "name"; + // auto result = dc.GetPropertyValues(dcids, prop); + // for (const auto& [dcid, values] : result) { + // std::cout << "DCID: " << dcid << std::endl; + // for (const auto& value : values) { + // std::cout << " Value: " << value.value << std::endl; + // } + // } + + // GetObservations example + std::vector variables = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; + std::vector entities = {"geoId/06", "geoId/08"}; + std::string date = "2020"; + auto obs_result = dc.GetObservations(variables, entities, date); + for (const auto& [variable, entity_map] : obs_result) { + std::cout << "Variable: " << variable << std::endl; + for (const auto& [entity, observations] : entity_map) { + std::cout << " Entity: " << entity << std::endl; + for (const auto& obs : observations) { + std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl; + } + } + } + + return 0; +} diff --git a/cpp-client/include/DataCommons.h b/cpp-client/include/DataCommons.h new file mode 100644 index 000000000..63b44c545 --- /dev/null +++ b/cpp-client/include/DataCommons.h @@ -0,0 +1,41 @@ +#ifndef DATACOMMONS_H +#define DATACOMMONS_H + +#include +#include +#include + +namespace datacommons { + +struct PropertyValue { + std::string dcid; + std::string value; +}; + +struct Observation { + std::string date; + double value; + std::string provenance_id; +}; + +class DataCommons { +public: + DataCommons(const std::string& api_key); + + // V2 Endpoints + std::map> GetPropertyValues(const std::vector& dcids, const std::string& prop); + std::map>> GetObservations( + const std::vector& variables, + const std::vector& entities, + const std::string& date); + +private: + std::string api_key_; + std::string base_url_ = "https://api.datacommons.org"; + + std::string Post(const std::string& endpoint, const std::string& body); +}; + +} // namespace datacommons + +#endif // DATACOMMONS_H diff --git a/cpp-client/src/DataCommons.cpp b/cpp-client/src/DataCommons.cpp new file mode 100644 index 000000000..2bd84f3fd --- /dev/null +++ b/cpp-client/src/DataCommons.cpp @@ -0,0 +1,114 @@ +#include "DataCommons.h" +#include +#include +#include + +namespace datacommons { + +DataCommons::DataCommons(const std::string& api_key) : api_key_(api_key) {} + +std::map> DataCommons::GetPropertyValues(const std::vector& dcids, const std::string& prop) { + nlohmann::json body = { + {"nodes", dcids}, + {"property", "->" + prop} + }; + + std::string response = Post("/v2/node", body.dump()); + if (response.empty()) { + return {}; + } + + auto json = nlohmann::json::parse(response, nullptr, false); + if (json.is_discarded()) { + std::cerr << "Failed to parse JSON response." << std::endl; + return {}; + } + + std::map> result; + if (json.contains("data")) { + for (auto const& [dcid, data] : json["data"].items()) { + if (data.contains("arcs")) { + std::string arc_prop = "name"; + if (data["arcs"].contains(arc_prop)) { + for (const auto& node : data["arcs"][arc_prop]["nodes"]) { + if (node.contains("provenanceId") && node.contains("value")) { + result[dcid].push_back({node["provenanceId"], node["value"]}); + } + } + } + } + } + } + + return result; +} + +std::map>> DataCommons::GetObservations( + const std::vector& variables, + const std::vector& entities, + const std::string& date) { + nlohmann::json body = { + {"select", {"variable", "entity", "date", "value", "provenanceId"}}, + {"from", { + {"variable", {{"dcids", variables}}}, + {"entity", {{"dcids", entities}}}, + {"date", date} + }} + }; + + std::string response = Post("/v2/observation", body.dump()); + if (response.empty()) { + return {}; + } + + auto json = nlohmann::json::parse(response, nullptr, false); + if (json.is_discarded()) { + std::cerr << "Failed to parse JSON response." << std::endl; + return {}; + } + + std::map>> result; + if (json.contains("byVariable")) { + for (const auto& var_data : json["byVariable"]) { + if (var_data.contains("variable") && var_data.contains("byEntity")) { + std::string variable = var_data["variable"]; + for (const auto& entity_data : var_data["byEntity"]) { + if (entity_data.contains("entity") && entity_data.contains("observations")) { + std::string entity = entity_data["entity"]; + for (const auto& obs : entity_data["observations"]) { + if (obs.contains("date") && obs.contains("value") && obs.contains("provenanceId")) { + result[variable][entity].push_back({ + obs["date"], + obs["value"], + obs["provenanceId"] + }); + } + } + } + } + } + } + } + + return result; +} + +std::string DataCommons::Post(const std::string& endpoint, const std::string& body) { + cpr::Url url = cpr::Url{base_url_ + endpoint}; + + cpr::Session session; + session.SetUrl(url); + session.SetHeader({{"X-API-Key", api_key_}, {"Content-Type", "application/json"}}); + session.SetBody(body); + + cpr::Response r = session.Post(); + if (r.status_code == 200) { + return r.text; + } else { + std::cerr << "Error: " << r.status_code << " - " << r.error.message << std::endl; + std::cerr << r.text << std::endl; + return ""; + } +} + +} // namespace datacommons From 67a4bd2fb7701fb0bbaebf1e90798ea6020d8ec5 Mon Sep 17 00:00:00 2001 From: Alyssa Guo Date: Wed, 13 Aug 2025 16:14:16 -0400 Subject: [PATCH 2/5] Finished adding 4 core endpoints --- .gitignore | 16 ++++ cpp-client/README.md | 24 ++++++ cpp-client/USAGE.md | 130 +++++++++++++++++++++++++++++++ cpp-client/examples/main.cpp | 40 ++++------ cpp-client/include/DataCommons.h | 16 ++++ cpp-client/src/DataCommons.cpp | 89 +++++++++++++++++++++ 6 files changed, 291 insertions(+), 24 deletions(-) create mode 100644 cpp-client/USAGE.md diff --git a/.gitignore b/.gitignore index 4994ef15e..22e112755 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,19 @@ http_memprof_out # custom dc sqlite database sqlite + +# C++ client + +# CMake +cpp-client/build/ +cpp-client/CMakeCache.txt +cpp-client/CMakeFiles/ +cpp-client/cmake_install.cmake + +# Compiled files +*.o +*.so +*.a +*.dll +*.exe +*.out diff --git a/cpp-client/README.md b/cpp-client/README.md index c2dc789a9..624b1fbd2 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -1,3 +1,27 @@ # Data Commons C++ Client Library A C++ client library for accessing the Data Commons API. + +## Authentication + +The recommended way to provide your API key is by setting the `DC_API_KEY` environment variable. The client will automatically detect and use it. + +```bash +export DC_API_KEY="YOUR_API_KEY" +``` + +Alternatively, you can pass the key directly to the constructor: + +```cpp +#include "DataCommons.h" + +int main() { + datacommons::DataCommons dc("YOUR_API_KEY"); + // ... + return 0; +} +``` + +## Usage + +See the `examples/main.cpp` file for a demonstration of how to use the library. diff --git a/cpp-client/USAGE.md b/cpp-client/USAGE.md new file mode 100644 index 000000000..d368c3b4e --- /dev/null +++ b/cpp-client/USAGE.md @@ -0,0 +1,130 @@ +# Data Commons C++ Client Usage Guide + +This guide provides a summary of the available endpoints in the Data Commons C++ client library and examples of how to use them. + +## Getting Started + +First, ensure you have set your Data Commons API key as an environment variable: + +```bash +export DC_API_KEY="YOUR_API_KEY" +``` + +Then, you can create a `DataCommons` client object in your C++ code: + +```cpp +#include "DataCommons.h" +#include +#include + +int main() { + try { + datacommons::DataCommons dc; + // Your code here... + } catch (const std::runtime_error& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + return 0; +} +``` + +## Core V2 API Endpoints + +The C++ client provides access to the four core V2 endpoints of the Data Commons REST API. + +### 1. GetPropertyValues + +Fetches the values of a specific property for one or more nodes (entities). + +**Use Case:** Find the name, population, or any other property of a place, like a state or city. + +**Example:** Get the names for California (`geoId/06`) and Colorado (`geoId/08`). + +```cpp +std::vector dcids = {"geoId/06", "geoId/08"}; +std::string prop = "name"; +auto result = dc.GetPropertyValues(dcids, prop); + +for (const auto& [dcid, values] : result) { + std::cout << "DCID: " << dcid << std::endl; + for (const auto& value : values) { + std::cout << " Value: " << value.value << std::endl; + } +} +``` + +### 2. GetObservations + +Fetches statistical observations for a set of variables and entities at a specific date. + +**Use Case:** Get the total, male, and female population counts for California in the year 2020. + +**Example:** + +```cpp +std::vector variables = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; +std::vector entities = {"geoId/06"}; +std::string date = "2020"; +auto result = dc.GetObservations(variables, entities, date); + +for (const auto& [variable, entity_map] : result) { + std::cout << "Variable: " << variable << std::endl; + for (const auto& [entity, observations] : entity_map) { + std::cout << " Entity: " << entity << std::endl; + for (const auto& obs : observations) { + std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl; + } + } +} +``` + +### 3. Resolve + +Resolves human-readable identifiers (like names or coordinates) to Data Commons IDs (DCIDs). + +**Use Case:** Find the unique DCID for a place when you only know its name. + +**Example:** Find the DCIDs for "California" and "Colorado". + +```cpp +std::vector nodes = {"California", "Colorado"}; +std::string from_property = "description"; +std::string to_property = "dcid"; +auto result = dc.Resolve(nodes, from_property, to_property); + +for (const auto& [node, candidates] : result) { + std::cout << "Node: " << node << std::endl; + for (const auto& candidate : candidates) { + std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; + } +} +``` + +### 4. Query + +Executes a SPARQL query directly against the Data Commons knowledge graph for advanced use cases. + +**Use Case:** Retrieve a custom table of data, such as the names and DCIDs of the first 10 states found in the graph. + +**Example:** + +```cpp +std::string query = "SELECT ?name ?dcid WHERE { ?place typeOf State . ?place name ?name . ?place dcid ?dcid . } LIMIT 10"; +auto result = dc.Query(query); + +// Print header +for (const auto& header : result.header) { + std::cout << header << "\t"; +} +std::cout << std::endl; + +// Print rows +for (const auto& row : result.rows) { + for (const auto& header : result.header) { + std::cout << row.at(header) << "\t"; + } + std::cout << std::endl; +} +``` + diff --git a/cpp-client/examples/main.cpp b/cpp-client/examples/main.cpp index a0ac82436..76b98be1c 100644 --- a/cpp-client/examples/main.cpp +++ b/cpp-client/examples/main.cpp @@ -1,34 +1,26 @@ #include "DataCommons.h" #include +#include int main() { - // NOTE: Please provide a valid API key. - datacommons::DataCommons dc("AIzaSyCTI4Xz-UW_G2Q2RfknhcfdAnTHq5X5XuI"); + try { + // The DataCommons client will automatically look for the DC_API_KEY + // environment variable. + datacommons::DataCommons dc; - // GetPropertyValues example - // std::vector dcids = {"geoId/06", "geoId/08"}; - // std::string prop = "name"; - // auto result = dc.GetPropertyValues(dcids, prop); - // for (const auto& [dcid, values] : result) { - // std::cout << "DCID: " << dcid << std::endl; - // for (const auto& value : values) { - // std::cout << " Value: " << value.value << std::endl; - // } - // } - - // GetObservations example - std::vector variables = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; - std::vector entities = {"geoId/06", "geoId/08"}; - std::string date = "2020"; - auto obs_result = dc.GetObservations(variables, entities, date); - for (const auto& [variable, entity_map] : obs_result) { - std::cout << "Variable: " << variable << std::endl; - for (const auto& [entity, observations] : entity_map) { - std::cout << " Entity: " << entity << std::endl; - for (const auto& obs : observations) { - std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl; + // GetPropertyValues example + std::vector dcids = {"geoId/06", "geoId/08"}; + std::string prop = "name"; + auto result = dc.GetPropertyValues(dcids, prop); + for (const auto& [dcid, values] : result) { + std::cout << "DCID: " << dcid << std::endl; + for (const auto& value : values) { + std::cout << " Value: " << value.value << std::endl; } } + } catch (const std::runtime_error& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; } return 0; diff --git a/cpp-client/include/DataCommons.h b/cpp-client/include/DataCommons.h index 63b44c545..b9cdf5568 100644 --- a/cpp-client/include/DataCommons.h +++ b/cpp-client/include/DataCommons.h @@ -18,8 +18,19 @@ struct Observation { std::string provenance_id; }; +struct ResolvedId { + std::string dcid; + std::string dominant_type; +}; + +struct QueryResult { + std::vector header; + std::vector> rows; +}; + class DataCommons { public: + DataCommons(); DataCommons(const std::string& api_key); // V2 Endpoints @@ -28,6 +39,11 @@ class DataCommons { const std::vector& variables, const std::vector& entities, const std::string& date); + std::map> Resolve( + const std::vector& nodes, + const std::string& from_property, + const std::string& to_property); + QueryResult Query(const std::string& query); private: std::string api_key_; diff --git a/cpp-client/src/DataCommons.cpp b/cpp-client/src/DataCommons.cpp index 2bd84f3fd..b0e43d1eb 100644 --- a/cpp-client/src/DataCommons.cpp +++ b/cpp-client/src/DataCommons.cpp @@ -2,9 +2,20 @@ #include #include #include +#include +#include +#include namespace datacommons { +DataCommons::DataCommons() { + const char* api_key_env = std::getenv("DC_API_KEY"); + if (api_key_env == nullptr || std::string(api_key_env).empty()) { + throw std::runtime_error("API key not found. Please set the DC_API_KEY environment variable."); + } + api_key_ = api_key_env; +} + DataCommons::DataCommons(const std::string& api_key) : api_key_(api_key) {} std::map> DataCommons::GetPropertyValues(const std::vector& dcids, const std::string& prop) { @@ -93,6 +104,84 @@ std::map>> DataCommo return result; } +std::map> DataCommons::Resolve( + const std::vector& nodes, + const std::string& from_property, + const std::string& to_property) { + std::string expression = "<-" + from_property + "->" + to_property; + nlohmann::json body = { + {"nodes", nodes}, + {"property", expression} + }; + + std::string response = Post("/v2/resolve", body.dump()); + if (response.empty()) { + return {}; + } + + auto json = nlohmann::json::parse(response, nullptr, false); + if (json.is_discarded()) { + std::cerr << "Failed to parse JSON response." << std::endl; + return {}; + } + + std::map> result; + if (json.contains("entities")) { + for (const auto& entity : json["entities"]) { + if (entity.contains("node") && entity.contains("candidates")) { + std::string node = entity["node"]; + for (const auto& candidate : entity["candidates"]) { + if (candidate.contains("dcid")) { + result[node].push_back({ + candidate["dcid"], + candidate.value("dominantType", "") + }); + } + } + } + } + } + + return result; +} + +QueryResult DataCommons::Query(const std::string& query) { + nlohmann::json body = { + {"query", query} + }; + + std::string response = Post("/v2/sparql", body.dump()); + if (response.empty()) { + return {}; + } + + auto json = nlohmann::json::parse(response, nullptr, false); + if (json.is_discarded()) { + std::cerr << "Failed to parse JSON response." << std::endl; + return {}; + } + + QueryResult result; + if (json.contains("header")) { + for (const auto& header : json["header"]) { + result.header.push_back(header); + } + } + if (json.contains("rows")) { + for (const auto& row : json["rows"]) { + std::map row_map; + for (size_t i = 0; i < result.header.size(); ++i) { + if (row.contains("cells") && i < row["cells"].size() && row["cells"][i].contains("value")) { + row_map[result.header[i]] = row["cells"][i]["value"]; + } + } + result.rows.push_back(row_map); + } + } + + return result; +} + std::string DataCommons::Post(const std::string& endpoint, const std::string& body) { cpr::Url url = cpr::Url{base_url_ + endpoint}; From 085405aadb26c4eebba28a94121a510b289e17e0 Mon Sep 17 00:00:00 2001 From: Alyssa Guo Date: Wed, 13 Aug 2025 16:41:06 -0400 Subject: [PATCH 3/5] added full features --- cpp-client/USAGE.md | 110 +++++++++++++++++++++++++++---- cpp-client/examples/main.cpp | 16 ++--- cpp-client/include/DataCommons.h | 33 ++++++---- cpp-client/src/DataCommons.cpp | 87 ++++++++++++++---------- 4 files changed, 179 insertions(+), 67 deletions(-) diff --git a/cpp-client/USAGE.md b/cpp-client/USAGE.md index d368c3b4e..0c20af795 100644 --- a/cpp-client/USAGE.md +++ b/cpp-client/USAGE.md @@ -35,37 +35,103 @@ The C++ client provides access to the four core V2 endpoints of the Data Commons ### 1. GetPropertyValues -Fetches the values of a specific property for one or more nodes (entities). +Fetches property values for one or more nodes. This method returns the raw JSON response from the API, giving you the flexibility to parse it as needed. -**Use Case:** Find the name, population, or any other property of a place, like a state or city. +**Use Case:** Find the name and type of a place, like a state or city. -**Example:** Get the names for California (`geoId/06`) and Colorado (`geoId/08`). +**Example:** Get the `name` and `typeOf` for California (`geoId/06`) and Colorado (`geoId/08`). ```cpp std::vector dcids = {"geoId/06", "geoId/08"}; -std::string prop = "name"; -auto result = dc.GetPropertyValues(dcids, prop); +std::vector properties = {"name", "typeOf"}; +auto result = dc.GetPropertyValues(dcids, "->", properties); -for (const auto& [dcid, values] : result) { - std::cout << "DCID: " << dcid << std::endl; - for (const auto& value : values) { - std::cout << " Value: " << value.value << std::endl; +std::cout << result.dump(2) << std::endl; +``` + +### 2. GetObservations + +Fetches statistical observations. This endpoint provides a flexible way to query for data by specifying variables, entities, and dates in various combinations. + +**Use Case:** Get the total, male, and female population counts for California and Colorado in the year 2020. + +**Example:** + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; + +datacommons::ObservationEntity entities; +entities.dcids = {"geoId/06", "geoId/08"}; + +datacommons::ObservationDate date = "2020"; + +auto result = dc.GetObservations(variables, entities, date); + +for (const auto& [variable, entity_map] : result) { + std::cout << "Variable: " << variable << std::endl; + for (const auto& [entity, observations] : entity_map) { + std::cout << " Entity: " << entity << std::endl; + for (const auto& obs : observations) { + std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl; + } + } +} +``` + +You can also use expressions to select entities, for example, to get the population of all counties in California: + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person"}; + +datacommons::ObservationEntity entities; +entities.expression = "<-containedInPlace{typeOf:County, dcid:geoId/06}"; + +datacommons::ObservationDate date = "LATEST"; + +auto result = dc.GetObservations(variables, entities, date); +// ... (process results as above) +``` + +### 3. Resolve + +Resolves human-readable identifiers (like names or coordinates) to Data Commons IDs (DCIDs). + +**Use Case:** Find the unique DCID for a place when you only know its name. + +**Example:** Find the DCIDs for "California" and "Colorado". + +```cpp +std::vector nodes = {"California", "Colorado"}; +std::string property = "<-description->dcid"; +auto result = dc.Resolve(nodes, property); + +for (const auto& [node, candidates] : result) { + std::cout << "Node: " << node << std::endl; + for (const auto& candidate : candidates) { + std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; } } ``` ### 2. GetObservations -Fetches statistical observations for a set of variables and entities at a specific date. +Fetches statistical observations. This endpoint provides a flexible way to query for data by specifying variables, entities, and dates in various combinations. -**Use Case:** Get the total, male, and female population counts for California in the year 2020. +**Use Case:** Get the total, male, and female population counts for California and Colorado in the year 2020. **Example:** ```cpp -std::vector variables = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; -std::vector entities = {"geoId/06"}; -std::string date = "2020"; +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person", "Count_Person_Male", "Count_Person_Female"}; + +datacommons::ObservationEntity entities; +entities.dcids = {"geoId/06", "geoId/08"}; + +datacommons::ObservationDate date = "2020"; + auto result = dc.GetObservations(variables, entities, date); for (const auto& [variable, entity_map] : result) { @@ -79,6 +145,22 @@ for (const auto& [variable, entity_map] : result) { } ``` +You can also use expressions to select entities, for example, to get the population of all counties in California: + +```cpp +datacommons::ObservationVariable variables; +variables.dcids = {"Count_Person"}; + +datacommons::ObservationEntity entities; +entities.expression = "<-containedInPlace{typeOf:County, dcid:geoId/06}"; + +datacommons::ObservationDate date = "LATEST"; + +auto result = dc.GetObservations(variables, entities, date); +// ... (process results as above) +``` + + ### 3. Resolve Resolves human-readable identifiers (like names or coordinates) to Data Commons IDs (DCIDs). diff --git a/cpp-client/examples/main.cpp b/cpp-client/examples/main.cpp index 76b98be1c..9f35e3d97 100644 --- a/cpp-client/examples/main.cpp +++ b/cpp-client/examples/main.cpp @@ -8,14 +8,14 @@ int main() { // environment variable. datacommons::DataCommons dc; - // GetPropertyValues example - std::vector dcids = {"geoId/06", "geoId/08"}; - std::string prop = "name"; - auto result = dc.GetPropertyValues(dcids, prop); - for (const auto& [dcid, values] : result) { - std::cout << "DCID: " << dcid << std::endl; - for (const auto& value : values) { - std::cout << " Value: " << value.value << std::endl; + // Resolve example + std::vector nodes = {"California", "Colorado"}; + std::string property = "<-description->dcid"; + auto resolve_result = dc.Resolve(nodes, property); + for (const auto& [node, candidates] : resolve_result) { + std::cout << "Node: " << node << std::endl; + for (const auto& candidate : candidates) { + std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; } } } catch (const std::runtime_error& e) { diff --git a/cpp-client/include/DataCommons.h b/cpp-client/include/DataCommons.h index b9cdf5568..956175afd 100644 --- a/cpp-client/include/DataCommons.h +++ b/cpp-client/include/DataCommons.h @@ -1,17 +1,14 @@ #ifndef DATACOMMONS_H #define DATACOMMONS_H +#include #include #include #include +#include namespace datacommons { -struct PropertyValue { - std::string dcid; - std::string value; -}; - struct Observation { std::string date; double value; @@ -28,21 +25,35 @@ struct QueryResult { std::vector> rows; }; +struct ObservationVariable { + std::vector dcids; + std::string expression; +}; + +struct ObservationEntity { + std::vector dcids; + std::string expression; +}; + +using ObservationDate = std::variant>; + class DataCommons { public: DataCommons(); DataCommons(const std::string& api_key); // V2 Endpoints - std::map> GetPropertyValues(const std::vector& dcids, const std::string& prop); + nlohmann::json GetPropertyValues( + const std::vector& dcids, + const std::string& prop_direction, + const std::vector& properties); std::map>> GetObservations( - const std::vector& variables, - const std::vector& entities, - const std::string& date); + const ObservationVariable& variable, + const ObservationEntity& entity, + const ObservationDate& date); std::map> Resolve( const std::vector& nodes, - const std::string& from_property, - const std::string& to_property); + const std::string& property); QueryResult Query(const std::string& query); private: diff --git a/cpp-client/src/DataCommons.cpp b/cpp-client/src/DataCommons.cpp index b0e43d1eb..ea9713e26 100644 --- a/cpp-client/src/DataCommons.cpp +++ b/cpp-client/src/DataCommons.cpp @@ -18,10 +18,28 @@ DataCommons::DataCommons() { DataCommons::DataCommons(const std::string& api_key) : api_key_(api_key) {} -std::map> DataCommons::GetPropertyValues(const std::vector& dcids, const std::string& prop) { +nlohmann::json DataCommons::GetPropertyValues( + const std::vector& dcids, + const std::string& prop_direction, + const std::vector& properties) { + std::stringstream ss; + ss << prop_direction; + if (properties.size() > 1) { + ss << "["; + } + for (size_t i = 0; i < properties.size(); ++i) { + if (i != 0) { + ss << ","; + } + ss << properties[i]; + } + if (properties.size() > 1) { + ss << "]"; + } + nlohmann::json body = { {"nodes", dcids}, - {"property", "->" + prop} + {"property", ss.str()} }; std::string response = Post("/v2/node", body.dump()); @@ -35,37 +53,40 @@ std::map> DataCommons::GetPropertyValues return {}; } - std::map> result; - if (json.contains("data")) { - for (auto const& [dcid, data] : json["data"].items()) { - if (data.contains("arcs")) { - std::string arc_prop = "name"; - if (data["arcs"].contains(arc_prop)) { - for (const auto& node : data["arcs"][arc_prop]["nodes"]) { - if (node.contains("provenanceId") && node.contains("value")) { - result[dcid].push_back({node["provenanceId"], node["value"]}); - } - } - } - } - } - } - - return result; + return json; } std::map>> DataCommons::GetObservations( - const std::vector& variables, - const std::vector& entities, - const std::string& date) { - nlohmann::json body = { - {"select", {"variable", "entity", "date", "value", "provenanceId"}}, - {"from", { - {"variable", {{"dcids", variables}}}, - {"entity", {{"dcids", entities}}}, - {"date", date} - }} - }; + const ObservationVariable& variable, + const ObservationEntity& entity, + const ObservationDate& date) { + nlohmann::json body; + body["select"] = {"variable", "entity", "date", "value", "provenanceId"}; + + nlohmann::json from; + if (!variable.dcids.empty()) { + from["variable"]["dcids"] = variable.dcids; + } + if (!variable.expression.empty()) { + from["variable"]["expression"] = variable.expression; + } + if (!entity.dcids.empty()) { + from["entity"]["dcids"] = entity.dcids; + } + if (!entity.expression.empty()) { + from["entity"]["expression"] = entity.expression; + } + + std::visit([&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + from["date"] = arg; + } else if constexpr (std::is_same_v>) { + from["date"] = arg; + } + }, date); + + body["from"] = from; std::string response = Post("/v2/observation", body.dump()); if (response.empty()) { @@ -106,12 +127,10 @@ std::map>> DataCommo std::map> DataCommons::Resolve( const std::vector& nodes, - const std::string& from_property, - const std::string& to_property) { - std::string expression = "<-" + from_property + "->" + to_property; + const std::string& property) { nlohmann::json body = { {"nodes", nodes}, - {"property", expression} + {"property", property} }; std::string response = Post("/v2/resolve", body.dump()); From 698611dfceed293850221b3f7b1d2bc9b1b13299 Mon Sep 17 00:00:00 2001 From: Alyssa Guo Date: Wed, 13 Aug 2025 17:34:09 -0400 Subject: [PATCH 4/5] fix observations endpoint --- cpp-client/README.md | 33 ++++++++++++- cpp-client/examples/main.cpp | 76 +++++++++++++++++++++++------- cpp-client/include/DataCommons.h | 17 ++++++- cpp-client/src/DataCommons.cpp | 80 ++++++++++++++------------------ 4 files changed, 141 insertions(+), 65 deletions(-) diff --git a/cpp-client/README.md b/cpp-client/README.md index 624b1fbd2..55020544d 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -22,6 +22,37 @@ int main() { } ``` +## Building and Running the Example + +### Prerequisites + +- C++17 compiler (g++ or Clang) +- CMake (3.11+) +- Git +- OpenSSL development libraries (`libssl-dev` on Debian/Ubuntu) + +### Steps + +1. **Clone the repository and navigate to the client directory.** +2. **Create a build directory:** + ```bash + mkdir build + cd build + ``` +3. **Configure and build the project:** + ```bash + cmake .. + make + ``` +4. **Set your API key:** + ```bash + export DC_API_KEY="YOUR_API_KEY" + ``` +5. **Run the example:** + ```bash + ./example + ``` + ## Usage -See the `examples/main.cpp` file for a demonstration of how to use the library. +See the `USAGE.md` file for a detailed guide to the library's functions. diff --git a/cpp-client/examples/main.cpp b/cpp-client/examples/main.cpp index 9f35e3d97..ce488afad 100644 --- a/cpp-client/examples/main.cpp +++ b/cpp-client/examples/main.cpp @@ -1,27 +1,71 @@ #include "DataCommons.h" #include -#include + +void TestGetPropertyValues(datacommons::DataCommons& dc) { + std::cout << "--- Testing GetPropertyValues ---" << std::endl; + std::vector dcids = {"geoId/06", "geoId/08"}; + std::vector properties = {"name", "typeOf"}; + auto result = dc.GetPropertyValues(dcids, "->", properties); + std::cout << result.dump(2) << std::endl; + std::cout << std::endl; +} + +void TestGetObservations(datacommons::DataCommons& dc) { + std::cout << "--- Testing GetObservations (Example 1 from Docs) ---" << std::endl; + std::vector select = {"variable", "entity"}; + datacommons::ObservationVariable variables; + datacommons::ObservationEntity entities; + entities.dcids = {"country/TGO"}; + datacommons::ObservationDate date = ""; // Empty to get all dates + auto result = dc.GetObservations(select, variables, entities, date); + std::cout << result.dump(2) << std::endl; + std::cout << std::endl; +} + +void TestResolve(datacommons::DataCommons& dc) { + std::cout << "--- Testing Resolve ---" << std::endl; + std::vector nodes = {"California", "Mountain View"}; + std::string property = "<-description->dcid"; + auto result = dc.Resolve(nodes, property); + for (const auto& [node, candidates] : result) { + std::cout << "Node: " << node << std::endl; + for (const auto& candidate : candidates) { + std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; + } + } + std::cout << std::endl; +} + +void TestQuery(datacommons::DataCommons& dc) { + std::cout << "--- Testing Query ---" << std::endl; + std::string query = "SELECT ?name ?dcid WHERE { ?place typeOf State . ?place name ?name . ?place dcid ?dcid . } LIMIT 5"; + auto result = dc.Query(query); + for (const auto& header : result.header) { + std::cout << header << "\t\t"; + } + std::cout << std::endl; + for (const auto& row : result.rows) { + for (const auto& header : result.header) { + std::cout << row.at(header) << "\t"; + } + std::cout << std::endl; + } + std::cout << std::endl; +} int main() { try { - // The DataCommons client will automatically look for the DC_API_KEY - // environment variable. datacommons::DataCommons dc; - - // Resolve example - std::vector nodes = {"California", "Colorado"}; - std::string property = "<-description->dcid"; - auto resolve_result = dc.Resolve(nodes, property); - for (const auto& [node, candidates] : resolve_result) { - std::cout << "Node: " << node << std::endl; - for (const auto& candidate : candidates) { - std::cout << " DCID: " << candidate.dcid << ", Type: " << candidate.dominant_type << std::endl; - } - } + TestGetPropertyValues(dc); + TestGetObservations(dc); + TestResolve(dc); + TestQuery(dc); + } catch (const datacommons::DataCommonsException& e) { + std::cerr << "Data Commons Error: " << e.what() << std::endl; + return 1; } catch (const std::runtime_error& e) { - std::cerr << "Error: " << e.what() << std::endl; + std::cerr << "Runtime Error: " << e.what() << std::endl; return 1; } - return 0; } diff --git a/cpp-client/include/DataCommons.h b/cpp-client/include/DataCommons.h index 956175afd..17014514c 100644 --- a/cpp-client/include/DataCommons.h +++ b/cpp-client/include/DataCommons.h @@ -6,9 +6,15 @@ #include #include #include +#include namespace datacommons { +class DataCommonsException : public std::runtime_error { +public: + DataCommonsException(const std::string& message) : std::runtime_error(message) {} +}; + struct Observation { std::string date; double value; @@ -37,6 +43,11 @@ struct ObservationEntity { using ObservationDate = std::variant>; +struct ObservationFilter { + std::vector facet_ids; + std::vector domains; +}; + class DataCommons { public: DataCommons(); @@ -47,10 +58,12 @@ class DataCommons { const std::vector& dcids, const std::string& prop_direction, const std::vector& properties); - std::map>> GetObservations( + nlohmann::json GetObservations( + const std::vector& select, const ObservationVariable& variable, const ObservationEntity& entity, - const ObservationDate& date); + const ObservationDate& date, + const ObservationFilter& filter = {}); std::map> Resolve( const std::vector& nodes, const std::string& property); diff --git a/cpp-client/src/DataCommons.cpp b/cpp-client/src/DataCommons.cpp index ea9713e26..6dc492765 100644 --- a/cpp-client/src/DataCommons.cpp +++ b/cpp-client/src/DataCommons.cpp @@ -49,44 +49,60 @@ nlohmann::json DataCommons::GetPropertyValues( auto json = nlohmann::json::parse(response, nullptr, false); if (json.is_discarded()) { - std::cerr << "Failed to parse JSON response." << std::endl; - return {}; + throw DataCommonsException("Failed to parse JSON response."); } return json; } -std::map>> DataCommons::GetObservations( +nlohmann::json DataCommons::GetObservations( + const std::vector& select, const ObservationVariable& variable, const ObservationEntity& entity, - const ObservationDate& date) { + const ObservationDate& date, + const ObservationFilter& filter) { nlohmann::json body; - body["select"] = {"variable", "entity", "date", "value", "provenanceId"}; + body["select"] = select; - nlohmann::json from; + nlohmann::json variable_json = nlohmann::json::object(); if (!variable.dcids.empty()) { - from["variable"]["dcids"] = variable.dcids; + variable_json["dcids"] = variable.dcids; } if (!variable.expression.empty()) { - from["variable"]["expression"] = variable.expression; + variable_json["expression"] = variable.expression; } + body["variable"] = variable_json; + + nlohmann::json entity_json = nlohmann::json::object(); if (!entity.dcids.empty()) { - from["entity"]["dcids"] = entity.dcids; + entity_json["dcids"] = entity.dcids; } if (!entity.expression.empty()) { - from["entity"]["expression"] = entity.expression; + entity_json["expression"] = entity.expression; } + body["entity"] = entity_json; std::visit([&](auto&& arg) { using T = std::decay_t; if constexpr (std::is_same_v) { - from["date"] = arg; + if (!arg.empty()) { + body["date"] = arg; + } } else if constexpr (std::is_same_v>) { - from["date"] = arg; + body["date"] = arg; } }, date); - body["from"] = from; + if (!filter.facet_ids.empty() || !filter.domains.empty()) { + nlohmann::json filter_json = nlohmann::json::object(); + if (!filter.facet_ids.empty()) { + filter_json["facet_ids"] = filter.facet_ids; + } + if (!filter.domains.empty()) { + filter_json["domains"] = filter.domains; + } + body["filter"] = filter_json; + } std::string response = Post("/v2/observation", body.dump()); if (response.empty()) { @@ -95,34 +111,10 @@ std::map>> DataCommo auto json = nlohmann::json::parse(response, nullptr, false); if (json.is_discarded()) { - std::cerr << "Failed to parse JSON response." << std::endl; - return {}; + throw DataCommonsException("Failed to parse JSON response."); } - std::map>> result; - if (json.contains("byVariable")) { - for (const auto& var_data : json["byVariable"]) { - if (var_data.contains("variable") && var_data.contains("byEntity")) { - std::string variable = var_data["variable"]; - for (const auto& entity_data : var_data["byEntity"]) { - if (entity_data.contains("entity") && entity_data.contains("observations")) { - std::string entity = entity_data["entity"]; - for (const auto& obs : entity_data["observations"]) { - if (obs.contains("date") && obs.contains("value") && obs.contains("provenanceId")) { - result[variable][entity].push_back({ - obs["date"], - obs["value"], - obs["provenanceId"] - }); - } - } - } - } - } - } - } - - return result; + return json; } std::map> DataCommons::Resolve( @@ -140,8 +132,7 @@ std::map> DataCommons::Resolve( auto json = nlohmann::json::parse(response, nullptr, false); if (json.is_discarded()) { - std::cerr << "Failed to parse JSON response." << std::endl; - return {}; + throw DataCommonsException("Failed to parse JSON response."); } std::map> result; @@ -176,8 +167,7 @@ QueryResult DataCommons::Query(const std::string& query) { auto json = nlohmann::json::parse(response, nullptr, false); if (json.is_discarded()) { - std::cerr << "Failed to parse JSON response." << std::endl; - return {}; + throw DataCommonsException("Failed to parse JSON response."); } QueryResult result; @@ -213,9 +203,7 @@ std::string DataCommons::Post(const std::string& endpoint, const std::string& bo if (r.status_code == 200) { return r.text; } else { - std::cerr << "Error: " << r.status_code << " - " << r.error.message << std::endl; - std::cerr << r.text << std::endl; - return ""; + throw DataCommonsException("Error: " + std::to_string(r.status_code) + " - " + r.error.message + "\n" + r.text); } } From f7b473de4a0f871923627d210c8516f422835e09 Mon Sep 17 00:00:00 2001 From: Alyssa Guo Date: Wed, 13 Aug 2025 17:34:51 -0400 Subject: [PATCH 5/5] update example --- cpp-client/examples/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp-client/examples/main.cpp b/cpp-client/examples/main.cpp index ce488afad..c8ab1dc1b 100644 --- a/cpp-client/examples/main.cpp +++ b/cpp-client/examples/main.cpp @@ -11,7 +11,7 @@ void TestGetPropertyValues(datacommons::DataCommons& dc) { } void TestGetObservations(datacommons::DataCommons& dc) { - std::cout << "--- Testing GetObservations (Example 1 from Docs) ---" << std::endl; + std::cout << "--- Testing GetObservations ---" << std::endl; std::vector select = {"variable", "entity"}; datacommons::ObservationVariable variables; datacommons::ObservationEntity entities;