Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cpp-client/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# CMake
build/
CMakeCache.txt
CMakeFiles/
cmake_install.cmake

# Compiled files
*.o
*.so
*.a
*.dll
*.exe
*.out
29 changes: 29 additions & 0 deletions cpp-client/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# FetchContent_MakeAvailable() (used below) was introduced in CMake 3.14;
# the FetchContent module alone is available from 3.11.
cmake_minimum_required(VERSION 3.14)
project(datacommons-cpp)

# The sources rely on C++17 (structured bindings), so make the standard a hard
# requirement instead of letting CMake silently fall back to an older one.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include(FetchContent)

# HTTP client used by DataCommons::Post.
FetchContent_Declare(
    cpr
    GIT_REPOSITORY https://github.com/libcpr/cpr.git
    GIT_TAG 1.8.3
)

# JSON library used to build requests and parse responses.
FetchContent_Declare(
    nlohmann_json
    GIT_REPOSITORY https://github.com/nlohmann/json.git
    GIT_TAG v3.10.5
)

FetchContent_MakeAvailable(cpr nlohmann_json)

# The client library itself.
add_library(datacommons src/DataCommons.cpp)

target_include_directories(datacommons PUBLIC include)

target_link_libraries(datacommons PUBLIC cpr::cpr nlohmann_json::nlohmann_json)

# Small example program that exercises the library.
add_executable(example examples/main.cpp)
target_link_libraries(example PRIVATE datacommons)
3 changes: 3 additions & 0 deletions cpp-client/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Data Commons C++ Client Library

A C++ client library for accessing the Data Commons API.
35 changes: 35 additions & 0 deletions cpp-client/examples/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "DataCommons.h"

#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

// Example program: requests the 2020 observations of three population
// variables for the entities geoId/06 and geoId/08 and prints them.
//
// The API key is read from the DATACOMMONS_API_KEY environment variable so
// that no credential has to be committed to the source tree.
//
// Returns 0 after printing the results, or 1 when the environment variable is
// unset or empty.
int main() {
    // NOTE: Please provide a valid API key via the DATACOMMONS_API_KEY environment variable.
    const char* api_key = std::getenv("DATACOMMONS_API_KEY");
    if (api_key == nullptr || *api_key == '\0') {
        std::cerr << "Error: DATACOMMONS_API_KEY environment variable not set." << std::endl;
        return 1;
    }
    datacommons::DataCommons dc(api_key);

    // GetPropertyValues example
    // std::vector<std::string> dcids = {"geoId/06", "geoId/08"};
    // std::string prop = "name";
    // auto result = dc.GetPropertyValues(dcids, prop);
    // for (const auto& [dcid, values] : result) {
    // std::cout << "DCID: " << dcid << std::endl;
    // for (const auto& value : values) {
    // std::cout << " Value: " << value.value << std::endl;
    // }
    // }

    // GetObservations example
    const std::vector<std::string> variables = {"Count_Person", "Count_Person_Male", "Count_Person_Female"};
    const std::vector<std::string> entities = {"geoId/06", "geoId/08"};
    const std::string date = "2020";

    // The result is grouped as variable -> entity -> list of observations.
    const auto obs_result = dc.GetObservations(variables, entities, date);
    for (const auto& [variable, entity_map] : obs_result) {
        std::cout << "Variable: " << variable << std::endl;
        for (const auto& [entity, observations] : entity_map) {
            std::cout << " Entity: " << entity << std::endl;
            for (const auto& obs : observations) {
                std::cout << " Date: " << obs.date << ", Value: " << obs.value << std::endl;
            }
        }
    }

    return 0;
}
41 changes: 41 additions & 0 deletions cpp-client/include/DataCommons.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef DATACOMMONS_H
#define DATACOMMONS_H

#include <string>
#include <vector>
#include <map>

namespace datacommons {

// One value returned for a node property by DataCommons::GetPropertyValues.
struct PropertyValue {
// NOTE: despite its name, GetPropertyValues fills this field from the
// "provenanceId" entry of each response node, not from a node DCID.
std::string dcid;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The field dcid in the PropertyValue struct is misleading. The implementation populates this field with the provenanceId from the API response, not a dcid. Renaming it to provenance_id would make the code clearer and less prone to misuse.

Suggested change
std::string dcid;
std::string provenance_id;

// Filled from the "value" entry of the response node.
std::string value;
};

// One observation of a variable for an entity, as collected by
// DataCommons::GetObservations.
struct Observation {
    // Filled from the "date" field of the response observation.
    std::string date;
    // Filled from the "value" field of the response observation. Initialised
    // to zero so a default-constructed Observation never carries an
    // indeterminate double; aggregate initialisation is unaffected.
    double value = 0.0;
    // Filled from the "provenanceId" field of the response observation.
    std::string provenance_id;
};

// Client for the Data Commons REST API rooted at base_url_.
class DataCommons {
public:
// Stores `api_key`; Post() sends it in the X-API-Key header of each request.
DataCommons(const std::string& api_key);

// V2 Endpoints

// Posts {"nodes": dcids, "property": "->" + prop} to /v2/node.
// Returns a map keyed by the entries of the response's "data" object; the map
// is empty when the request fails or the response is not valid JSON.
std::map<std::string, std::vector<PropertyValue>> GetPropertyValues(const std::vector<std::string>& dcids, const std::string& prop);

// Posts a query for `variables` and `entities` at `date` to /v2/observation.
// Returns the observations grouped as result[variable][entity]; the map is
// empty when the request fails or the response is not valid JSON.
std::map<std::string, std::map<std::string, std::vector<Observation>>> GetObservations(
const std::vector<std::string>& variables,
const std::vector<std::string>& entities,
const std::string& date);

private:
// Key passed to the constructor.
std::string api_key_;
// Root URL that endpoint paths are appended to.
std::string base_url_ = "https://api.datacommons.org";

// Sends `body` as a JSON POST to base_url_ + endpoint. Returns the response
// text on HTTP 200; otherwise writes the error to std::cerr and returns an
// empty string.
std::string Post(const std::string& endpoint, const std::string& body);
};

} // namespace datacommons

#endif // DATACOMMONS_H
114 changes: 114 additions & 0 deletions cpp-client/src/DataCommons.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#include "DataCommons.h"
#include <cpr/cpr.h>
#include <nlohmann/json.hpp>
#include <iostream>

namespace datacommons {

// Builds a client that keeps a copy of `api_key` for use in later requests.
DataCommons::DataCommons(const std::string& api_key)
    : api_key_(api_key) {
}

std::map<std::string, std::vector<PropertyValue>> DataCommons::GetPropertyValues(const std::vector<std::string>& dcids, const std::string& prop) {
nlohmann::json body = {
{"nodes", dcids},
{"property", "->" + prop}
};

std::string response = Post("/v2/node", body.dump());
if (response.empty()) {
return {};
}

auto json = nlohmann::json::parse(response, nullptr, false);
if (json.is_discarded()) {
std::cerr << "Failed to parse JSON response." << std::endl;
return {};
}

std::map<std::string, std::vector<PropertyValue>> result;
if (json.contains("data")) {
for (auto const& [dcid, data] : json["data"].items()) {
if (data.contains("arcs")) {
std::string arc_prop = "name";
if (data["arcs"].contains(arc_prop)) {
for (const auto& node : data["arcs"][arc_prop]["nodes"]) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The property to look for in the response JSON is hardcoded to "name". This is incorrect and should use the prop argument passed to the function. This bug prevents retrieving values for any property other than name.

                if (data["arcs"].contains(prop)) {
                    for (const auto& node : data["arcs"][prop]["nodes"]) {

if (node.contains("provenanceId") && node.contains("value")) {
result[dcid].push_back({node["provenanceId"], node["value"]});
}
}
}
}
}
}

return result;
}

std::map<std::string, std::map<std::string, std::vector<Observation>>> DataCommons::GetObservations(
const std::vector<std::string>& variables,
const std::vector<std::string>& entities,
const std::string& date) {
nlohmann::json body = {
{"select", {"variable", "entity", "date", "value", "provenanceId"}},
{"from", {
{"variable", {{"dcids", variables}}},
{"entity", {{"dcids", entities}}},
{"date", date}
}}
};

std::string response = Post("/v2/observation", body.dump());
if (response.empty()) {
return {};
}

auto json = nlohmann::json::parse(response, nullptr, false);
if (json.is_discarded()) {
std::cerr << "Failed to parse JSON response." << std::endl;
return {};
}

std::map<std::string, std::map<std::string, std::vector<Observation>>> result;
if (json.contains("byVariable")) {
for (const auto& var_data : json["byVariable"]) {
if (var_data.contains("variable") && var_data.contains("byEntity")) {
std::string variable = var_data["variable"];
for (const auto& entity_data : var_data["byEntity"]) {
if (entity_data.contains("entity") && entity_data.contains("observations")) {
std::string entity = entity_data["entity"];
for (const auto& obs : entity_data["observations"]) {
if (obs.contains("date") && obs.contains("value") && obs.contains("provenanceId")) {
result[variable][entity].push_back({
obs["date"],
obs["value"],
obs["provenanceId"]
});
}
}
}
}
}
}
}

return result;
}

// Sends `body` as a JSON POST request to base_url_ + endpoint, passing the
// stored API key in the X-API-Key header.
//
// Returns the response text when the HTTP status is exactly 200. For any other
// status, the status code, cpr's error message and the response text are
// written to std::cerr and an empty string is returned, so callers see every
// kind of failure as the same empty result.
std::string DataCommons::Post(const std::string& endpoint, const std::string& body) {
cpr::Url url = cpr::Url{base_url_ + endpoint};

// A fresh session is configured for each call.
cpr::Session session;
session.SetUrl(url);
session.SetHeader({{"X-API-Key", api_key_}, {"Content-Type", "application/json"}});
session.SetBody(body);

cpr::Response r = session.Post();
if (r.status_code == 200) {
return r.text;
} else {
// Report the failure on stderr; the empty return value below is the only
// signal the caller receives.
std::cerr << "Error: " << r.status_code << " - " << r.error.message << std::endl;
std::cerr << r.text << std::endl;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Writing errors directly to std::cerr is not ideal for a library, as it removes control from the client application on how to handle or log errors. A better approach would be to propagate errors to the caller, for example by throwing an exception or returning an object that encapsulates the result or error. This would allow the user of your library to implement their own error handling logic (e.g., logging to a file, showing a UI message).

return "";
}
}

} // namespace datacommons