bes Updated for version 3.20.10
NgapApi.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of ngap_module, A C++ module that can be loaded in to
4// the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5
6// Copyright (c) 2020 OPeNDAP, Inc.
7// Author: Nathan Potter <ndp@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27#include <cstdio>
28#include <cstring>
29#include <iostream>
30#include <sstream>
31#include <memory>
32#include <time.h>
33#include <curl/curl.h>
34
35#include <libdap/util.h>
36#include <libdap/debug.h>
37
38#include "rapidjson/document.h"
39#include "rapidjson/writer.h"
40#include "rapidjson/prettywriter.h"
41#include "rapidjson/stringbuffer.h"
42#include "rapidjson/filereadstream.h"
43
44#include "BESError.h"
45#include "BESNotFoundError.h"
46#include "BESSyntaxUserError.h"
47#include "BESDebug.h"
48#include "BESUtil.h"
49#include "BESStopWatch.h"
50#include "BESLog.h"
51#include "TheBESKeys.h"
52#include "CurlUtils.h"
53#include "url_impl.h"
54#include "RemoteResource.h"
55
56#include "NgapApi.h"
57#include "NgapNames.h"
58#include "NgapError.h"
59
60using namespace std;
61
62#define prolog string("NgapApi::").append(__func__).append("() - ")
63
64namespace ngap {
65
// Remaining-lifetime threshold, in seconds: signed_url_is_expired() reports a signed URL
// as expired once the time left before its expiration drops below this value.
66const unsigned int REFRESH_THRESHOLD = 3600; // An hour
67
68
69NgapApi::NgapApi() : d_cmr_hostname(DEFAULT_CMR_ENDPOINT_URL), d_cmr_search_endpoint_path(DEFAULT_CMR_SEARCH_ENDPOINT_PATH) {
70 bool found;
71 string cmr_hostname;
72 TheBESKeys::TheKeys()->get_value(NGAP_CMR_HOSTNAME_KEY, cmr_hostname, found);
73 if (found) {
74 d_cmr_hostname = cmr_hostname;
75 }
76
77 string cmr_search_endpoint_path;
78 TheBESKeys::TheKeys()->get_value(NGAP_CMR_SEARCH_ENDPOINT_PATH_KEY, cmr_search_endpoint_path, found);
79 if (found) {
80 d_cmr_search_endpoint_path = cmr_search_endpoint_path;
81 }
82
83
84}
85
86std::string NgapApi::get_cmr_search_endpoint_url(){
87 return BESUtil::assemblePath(d_cmr_hostname , d_cmr_search_endpoint_path);
88}
89
90
91
99std::string NgapApi::build_cmr_query_url_old_rpath_format(const std::string &restified_path) {
100
101 // Make sure it starts with a '/' (see key strings above)
102 string r_path = ( restified_path[0] != '/' ? "/" : "") + restified_path;
103
104 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
105 if(provider_index == string::npos){
106 stringstream msg;
107 msg << prolog << "The specified path '" << r_path << "'";
108 msg << " does not contain the required path element '" << NGAP_PROVIDERS_KEY << "'";
109 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
110 }
111 if(provider_index != 0){
112 stringstream msg;
113 msg << prolog << "The specified path '" << r_path << "'";
114 msg << " has the path element '" << NGAP_PROVIDERS_KEY << "' located in the incorrect position (";
115 msg << provider_index << ") expected 0.";
116 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
117 }
118 provider_index += string(NGAP_PROVIDERS_KEY).length();
119
120 bool use_collection_concept_id = false;
121 size_t collection_index = r_path.find(NGAP_COLLECTIONS_KEY);
122 if(collection_index == string::npos) {
123 size_t concepts_index = r_path.find(NGAP_CONCEPTS_KEY);
124 if (concepts_index == string::npos) {
125 stringstream msg;
126 msg << prolog << "The specified path '" << r_path << "'";
127 msg << " contains neither the '" << NGAP_COLLECTIONS_KEY << "'";
128 msg << " nor the '" << NGAP_CONCEPTS_KEY << "'";
129 msg << " key, one must be provided.";
130 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
131 }
132 collection_index = concepts_index;
133 use_collection_concept_id = true;
134 }
135 if(collection_index <= provider_index+1){ // The value of provider has to be at least 1 character
136 stringstream msg;
137 msg << prolog << "The specified path '" << r_path << "'";
138 msg << " has the path element '" << (use_collection_concept_id?NGAP_CONCEPTS_KEY:NGAP_COLLECTIONS_KEY) << "' located in the incorrect position (";
139 msg << collection_index << ") expected at least " << provider_index+1;
140 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
141 }
142 string provider = r_path.substr(provider_index,collection_index - provider_index);
143 collection_index += use_collection_concept_id?string(NGAP_CONCEPTS_KEY).length():string(NGAP_COLLECTIONS_KEY).length();
144
145
146 size_t granule_index = r_path.find(NGAP_GRANULES_KEY);
147 if(granule_index == string::npos){
148 stringstream msg;
149 msg << prolog << "The specified path '" << r_path << "'";
150 msg << " does not contain the required path element '" << NGAP_GRANULES_KEY << "'";
151 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
152 }
153 if(granule_index <= collection_index+1){ // The value of collection must have at least one character.
154 stringstream msg;
155 msg << prolog << "The specified path '" << r_path << "'";
156 msg << " has the path element '" << NGAP_GRANULES_KEY << "' located in the incorrect position (";
157 msg << granule_index << ") expected at least " << collection_index+1;
158 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
159 }
160 string collection = r_path.substr(collection_index,granule_index - collection_index);
161 granule_index += string(NGAP_GRANULES_KEY).length();
162
163 // The granule value is the path terminus so it's every thing after the key
164 string granule = r_path.substr(granule_index);
165
166 // Build the CMR query URL for the dataset
167 string cmr_url = get_cmr_search_endpoint_url() + "?";
168 {
169 // This easy handle is only created so we can use the curl_easy_escape() on the token values
170 CURL *ceh = curl_easy_init();
171 char *esc_url_content;
172
173 // Add provider
174 esc_url_content = curl_easy_escape(ceh, provider.c_str(), provider.size());
175 cmr_url += string(CMR_PROVIDER).append("=").append(esc_url_content).append("&");
176 curl_free(esc_url_content);
177
178 esc_url_content = curl_easy_escape(ceh, collection.c_str(), collection.size());
179 if(use_collection_concept_id){
180 // Add collection_concept_id
181 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append("=").append(esc_url_content).append("&");
182 }
183 else {
184 // Add entry_title
185 cmr_url += string(CMR_ENTRY_TITLE).append("=").append(esc_url_content).append("&");
186
187 }
188 curl_free(esc_url_content);
189
190 esc_url_content = curl_easy_escape(ceh, granule.c_str(), granule.size());
191 cmr_url += string(CMR_GRANULE_UR).append("=").append(esc_url_content);
192 curl_free(esc_url_content);
193
194 curl_easy_cleanup(ceh);
195 }
196 return cmr_url;
197}
198
215std::string NgapApi::build_cmr_query_url(const std::string &restified_path) {
216
217 // Make sure it starts with a '/' (see key strings above)
218 string r_path = ( restified_path[0] != '/' ? "/" : "") + restified_path;
219
220 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
221 if(provider_index != string::npos){
222 return build_cmr_query_url_old_rpath_format(restified_path);
223 }
224
225 size_t collections_key_index = r_path.find(NGAP_COLLECTIONS_KEY);
226 if(collections_key_index == string::npos) {
227 stringstream msg;
228 msg << prolog << "The specified path '" << r_path << "'";
229 msg << " contains neither the '" << NGAP_COLLECTIONS_KEY << "'";
230 msg << " nor the '" << NGAP_CONCEPTS_KEY << "'";
231 msg << " one must be provided.";
232 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
233 }
234 if(collections_key_index != 0){ // The COLLECTIONS_KEY comes first
235 stringstream msg;
236 msg << prolog << "The specified path '" << r_path << "'";
237 msg << " has the path element '" << NGAP_COLLECTIONS_KEY << "' located in the incorrect position (";
238 msg << collections_key_index << ") expected at least " << provider_index + 1;
239 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
240 }
241 // This is now the beginning of the collection_concept_id value.
242 size_t collections_index = collections_key_index + string(NGAP_COLLECTIONS_KEY).length();
243
244 size_t granules_key_index = r_path.find(NGAP_GRANULES_KEY);
245 if(granules_key_index == string::npos){
246 stringstream msg;
247 msg << prolog << "The specified path '" << r_path << "'";
248 msg << " does not contain the required path element '" << NGAP_GRANULES_KEY << "'";
249 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
250 }
251
252 // The collection key must precede the granules key in the path,
253 // and the collection name must have at least one character.
254 if(granules_key_index <= collections_index + 1){
255 stringstream msg;
256 msg << prolog << "The specified path '" << r_path << "'";
257 msg << " has the path element '" << NGAP_GRANULES_KEY << "' located in the incorrect position (";
258 msg << granules_key_index << ") expected at least " << collections_index + 1;
259 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
260 }
261 size_t granules_index = granules_key_index + string(NGAP_GRANULES_KEY).length();
262 // The granule_name value is the path terminus so it's every thing after the key
263 string granule_name = r_path.substr(granules_index);
264
265 // Now we need to work on the collections value to eliminate the optional parts.
266 // This is the entire collections string including any optional components.
267 string collection_name = r_path.substr(collections_index, granules_key_index - collections_index);
268
269 // Since there may be optional parameters we need to strip them off to get the collection_concept_id
270 // And, since we know that collection_concept_id will never contain a '/', and we know that the optional
271 // part is separated from the collection_concept_id by a '/' we look for that and of we find it we truncate
272 // the value at that spot.
273 string optional_part;
274 size_t slash_pos = collection_name.find('/');
275 if(slash_pos != string::npos){
276 optional_part = collection_name.substr(slash_pos);
277 BESDEBUG(MODULE, prolog << "Found optional collections name component: " << optional_part << endl);
278 collection_name = collection_name.substr(0,slash_pos);
279 }
280 BESDEBUG(MODULE, prolog << "Found collection_name (aka collection_concept_id): " << collection_name << endl);
281
282 // Build the CMR query URL for the dataset
283 string cmr_url = get_cmr_search_endpoint_url() + "?";
284 {
285 // This easy handle is only created so we can use the curl_easy_escape() on the token values
286 CURL *ceh = curl_easy_init();
287 char *esc_url_content;
288
289 esc_url_content = curl_easy_escape(ceh, collection_name.c_str(), collection_name.size());
290 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append("=").append(esc_url_content).append("&");
291 curl_free(esc_url_content);
292
293 esc_url_content = curl_easy_escape(ceh, granule_name.c_str(), granule_name.size());
294 cmr_url += string(CMR_GRANULE_UR).append("=").append(esc_url_content);
295 curl_free(esc_url_content);
296
297 curl_easy_cleanup(ceh);
298 }
299 return cmr_url;
300}
301
312std::string NgapApi::find_get_data_url_in_granules_umm_json_v1_4(const std::string &restified_path, rapidjson::Document &cmr_granule_response)
313{
314
315 string data_access_url;
316
317 rapidjson::Value &val = cmr_granule_response["hits"];
318 int hits = val.GetInt();
319 if (hits < 1) {
320 throw BESNotFoundError(string("The specified path '").append(restified_path).append(
321 "' does not identify a granule in CMR."), __FILE__, __LINE__);
322 }
323
324 rapidjson::Value &items = cmr_granule_response["items"];
325 if (items.IsArray()) {
326 stringstream ss;
327 if(BESDebug::IsSet(MODULE)){
328 const string RJ_TYPE_NAMES[] = {string("kNullType"),string("kFalseType"),string("kTrueType"),
329 string("kObjectType"),string("kArrayType"),string("kStringType"),string("kNumberType")};
330 for (rapidjson::SizeType i = 0; i < items.Size(); i++) // Uses SizeType instead of size_t
331 ss << "items[" << i << "]: " << RJ_TYPE_NAMES[items[i].GetType()] << endl;
332 BESDEBUG(MODULE, prolog << "items size: " << items.Size() << endl << ss.str() << endl);
333 }
334
335 rapidjson::Value &items_obj = items[0];
336 // rapidjson::GenericMemberIterator<false, rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator>> mitr = items_obj.FindMember("umm");
337 auto mitr = items_obj.FindMember("umm");
338
339 rapidjson::Value &umm = mitr->value;
340 mitr = umm.FindMember("RelatedUrls");
341 if (mitr == umm.MemberEnd()) {
342 throw BESInternalError("Error! The umm/RelatedUrls object was not located!", __FILE__, __LINE__);
343 }
344 rapidjson::Value &related_urls = mitr->value;
345
346 if (!related_urls.IsArray()) {
347 throw BESNotFoundError("Error! The RelatedUrls object in the CMR response is not an array!", __FILE__,
348 __LINE__);
349 }
350
351 BESDEBUG(MODULE, prolog << " Found RelatedUrls array in CMR response." << endl);
352
353 bool noSubtype;
354 for (rapidjson::SizeType i = 0; i < related_urls.Size() && data_access_url.empty(); i++) {
355 rapidjson::Value &obj = related_urls[i];
356 mitr = obj.FindMember("URL");
357 if (mitr == obj.MemberEnd()) {
358 stringstream err;
359 err << "Error! The umm/RelatedUrls[" << i << "] does not contain the URL object";
360 throw BESInternalError(err.str(), __FILE__, __LINE__);
361 }
362 rapidjson::Value &r_url = mitr->value;
363
364 mitr = obj.FindMember("Type");
365 if (mitr == obj.MemberEnd()) {
366 stringstream err;
367 err << "Error! The umm/RelatedUrls[" << i << "] does not contain the Type object";
368 throw BESInternalError(err.str(), __FILE__, __LINE__);
369 }
370 rapidjson::Value &r_type = mitr->value;
371
372 noSubtype = obj.FindMember("Subtype") == obj.MemberEnd();
373
374 BESDEBUG(MODULE, prolog << "RelatedUrl Object:" <<
375 " URL: '" << r_url.GetString() << "'" <<
376 " Type: '" << r_type.GetString() << "'" <<
377 " SubType: '" << (noSubtype ? "Absent" : "Present") << "'" << endl);
378
379 if ((r_type.GetString() == string(CMR_URL_TYPE_GET_DATA)) && noSubtype) {
380
381 // Because a member of RelatedUrls may contain a URL of Type GET DATA with the s3:// protocol
382 // as well as a Type GET DATA URL which uses https:// or http://
383 string candidate_url = r_url.GetString();
384 if(candidate_url.substr(0,8) == "https://" || candidate_url.substr(0,7) == "http://"){
385 data_access_url = candidate_url;
386 }
387 }
388 }
389 }
390
391 if (data_access_url.empty()) {
392 throw BESInternalError(string("ERROR! Failed to locate a data access URL for the path: ") + restified_path,
393 __FILE__, __LINE__);
394 }
395
396 return data_access_url;
397}
398
399
400
423 string NgapApi::convert_ngap_resty_path_to_data_access_url(
424 const std::string &restified_path,
425 const std::string &uid
426 ) {
427 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
428 string data_access_url;
429
430 string cmr_query_url = build_cmr_query_url(restified_path);
431
432 BESDEBUG(MODULE, prolog << "CMR Request URL: " << cmr_query_url << endl);
433
434 BESDEBUG(MODULE, prolog << "Building new RemoteResource." << endl);
435 std::shared_ptr<http::url> cmr_query_url_ptr(new http::url(cmr_query_url));
436 http::RemoteResource cmr_query(cmr_query_url_ptr, uid);
437 {
438 BESStopWatch besTimer;
439 if (BESISDEBUG(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) || BESLog::TheLog()->is_verbose()){
440 besTimer.start("CMR Query: " + cmr_query_url);
441 }
442 cmr_query.retrieveResource();
443 }
444 rapidjson::Document cmr_response = cmr_query.get_as_json();
445
446 data_access_url = find_get_data_url_in_granules_umm_json_v1_4(restified_path, cmr_response);
447
448 BESDEBUG(MODULE, prolog << "END (data_access_url: "<< data_access_url << ")" << endl);
449
450 return data_access_url;
451 }
452
453
454
455
456 bool NgapApi::signed_url_is_expired(const http::url &signed_url)
457 {
458 bool is_expired;
459 time_t now;
460 time(&now); /* get current time; same as: timer = time(NULL) */
461 BESDEBUG(MODULE, prolog << "now: " << now << endl);
462
463 time_t expires = now;
464 string cf_expires = signed_url.query_parameter_value(CLOUDFRONT_EXPIRES_HEADER_KEY);
465 string aws_expires = signed_url.query_parameter_value(AMS_EXPIRES_HEADER_KEY);
466 time_t ingest_time = signed_url.ingest_time();
467
468 if(!cf_expires.empty()){ // CloudFront expires header?
469 expires = stoll(cf_expires);
470 BESDEBUG(MODULE, prolog << "Using "<< CLOUDFRONT_EXPIRES_HEADER_KEY << ": " << expires << endl);
471 }
472 else if(!aws_expires.empty()){
473 // AWS Expires header?
474 //
475 // By default we'll use the time we made the URL object, ingest_time
476 time_t start_time = ingest_time;
477 // But if there's an AWS Date we'll parse that and compute the time
478 // @TODO move to NgapApi::decompose_url() and add the result to the map
479 string aws_date = signed_url.query_parameter_value(AWS_DATE_HEADER_KEY);
480 if(!aws_date.empty()){
481 string date = aws_date; // 20200624T175046Z
482 string year = date.substr(0,4);
483 string month = date.substr(4,2);
484 string day = date.substr(6,2);
485 string hour = date.substr(9,2);
486 string minute = date.substr(11,2);
487 string second = date.substr(13,2);
488
489 BESDEBUG(MODULE, prolog << "date: "<< date <<
490 " year: " << year << " month: " << month << " day: " << day <<
491 " hour: " << hour << " minute: " << minute << " second: " << second << endl);
492
493 struct tm *ti = gmtime(&now);
494 ti->tm_year = stoll(year) - 1900;
495 ti->tm_mon = stoll(month) - 1;
496 ti->tm_mday = stoll(day);
497 ti->tm_hour = stoll(hour);
498 ti->tm_min = stoll(minute);
499 ti->tm_sec = stoll(second);
500
501 BESDEBUG(MODULE, prolog << "ti->tm_year: "<< ti->tm_year <<
502 " ti->tm_mon: " << ti->tm_mon <<
503 " ti->tm_mday: " << ti->tm_mday <<
504 " ti->tm_hour: " << ti->tm_hour <<
505 " ti->tm_min: " << ti->tm_min <<
506 " ti->tm_sec: " << ti->tm_sec << endl);
507
508
509 start_time = mktime(ti);
510 BESDEBUG(MODULE, prolog << "AWS (computed) start_time: "<< start_time << endl);
511 }
512 expires = start_time + stoll(aws_expires);
513 BESDEBUG(MODULE, prolog << "Using "<< AMS_EXPIRES_HEADER_KEY << ": " << aws_expires <<
514 " (expires: " << expires << ")" << endl);
515 }
516 time_t remaining = expires - now;
517 BESDEBUG(MODULE, prolog << "expires_time: " << expires <<
518 " remaining_time: " << remaining <<
519 " refresh_threshold: " << REFRESH_THRESHOLD << endl);
520
521 is_expired = remaining < REFRESH_THRESHOLD;
522 BESDEBUG(MODULE, prolog << "is_expired: " << (is_expired?"true":"false") << endl);
523
524 return is_expired;
525 }
526
527} // namespace ngap
528
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
error thrown if there is a user syntax error in the request or any other user error
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
rapidjson::Document get_as_json()
get_as_json() This function returns the cached resource parsed into a JSON document.
virtual std::string query_parameter_value(const std::string &key) const
Definition: url_impl.cc:252
GenericValue< UTF8<> > Value
GenericValue with UTF8 encoding.
Definition: document.h:2189
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
Definition: document.h:2585
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384