bes Updated for version 3.20.10
retriever.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2016 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24
25#include "config.h"
26
27#include <fcntl.h>
28
29#include <unistd.h>
30#include <time.h>
31
32#include <memory>
33#include <cstdlib>
34#include <cstring>
35#include <cassert>
36#include <cerrno>
37#include <sstream>
38#include <iostream>
39#include <fstream>
40#include <GetOpt.h>
41
42#include <curl/curl.h>
43
44
45#include <libdap/D4Dimensions.h>
46#include <libdap/D4StreamMarshaller.h>
47
48#include "BESInternalError.h"
49#include "BESUtil.h"
50#include "CurlUtils.h"
51#include "TheBESKeys.h"
52#include "BESLog.h"
53#include "BESDebug.h"
54#include "BESStopWatch.h"
55
56#include "awsv4.h"
57#include "HttpNames.h"
58#include "url_impl.h"
59#include "EffectiveUrl.h"
60#include "EffectiveUrlCache.h"
61#include "RemoteResource.h"
62
63#include "Chunk.h"
64#include "CredentialsManager.h"
65#include "AccessCredentials.h"
66#include "CredentialsManager.h"
67#include "CurlHandlePool.h"
68#include "DmrppCommon.h"
69#include "DmrppRequestHandler.h"
70#include "DmrppByte.h"
71#include "DmrppArray.h"
72#include "DMRpp.h"
73#include "DmrppTypeFactory.h"
74#include "DmrppD4Group.h"
75#include "DmrppParserSax2.h"
76
77//#include <memory>
78//#include <iterator>
79//#include <algorithm>
80
81
// Verbosity switches, set from the command line in main():
//   bes_debug - when true, bes_setup() passes the debug log/keys to BESDebug::SetUp()
//   debug     - enables the BEGIN/END tracing and progress messages written to cerr
//   Debug     - enables the most detailed output (response headers, built datasets)
bool bes_debug{false};
bool debug{false};
bool Debug{false};

using std::string;
using std::endl;
using std::cerr;

// Prefix for every diagnostic: "retriever::<enclosing function>() - ". This has to stay a
// macro so that __func__ is expanded in the function that uses it.
#define prolog std::string("retriever::").append(__func__).append("() - ")

// Pre-computed SHA-256 digest of a null (empty) message body; sent as the
// x-amz-content-sha256 header on signed requests.
#define NULL_BODY_HASH "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

93
94
/**
 * @brief Describes the current value of errno as text.
 * @return The string produced by strerror(errno), or "Unknown error." when
 * strerror() returns a null pointer.
 */
std::string get_errno() {
    const char *description = strerror(errno);
    return description ? std::string(description) : std::string("Unknown error.");
}
106
107
// Configures TheBESKeys (and optionally BESDebug) for this tool, then creates the dmr++
// request handler.
//
// NOTE(review): the line carrying this function's return type and name is not present in
// this listing. main() calls it as bes_setup(...) and stores the result in a
// dmrpp::DmrppRequestHandler *, which matches the object returned below.
//
// Parameters:
//   bes_config_file    - assigned to TheBESKeys::ConfigFile
//   bes_log_file       - stored under the "BES.LogName" key
//   bes_debug_log_file - joined with bes_debug_keys as "<file>,<keys>" and handed to
//   bes_debug_keys       BESDebug::SetUp(), but only when the global bes_debug flag is set
//   http_netrc_file    - stored under HTTP_NETRC_FILE_KEY when not empty
//   http_cache_dir     - stored under HTTP_CACHE_DIR_KEY when not empty
//
// Returns the DmrppRequestHandler allocated with new below; the caller owns it
// (main() deletes it).
118 const string &bes_config_file,
119 const string &bes_log_file,
120 const string &bes_debug_log_file,
121 const string &bes_debug_keys,
122 const string &http_netrc_file,
123 const string &http_cache_dir
124) {
125 if (debug) cerr << prolog << "BEGIN" << endl;
126
127 TheBESKeys::ConfigFile = bes_config_file; // Set the config file for TheBESKeys
128 TheBESKeys::TheKeys()->set_key("BES.LogName", bes_log_file); // Set the log file so it goes where we say.
// The two AllowedHosts calls differ in their last argument: the first passes false, the
// second passes true (presumably "add to the existing value" - confirm against
// TheBESKeys::set_key) so that the file:/// pattern does not replace the http(s) one.
129 TheBESKeys::TheKeys()->set_key("AllowedHosts", "^https?:\\/\\/.*$", false); // Set AllowedHosts to allow any URL
130 TheBESKeys::TheKeys()->set_key("AllowedHosts", "^file:\\/\\/\\/.*$", true); // Set AllowedHosts to allow any file
131
132 if (bes_debug) BESDebug::SetUp(bes_debug_log_file + "," + bes_debug_keys); // Enable BESDebug settings
133
134
135 if (!http_netrc_file.empty()) {
136 TheBESKeys::TheKeys()->set_key(HTTP_NETRC_FILE_KEY, http_netrc_file, false); // Set the netrc file
137 }
138
139 if (!http_cache_dir.empty()) {
140 TheBESKeys::TheKeys()->set_key(HTTP_CACHE_DIR_KEY, http_cache_dir, false); // Set the HTTP cache directory
141 }
142
143 // Initialize the dmr++ goodness.
// The handler is created only after all of the keys above have been set.
144 auto foo = new dmrpp::DmrppRequestHandler("Chaos");
145
146 if (debug) cerr << prolog << "END" << endl;
147 return foo;
148}
149
150curl_slist *aws_sign_request_url(shared_ptr<http::url> &target_url, curl_slist *request_headers) {
151
152 if (debug) cerr << prolog << "BEGIN" << endl;
153
154 AccessCredentials *credentials = CredentialsManager::theCM()->get(target_url);
155 if (credentials && credentials->is_s3_cred()) {
156 if (debug)
157 cerr << prolog << "Got AWS S3 AccessCredentials instance: " << endl << credentials->to_json() << endl;
158 // If there are available credentials, and they are S3 credentials then we need to sign
159 // the request
160 const std::time_t request_time = std::time(0);
161
162 const std::string auth_header =
163 AWSV4::compute_awsv4_signature(
164 target_url,
165 request_time,
166 credentials->get(AccessCredentials::ID_KEY),
167 credentials->get(AccessCredentials::KEY_KEY),
168 credentials->get(AccessCredentials::REGION_KEY),
169 "s3");
170
171 // passing nullptr for the first call allocates the curl_slist
172 // The following code builds the slist that holds the headers. This slist is freed
173 // once the URL is dereferenced in dmrpp_easy_handle::read_data(). jhrg 11/26/19
174 request_headers = curl::append_http_header(request_headers, "Authorization", auth_header);
175
176 // We pre-compute the sha256 hash of a null message body
177 request_headers = curl::append_http_header(request_headers, "x-amz-content-sha256", NULL_BODY_HASH);
178 request_headers = curl::append_http_header(request_headers, "x-amz-date", AWSV4::ISO8601_date(request_time));
179 }
180 if (debug) cerr << prolog << "END" << endl;
181 return request_headers;
182}
183
189size_t get_remote_size(shared_ptr<http::url> &target_url, bool aws_signing) {
190 if (debug) cerr << prolog << "BEGIN" << endl;
191
192 char error_buffer[CURL_ERROR_SIZE];
193 std::vector<std::string> resp_hdrs;
194 curl_slist *request_headers = nullptr;
195
196 request_headers = curl::add_edl_auth_headers(request_headers);
197
198 if (aws_signing)
199 request_headers = aws_sign_request_url(target_url, request_headers);
200
201 CURL *ceh = curl::init(target_url->str(), request_headers, &resp_hdrs);
202 curl::set_error_buffer(ceh, error_buffer);
203
204 // In cURLville, CURLOPT_NOBODY means a HEAD request i.e. Don't send the response body a.k.a. "NoBody"
205 CURLcode curl_status = curl_easy_setopt(ceh, CURLOPT_NOBODY, 1L);
206 curl::eval_curl_easy_setopt_result(curl_status, prolog, "CURLOPT_NOBODY", error_buffer, __FILE__, __LINE__);
207
208 if (Debug) cerr << prolog << "cURL HEAD request is configured" << endl;
209
210 curl::super_easy_perform(ceh);
211
212 curl::unset_error_buffer(ceh);
213 if (request_headers)
214 curl_slist_free_all(request_headers);
215 if (ceh)
216 curl_easy_cleanup(ceh);
217
218 bool done = false;
219 size_t how_big_it_is = 0;
220 string content_length_hdr_key("content-length: ");
221 for (size_t i = 0; !done && i < resp_hdrs.size(); i++) {
222 if (Debug) cerr << prolog << "HEADER[" << i << "]: " << resp_hdrs[i] << endl;
223 string lc_header = BESUtil::lowercase(resp_hdrs[i]);
224 size_t index = lc_header.find(content_length_hdr_key);
225 if (index == 0) {
226 string value = lc_header.substr(content_length_hdr_key.size());
227 how_big_it_is = stol(value);
228 done = true;
229 }
230 }
231 if (!done)
232 throw BESInternalError(prolog + "Failed to determine size of target resource: " + target_url->str(), __FILE__, __LINE__);
233
234 if (debug) cerr << prolog << "END" << endl;
235
236 return how_big_it_is;
237}
238size_t get_max_retrival_size(const size_t &max_target_size, shared_ptr<http::url> &target_url) {
239 size_t target_size = max_target_size;
240 if (max_target_size == 0) {
241 target_size = get_remote_size(target_url, true);
242 if (debug) cerr << prolog << "Remote resource size is " << max_target_size << " bytes. " << endl;
243 }
244 return target_size;
245}
246
252void simple_get(const string target_url_str, const string output_file_base) {
253
254 string output_file = output_file_base + "_simple_get.out";
255 vector<string> resp_hdrs;
256 mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
257 int fd;
258 if ((fd = open(output_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, mode)) < 0) {
259 throw BESInternalError(get_errno(), __FILE__, __LINE__);
260 }
261 {
262 BESStopWatch sw;
263 sw.start(prolog + "url: " + target_url_str);
264 shared_ptr<http::url> target_url(new http::url(target_url_str));
265 curl::http_get_and_write_resource(target_url, fd,
266 &resp_hdrs); // Throws BESInternalError if there is a curl error.
267 }
268 close(fd);
269
270 if (Debug) {
271 for (size_t i = 0; i < resp_hdrs.size(); i++) {
272 cerr << prolog << "ResponseHeader[" << i << "]: " << resp_hdrs[i] << endl;
273 }
274 }
275}
276
277
285void make_chunks(shared_ptr<http::url> &target_url, const size_t &target_size, const size_t &chunk_count,
286 vector<dmrpp::Chunk *> &chunks) {
287 if (debug) cerr << prolog << "BEGIN" << endl;
288 size_t chunk_size = target_size / chunk_count;
289 size_t chunk_start = 0;
290 size_t chunk_index;
291 for (chunk_index = 0; chunk_index < chunk_count; chunk_index++) {
292 vector<unsigned long long> position_in_array;
293 position_in_array.push_back(chunk_index);
294 if (debug)
295 cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
296 << chunk_size << endl;
297 auto chunk = new dmrpp::Chunk(target_url, "LE", chunk_size, chunk_start, position_in_array);
298 chunk_start += chunk_size;
299 chunks.push_back(chunk);
300 }
301 if (target_size % chunk_size) {
302 // So there's a remainder and we should make a final chunk for it too.
303 size_t last_chunk_size = target_size - chunk_start;
304 if (debug)
305 cerr << prolog << "Remainder chunk. chunk[" << chunks.size() << "] last_chunk_size: " << last_chunk_size
306 << endl;
307 if (debug)
308 cerr << prolog << "Remainder chunk! target_size: " << target_size << " index: " << chunk_index
309 << " last_chunk_start: " << chunk_start << " last_chunk_size: " << last_chunk_size << endl;
310 if (last_chunk_size > 0) {
311 vector<unsigned long long> position_in_array;
312 position_in_array.push_back(chunk_index);
313 if (debug)
314 cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
315 << last_chunk_size << endl;
316 auto last_chunk = new dmrpp::Chunk(target_url, "LE", last_chunk_size, chunk_start, position_in_array);
317 chunks.push_back(last_chunk);
318 }
319 }
320 if (debug) cerr << prolog << "END chunks: " << chunks.size() << endl;
321}
322
323
330void serial_chunky_get(shared_ptr<http::url> &target_url, const size_t target_size, const unsigned long chunk_count,
331 const string &output_file_base) {
332
333 shared_ptr<http::url> effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
334 if (debug) cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl->str() << endl;
335 size_t retrieval_size = get_max_retrival_size(target_size, effectiveUrl);
336
337 string output_file = output_file_base + "_serial_chunky_get.out";
338 vector<dmrpp::Chunk *> chunks;
339 make_chunks(target_url, retrieval_size, chunk_count, chunks);
340
341 std::ofstream ofs;
342 ofs.open(output_file, std::fstream::in | std::fstream::out | std::ofstream::trunc | std::ofstream::binary);
343 if (ofs.fail())
344 throw BESInternalError(prolog + "Failed to open file: " + output_file, __FILE__, __LINE__);
345
346 for (size_t i = 0; i < chunks.size(); i++) {
347 stringstream ss;
348 ss << prolog << "chunk={index: " << i << ", offset: " << chunks[i]->get_offset() << ", size: "
349 << chunks[i]->get_size() << "}";
350
351 {
352 BESStopWatch sw;
353 sw.start(ss.str());
354 chunks[i]->read_chunk();
355 }
356
357 if (debug) cerr << ss.str() << " retrieval from: " << target_url << " completed, timing finished." << endl;
358 ofs.write(chunks[i]->get_rbuf(), chunks[i]->get_rbuf_size());
359 if (debug) cerr << ss.str() << " has been written to: " << output_file << endl;
360 }
361 auto itr = chunks.begin();
362 while (itr != chunks.end()) {
363 delete *itr;
364 *itr = 0;
365 itr++;
366 }
367
368}
369
370
371void parse_dmrpp(const string &dmrpp_filename_url){
372 if(debug) cerr << prolog << "BEGIN" << endl;
373
375 string target_file_url = dmrpp_filename_url;
376 string target_file;
377
378 const string http_protocol("http://");
379 const string https_protocol("https://");
380 const string file_protocol("file://");
381
382 if(debug) cerr << prolog << "dmrpp_filename_url: " << dmrpp_filename_url << endl;
383
384 if(target_file_url.empty())
385 throw BESInternalError(prolog + "The dmr++ filename was empty.", __FILE__, __LINE__);
386
387
388 if(target_file_url.rfind(http_protocol,0)==0 || target_file_url.rfind(https_protocol,0)==0 ){
389 // Use RemoteResource to get the thing.
390 shared_ptr<http::url> tfile_url(new http::url(target_file_url));
391 http::RemoteResource target_resource(tfile_url,prolog+"Timer");
392 target_resource.retrieveResource();
393 target_file = target_resource.getCacheFileName();
394 }
395 else if(target_file_url.rfind(file_protocol,0)==0){
396 target_file = target_file_url.substr(file_protocol.length());
397 }
398 else {
399 target_file_url = file_protocol + target_file_url;
400 }
401
402 if(debug) cerr << prolog << " target_file: " << target_file << endl;
403
404 ifstream ifs(target_file);
405 if(ifs.fail())
406 throw BESInternalError(prolog + "Failed open to dmr++ file: " + dmrpp_filename_url, __FILE__, __LINE__);
407
409 dmrpp::DMRpp dmr(&factory);
410 dmr.set_href(target_file_url);
411 stringstream msg;
412 msg << prolog << dmrpp_filename_url;
413 {
414 BESStopWatch sw;
415 sw.start(msg.str());
416 parser.intern(ifs, &dmr);
417 }
418
419 if (Debug) {
420 cerr << prolog << "Built dataset: " << endl;
422 libdap::XMLWriter xmlWriter;
423 dmr.print_dmrpp(xmlWriter, dmr.get_href());
424 cerr << xmlWriter.get_doc() << endl;
425 }
426 if(debug) cerr << prolog << "END" << endl;
427
428
429}
430
431
432
439void add_chunks(shared_ptr<http::url> &target_url, const size_t &target_size, const size_t &chunk_count,
440 dmrpp::DmrppArray *target_array) {
441
442 if (debug) cerr << prolog << "BEGIN" << endl;
443
444 size_t chunk_size = target_size / chunk_count;
445 if (chunk_size == 0)
446 throw BESInternalError(prolog + "Chunk size was zero.", __FILE__, __LINE__);
447 stringstream chunk_dim_size;
448 chunk_dim_size << chunk_size;
449 target_array->parse_chunk_dimension_sizes(chunk_dim_size.str());
450
451 size_t chunk_start = 0;
452 size_t chunk_index;
453 for (chunk_index = 0; chunk_index < chunk_count; chunk_index++) {
454 vector<unsigned long long> position_in_array;
455 position_in_array.push_back(chunk_start);
456 if (debug)
457 cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
458 << chunk_size << " chunk_poa: " << position_in_array[0] << endl;
459 target_array->add_chunk(target_url, "LE", chunk_size, chunk_start, position_in_array);
460 chunk_start += chunk_size;
461 }
462 if (target_size % chunk_size) {
463 // So there's a remainder and we should make a final chunk for it too.
464 size_t last_chunk_size = target_size - chunk_start;
465 if (debug)
466 cerr << prolog << "Remainder chunk! target_size: " << target_size << " index: " << chunk_index
467 << " last_chunk_start: " << chunk_start << " last_chunk_size: " << last_chunk_size << endl;
468 if (last_chunk_size > 0) {
469 vector<unsigned long long> position_in_array;
470 position_in_array.push_back(chunk_start);
471 if (debug)
472 cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
473 << last_chunk_size << " chunk_poa: " << position_in_array[0] << endl;
474 target_array->add_chunk(target_url, "LE", last_chunk_size, chunk_start, position_in_array);
475 }
476 }
477 if (debug) cerr << prolog << "END" << endl;
478}
479
480
481
489size_t array_get(shared_ptr<http::url> &target_url, const size_t &target_size, const size_t &chunk_count,
490 const string &output_file_base) {
491
492 if (debug) cerr << prolog << "BEGIN" << endl;
493 string output_file = output_file_base + "_array_get.out";
494 std::ofstream ofs;
495 ofs.open(output_file, std::fstream::in | std::fstream::out | std::ofstream::trunc | std::ofstream::binary);
496 if (ofs.fail())
497 throw BESInternalError(prolog + "Failed to open file: " + output_file, __FILE__, __LINE__);
498
499 auto *tmplt = new dmrpp::DmrppByte("data");
500 auto *target_array = new dmrpp::DmrppArray("data", tmplt);
501 delete tmplt; // Because the Vector() constructor made a copy and it's our problem...
502
503 target_array->append_dim(target_size);
504 add_chunks(target_url, target_size, chunk_count, target_array);
505 target_array->set_send_p(true); // Mark it to be sent so that it will be read.
506
508 dmrpp::DMRpp dmr(&factory);
509 dmr.set_href(target_url->str());
510 dmrpp::DmrppD4Group *root = dynamic_cast<dmrpp::DmrppD4Group *>(dmr.root());
511 root->add_var_nocopy(target_array);
512 root->set_in_selection(true);
513
514 if (debug) {
515 cerr << prolog << "Built dataset: " << endl;
517 libdap::XMLWriter xmlWriter;
518 dmr.print_dmrpp(xmlWriter, dmr.get_href());
519 cerr << xmlWriter.get_doc() << endl;
520 }
521
522 {
523 stringstream timer_msg;
524 timer_msg << prolog << "DmrppD4Group.intern_data() for " << target_size << " bytes in " << chunk_count <<
525 " chunks, parallel transfers ";
526 if (dmrpp::DmrppRequestHandler::d_use_transfer_threads) {
527 timer_msg << "enabled. (max: " << dmrpp::DmrppRequestHandler::d_max_transfer_threads << ")";
528 } else {
529 timer_msg << "disabled.";
530 }
531 BESStopWatch sw;
532 sw.start(timer_msg.str());
533 root->intern_data();
534 }
535
536 size_t started = ofs.tellp();
537 libdap::D4StreamMarshaller streamMarshaller(ofs);
538 root->serialize(streamMarshaller, dmr);
539
540 size_t stopped = ofs.tellp();
541 size_t numberOfBytesWritten = stopped - started;
542 if (debug) cerr << prolog << "target_size: " << target_size << " numberOfBytesWritten: " << numberOfBytesWritten << endl;
543
544 // delete target_array; // Don't have to delete this because we added it to the DMR using add_var_nocopy()
545 if (debug) cerr << prolog << "END" << endl;
546 return numberOfBytesWritten;
547
548}
549
550
551
// DISABLED CODE: everything between the #if 0 below and its #endif is compiled out.
//
// test_plan_01() repeats array_get() over a grid of settings. For each chunk count
// 2, 4, ... (power_of_two_chunk_count doublings) it first runs `reps` retrievals with
// transfer threads turned off, then `reps` retrievals for each thread limit 2, 4, ...
// (power_of_two_threads_max doublings) with transfer threads turned on.
// It returns 0 on success, 1 if a BESError was caught and 2 for any other exception.
//
// NOTE(review): this code treats URLs as strings, while the enabled functions in this file
// (get_max_retrival_size(), array_get()) take a shared_ptr<http::url> - it would have to be
// updated before it could be re-enabled. The output_prefix parameter is not used.
580#if 0
581int test_plan_01(const string &target_url,
582 const string &output_prefix,
583 const unsigned int reps,
584 const size_t retrieval_size,
585 const unsigned int power_of_two_chunk_count,
586 const unsigned int power_of_two_threads_max,
587 const string &output_file_base
588 ) {
589 int result = 0;
590 if (debug)
591 cerr << prolog << "BEGIN" << endl;
592
593 try {
594 string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
595 if (debug)
596 cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
597 size_t target_size = get_max_retrival_size(retrieval_size, effectiveUrl);
598
599 // Outer loop on chunk size
600 size_t chunk_count = 2;
601 for (size_t chunk_pwr = 1; chunk_pwr <= power_of_two_chunk_count; chunk_pwr++) {
602
603 // We turn off parallel transfers to get a baseline that is the single threaded, serial retrieval of the chunks.
604 dmrpp::DmrppRequestHandler::d_use_transfer_threads = false;
605 for ( unsigned int rep = 0; rep < reps; rep++) {
606 array_get(effectiveUrl, target_size, chunk_count, output_file_base );
607 }
608
609 // Now we enable threads and starting with 2 work up to power_of_two_threads_max
610 dmrpp::DmrppRequestHandler::d_use_transfer_threads = true;
611 unsigned int thread_count = 2;
612 for ( unsigned int tpwr = 1; tpwr <= power_of_two_threads_max; tpwr++) {
613 dmrpp::DmrppRequestHandler::d_max_transfer_threads = thread_count;
614 for ( unsigned int rep = 0; rep < reps; rep++) {
615 array_get(effectiveUrl, target_size, chunk_count, output_file_base);
616 }
617 thread_count *= 2;
618 }
619 chunk_count *= 2;
620 }
621 }
// NOTE(review): BESError is caught by value here, which copies (and can slice) the exception.
622 catch (
623 BESError e
624 ) {
625 cerr << prolog << "Caught BESError. Message: " << e.get_message() << " " << e.get_file()<< ":" << e. get_line() << endl;
626 result = 1;
627 }
628 catch (...) {
629 cerr << prolog << "Caught Unknown Exception." <<
630 endl;
631 result = 2;
632 }
633 cerr << prolog << "END" << endl;
634 return result;
635}
636#endif
637
644int main(int argc, char *argv[]) {
645
646 int result = 0;
647 string bes_log_file;
648 string bes_debug_log_file = "cerr";
649 string bes_debug_keys = "bes,http,curl,dmrpp,dmrpp:3,dmrpp:4,rr";
650 shared_ptr<http::url> target_url(new http::url("https://www.opendap.org/pub/binary/hyrax-1.16/centos-7.x/bes-debuginfo-3.20.7-1.static.el7.x86_64.rpm"));
651 string output_file_base("retriever");
652 string http_cache_dir;
653 string prefix;
654 size_t pwr2_number_o_chunks = 18;
655 size_t max_target_size = 0;
656 string http_netrc_file;
657 unsigned int reps=10;
658 unsigned pwr2_parallel_reads = 0;
659 // Unused bool aws_sign_request_url = false;
660
661 char *prefixCstr = getenv("prefix");
662 if (prefixCstr) {
663 prefix = prefixCstr;
664 } else {
665 prefix = "/";
666 }
667 auto bes_config_file = BESUtil::assemblePath(prefix, "/etc/bes/bes.conf", true);
668
669
670 GetOpt getopt(argc, argv, "h:r:n:C:c:o:u:l:S:dbDp:"); // Removed A. Unused jhrg 11/23/21
671 int option_char;
672 while ((option_char = getopt()) != -1) {
673 switch (option_char) {
674 case 'D':
675 Debug = true;
676 debug = true;
677 break;
678 case 'd':
679 debug = true;
680 break;
681 case 'b':
682 bes_debug = true;
683 break;
684#if 0
685 case 'A':
686 // Unused aws_sign_request_url = true;
687 break;
688#endif
689 case 'c':
690 bes_config_file = getopt.optarg;
691 break;
692 case 'u':
693 target_url = shared_ptr<http::url>(new http::url(getopt.optarg));
694 break;
695 case 'l':
696 bes_log_file = getopt.optarg;
697 break;
698 case 'n':
699 http_netrc_file = getopt.optarg;
700 break;
701 case 'o':
702 output_file_base = getopt.optarg;
703 break;
704 case 'C':
705 pwr2_number_o_chunks = atol(getopt.optarg);
706 break;
707 case 'S':
708 max_target_size = atol(getopt.optarg);
709 break;
710 case 'p':
711 pwr2_parallel_reads = atol(getopt.optarg);
712 break;
713 case 'r':
714 reps = atol(getopt.optarg);
715 break;
716 case 'h':
717 http_cache_dir = getopt.optarg;
718 break;
719
720 default:
721 break;
722 }
723 }
724
725 if (bes_log_file.empty()) {
726 bes_log_file = output_file_base + "_bes.log";
727 }
728
729 cerr << prolog << "-- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - " << endl;
730 cerr << prolog << "debug: " << (debug ? "true" : "false") << endl;
731 cerr << prolog << "Debug: " << (Debug ? "true" : "false") << endl;
732 cerr << prolog << "bes_debug: " << (bes_debug ? "true" : "false") << endl;
733 cerr << prolog << "output_file_base: '" << output_file_base << "'" << endl;
734 cerr << prolog << "bes_config_file: '" << bes_config_file << "'" << endl;
735 cerr << prolog << "bes_log_file: '" << bes_log_file << "'" << endl;
736 cerr << prolog << "bes_debug_log_file: '" << bes_debug_log_file << "'" << endl;
737 cerr << prolog << "bes_debug_keys: '" << bes_debug_keys << "'" << endl;
738 cerr << prolog << "http_netrc_file: '" << http_netrc_file << "'" << endl;
739 cerr << prolog << "target_url: '" << target_url->str() << "'" << endl;
740 cerr << prolog << "max_target_size: '" << max_target_size << "'" << endl;
741 cerr << prolog << "number_o_chunks: 2^" << pwr2_number_o_chunks << endl;
742 cerr << prolog << "reps: " << reps << endl;
743 if (pwr2_parallel_reads)
744 cerr << prolog << "parallel_reads: ENABLED (max: 2^" << pwr2_parallel_reads << ")" << endl;
745 else
746 cerr << prolog << "parallel_reads: DISABLED" << endl;
747 cerr << prolog << "-- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - " << endl;
748
749
750 try {
751 if(pwr2_parallel_reads){
752 unsigned long long int max_threads = 1ULL << pwr2_parallel_reads;
753 dmrpp::DmrppRequestHandler::d_use_transfer_threads = true;
754 dmrpp::DmrppRequestHandler::d_max_transfer_threads = max_threads;
755 }
756 else {
757 dmrpp::DmrppRequestHandler::d_use_transfer_threads = false;
758 dmrpp::DmrppRequestHandler::d_max_transfer_threads = 1;
759 }
760
761 dmrpp::DmrppRequestHandler *dmrppRH = bes_setup(bes_config_file, bes_log_file, bes_debug_log_file,
762 bes_debug_keys, http_netrc_file,http_cache_dir);
763
764 shared_ptr<http::url> effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
765 if (debug) cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
766 size_t target_size = get_max_retrival_size(max_target_size, effectiveUrl);
767
768 unsigned long long int chunks = 1ULL << pwr2_number_o_chunks;
769 if (debug) cerr << prolog << "Dividing target into " << chunks << " chunks." << endl;
770
771
772
773 array_get(effectiveUrl, target_size, chunks, output_file_base);
774
775
776#if 0 // these work but are parked a.t.m.
777 result = test_plan_01(
778 target_url,
779 output_file_base,
780 reps,
781 max_target_size,
782 pwr2_number_o_chunks,
783 pwr2_parallel_reads,
784 output_file_base) ;
785
786 simple_get(effectiveUrl, output_file_base);
787 serial_chunky_get( effectiveUrl, max_target_size, pwr2_number_o_chunks, output_file_base);
788
789 parse_dmrpp(target_url);
790
791
792 string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
793 if (debug)
794 cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
795 target_size = get_max_retrival_size(retrieval_size, effectiveUrl);
796 array_get(effectiveUrl, max_target_size, pwr2_number_o_chunks, output_file_base);
797#endif
798
799 curl_global_cleanup();
800 delete dmrppRH;
801 }
802 catch (BESError e) {
803 cerr << prolog << "Caught BESError. Message: " << e.get_message() << " " << e.get_file() << ":" << e.get_line()
804 << endl;
805 result = 1;
806 }
807 catch (...) {
808 cerr << prolog << "Caught Unknown Exception." << endl;
809 result = 2;
810 }
811
812 return result;
813}
virtual std::string get(const std::string &key)
virtual bool is_s3_cred()
Do the URL, ID, Key and Region items make up an S3 Credential?
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Definition: BESDebug.cc:98
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
exception thrown if internal error encountered
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:206
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
AccessCredentials * get(std::shared_ptr< http::url > &url)
static CredentialsManager * theCM()
Returns the singleton instance of the CredentialsManager.
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void set_key(const std::string &key, const std::string &val, bool addto=false)
allows the user to set key/value pairs from within the application.
Definition: TheBESKeys.cc:206
static std::string ConfigFile
Definition: TheBESKeys.h:185
Provide a way to print the DMR++ response.
Definition: DMRpp.h:44
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition: DmrppArray.h:68
static bool d_print_chunks
if true, print_dap4() prints chunk elements
Definition: DmrppCommon.h:118
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
Definition: DmrppCommon.cc:134
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
void intern(std::istream &f, libdap::DMR *dest_dmr)
static EffectiveUrlCache * TheCache()
Get the singleton EffectiveUrlCache instance.
std::shared_ptr< EffectiveUrl > get_effective_url(std::shared_ptr< url > source_url)