bes Updated for version 3.20.10
DMZ.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2021 OPeNDAP, Inc.
6// Author: James Gallagher <jgallagher@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24// #include "config.h"
25
26#include <vector>
27#include <string>
28#include <iostream>
29#include <fstream>
30
31#include <cstring>
32
33#include <libdap/BaseType.h>
34#include <libdap/Array.h>
35#include <libdap/Type.h>
36#include <libdap/D4Dimensions.h>
37#include <libdap/D4Group.h>
38#include <libdap/D4BaseTypeFactory.h>
39#include <libdap/D4Enum.h>
40#include <libdap/D4EnumDefs.h>
41#include <libdap/D4Attributes.h>
42#include <libdap/D4Maps.h>
43#include <libdap/DMR.h>
44#include <libdap/util.h> // is_simple_type()
45
46// TODO Needed? jhrg 11/23/21
47#define PUGIXML_NO_XPATH
48#define PUGIXML_HEADER_ONLY
49#include <pugixml.hpp>
50
51#include "url_impl.h" // see bes/http
52#include "DMRpp.h"
53#include "DMZ.h" // this includes the pugixml header
54#include "DmrppCommon.h"
55#include "DmrppArray.h"
56#include "DmrppD4Group.h"
57#include "Base64.h"
58#include "DmrppRequestHandler.h"
59#include "BESInternalError.h"
60#include "BESDebug.h"
61
62using namespace pugi;
63using namespace std;
64using namespace libdap;
65
66// The pugixml library does not grok namespaces. So, for a tag named 'dmrpp:chunks'
67// if TREAT_NAMESPACES_AS_LITERALS is '1' the parser matches the whole string. If it
68// is '0' the parser only matches the characters after the colon. In both cases the
69// namespace (as XML intends) is not used. Using '1' is a bit more efficient.
70// jhrg 11/2/21
71#define TREAT_NAMESPACES_AS_LITERALS 1
72
// The code can either search for a DAP variable's information in the XML, or it can
// record the variable's XML node during the parse. Set this to '1' when the code does
// the latter. Using the cached node simplifies the lazy-load process, particularly for
// the DAP2 dds and data responses (which have not yet been coded completely). jhrg 11/17/21
77#define USE_CACHED_XML_NODE 1
78
79#define PARSER "dmz"
80#define prolog std::string("DMZ::").append(__func__).append("() - ")
81
82namespace dmrpp {
83
/// Names of the DMR XML elements that the parsing code in this file treats as
/// variable declarations: a child element whose name is in this set is handed
/// to process_variable().
const std::set<std::string> variable_elements{
    "Byte", "Int8", "Int16", "Int32", "Int64",
    "UInt8", "UInt16", "UInt32", "UInt64",
    "Float32", "Float64",
    "String", "Structure", "Sequence",
    "Enum", "Opaque"
};
87
/**
 * @brief Test whether an XML element/attribute name matches a key.
 *
 * An exact string match always succeeds. When TREAT_NAMESPACES_AS_LITERALS is
 * zero (or undefined), a name of the form 'prefix:local' also matches when the
 * text after the first colon equals \c key.
 *
 * @param value The name (or value) read from the XML document.
 * @param key The string to compare against.
 * @return true if they match under the rules above, false otherwise.
 */
static inline bool is_eq(const char *value, const char *key)
{
    if (strcmp(value, key) == 0)
        return true;

#if TREAT_NAMESPACES_AS_LITERALS
    // Literal mode: only the whole-string comparison counts.
    return false;
#else
    // Prefix-insensitive mode: retry with the text following the first ':'.
    const char *after_prefix = strchr(value, ':');
    return after_prefix != nullptr && strcmp(after_prefix + 1, key) == 0;
#endif
}
104
106static inline bool has_dim_nodes(const xml_node &var_node)
107{
108 return var_node.child("Dim"); // just one is enough
109}
110
/**
 * @brief Is \c element_name one of the strings in \c elements_set?
 * @param elements_set The set of names to search.
 * @param element_name The name to look for.
 * @return true if the name is present in the set.
 */
static inline bool member_of(const set<string> &elements_set, const string &element_name)
{
    return elements_set.count(element_name) != 0;
}
116
118static inline DmrppCommon *dc(BaseType *btp)
119{
120 auto *dc = dynamic_cast<DmrppCommon*>(btp);
121 if (!dc)
122 throw BESInternalError(string("Expected a BaseType that was also a DmrppCommon instance (")
123 .append((btp) ? btp->name() : "unknown").append(")."), __FILE__, __LINE__);
124 return dc;
125}
126
132DMZ::DMZ(const string &file_name)
133{
134 parse_xml_doc(file_name);
135}
136
141void
142DMZ::parse_xml_doc(const std::string &file_name)
143{
144 std::ifstream stream(file_name);
145
146 // Free memory used by a previously parsed document.
147 d_xml_doc.reset();
148
149 // parse_ws_pcdata_single will include the space when it appears in a <Value> </Value>
150 // DAP Attribute element. jhrg 11/3/21
151 pugi::xml_parse_result result = d_xml_doc.load(stream, pugi::parse_default | pugi::parse_ws_pcdata_single);
152
153 if (!result)
154 throw BESInternalError(string("DMR++ parse error: ").append(result.description()), __FILE__, __LINE__);
155
156 if (!d_xml_doc.document_element())
157 throw BESInternalError("No DMR++ data present.", __FILE__, __LINE__);
158}
159
169void DMZ::process_dataset(DMR *dmr, const xml_node &xml_root)
170{
171 // Process the attributes
172 int required_attrs_found = 0; // there are 1
173 string href_attr;
174 bool href_trusted = false;
175 string dmrpp_version; // empty or holds a value if dmrpp::version is present
176 for (xml_attribute attr = xml_root.first_attribute(); attr; attr = attr.next_attribute()) {
177 if (is_eq(attr.name(), "name")) {
178 ++required_attrs_found;
179 dmr->set_name(attr.value());
180 }
181 else if (is_eq(attr.name(), "dapVersion")) {
182 dmr->set_dap_version(attr.value());
183 }
184 else if (is_eq(attr.name(), "dmrVersion")) {
185 dmr->set_dmr_version(attr.value());
186 }
187 else if (is_eq(attr.name(), "base")) {
188 dmr->set_request_xml_base(attr.value());
189 BESDEBUG(PARSER, prolog << "Dataset xml:base is set to '" << dmr->request_xml_base() << "'" << endl);
190 }
191 // The pugixml library does not use XML namespaces AFAIK. jhrg 11/2/21
192 else if (is_eq(attr.name(), "xmlns")) {
193 dmr->set_namespace(attr.value());
194 }
195 // This code does not use namespaces. By default, we assume the DMR++ elements
196 // all use the namespace prefix 'dmrpp'. jhrg 11/2/21
197 else if (is_eq(attr.name(), "dmrpp:href")) {
198 href_attr = attr.value();
199 }
200 else if (is_eq(attr.name(), "dmrpp:trust")) {
201 href_trusted = is_eq(attr.value(), "true");
202 }
203 else if (is_eq(attr.name(), "dmrpp:version")) {
204 dmrpp_version = attr.value();
205 }
206 // We allow other, non recognized attributes, so there is no 'else' jhrg 10/20/21
207 }
208
209 if (dmrpp_version.empty()) { // old style DMR++, set enable-kludge flag
210 DmrppRequestHandler::d_emulate_original_filter_order_behavior = true;
211 }
212 else {
213 auto dmrpp = dynamic_cast<DMRpp*>(dmr);
214 if (dmrpp) {
215 dmrpp->set_version(dmrpp_version);
216 }
217 }
218
219 if (required_attrs_found != 1)
220 throw BESInternalError("DMR++ XML dataset element missing one or more required attributes.", __FILE__, __LINE__);
221
222 d_dataset_elem_href.reset(new http::url(href_attr, href_trusted));
223}
224
230void DMZ::process_dimension(D4Group *grp, const xml_node &dimension_node)
231{
232 string name_value;
233 string size_value;
234 for (xml_attribute attr = dimension_node.first_attribute(); attr; attr = attr.next_attribute()) {
235 if (is_eq(attr.name(), "name")) {
236 name_value = attr.value();
237 }
238 else if (is_eq(attr.name(), "size")) {
239 size_value = attr.value();
240 }
241 }
242
243 if (name_value.empty() || size_value.empty())
244 throw BESInternalError("The required attribute 'name' or 'size' was missing from a Dimension element.", __FILE__, __LINE__);
245
246 // This getter (dim_def) allocates a new object if needed.
247 try {
248 auto *dimension = new D4Dimension();
249 dimension->set_name(name_value);
250 dimension->set_size(size_value);
251 grp->dims()->add_dim_nocopy(dimension);
252 }
253 catch (Error &e) {
254 throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
255 }
256}
257
265void DMZ::process_dim(DMR *dmr, D4Group *grp, Array *array, const xml_node &dim_node)
266{
267 assert(array->is_vector_type());
268
269 string name_value;
270 string size_value;
271 for (xml_attribute attr = dim_node.first_attribute(); attr; attr = attr.next_attribute()) {
272 if (is_eq(attr.name(), "name")) {
273 name_value = attr.value();
274 }
275 else if (is_eq(attr.name(), "size")) {
276 size_value = attr.value();
277 }
278 }
279
280 if (name_value.empty() && size_value.empty())
281 throw BESInternalError("Either 'size' or 'name' must be used in a Dim element.", __FILE__, __LINE__);
282 if (!name_value.empty() && !size_value.empty())
283 throw BESInternalError("Only one of 'size' and 'name' are allowed in a Dim element, but both were used.", __FILE__, __LINE__);
284
285 if (!size_value.empty()) {
286 BESDEBUG(PARSER, prolog << "Processing nameless Dim of size: " << stoi(size_value) << endl);
287 array->append_dim(stoi(size_value));
288 }
289 else if (!name_value.empty()) {
290 BESDEBUG(PARSER, prolog << "Processing Dim with named Dimension reference: " << name_value << endl);
291
292 D4Dimension *dim;
293 if (name_value[0] == '/') // lookup the Dimension in the root group
294 dim = dmr->root()->find_dim(name_value);
295 else
296 // get enclosing Group and lookup Dimension there
297 dim = grp->find_dim(name_value);
298
299 if (!dim)
300 throw BESInternalError("The dimension '" + name_value + "' was not found while parsing the variable '" + array->name() + "'.",__FILE__,__LINE__);
301
302 array->append_dim(dim);
303 }
304}
305
306void DMZ::process_map(DMR *dmr, D4Group *grp, Array *array, const xml_node &map_node)
307{
308 assert(array->is_vector_type());
309
310 string name_value;
311 string size_value;
312 for (xml_attribute attr = map_node.first_attribute(); attr; attr = attr.next_attribute()) {
313 if (is_eq(attr.name(), "name")) {
314 name_value = attr.value();
315 }
316 }
317
318 // All map names are FQNs. If we get one that isn't, assume it's within the most current group.
319 if (name_value[0] != '/')
320 name_value = grp->FQN() + name_value;
321
322 // The array variable that holds the data for the Map
323 Array *map_source = dmr->root()->find_map_source(name_value);
324
325 // In the SAX2 parser, we had 'strict' and 'permissive' modes. For Maps, permissive
326 // allowed the DAP variable for a Map to be missing so that users could request just
327 // the data with the maps. I'm implementing that behavior. Below is the original
328 // comment from DmrppParserSAX2.cc. jhrg 11/3/21
329
330 // Change: If the parser is in 'strict' mode (the default) and the Array named by
331 // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
332 // mode), then this is not an error. However, the Array referenced by the Map will
333 // be null. This is a change in the parser's behavior to accommodate requests for
334 // Arrays that include Maps that do not also include the Map(s) in the request.
335 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
336
337 array->maps()->add_map(new D4Map(name_value, map_source));
338}
339
354void DMZ::process_variable(DMR *dmr, D4Group *group, Constructor *parent, const xml_node &var_node)
355{
356 assert(group);
357
358 // Variables are declared using nodes with type names (e.g., <Float32...>)
359 // Variables are arrays if they have one or more <Dim...> child nodes.
360 Type t = get_type(var_node.name());
361
362 assert(t != dods_group_c); // Groups are special and handled elsewhere
363
364 bool is_array_type = has_dim_nodes(var_node);
365 BaseType *btp;
366 if (is_array_type) {
367 btp = add_array_variable(dmr, group, parent, t, var_node);
368 if (t == dods_structure_c || t == dods_sequence_c) {
369 assert(btp->type() == dods_array_c && btp->var()->type() == t);
370 // NB: For an array of a Constructor, add children to the Constructor, not the array
371 parent = dynamic_cast<Constructor*>(btp->var());
372 assert(parent);
373 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
374 if (member_of(variable_elements, child.name()))
375 process_variable(dmr, group, parent, child);
376 }
377 }
378 }
379 else {
380 btp = add_scalar_variable(dmr, group, parent, t, var_node);
381 if (t == dods_structure_c || t == dods_sequence_c) {
382 assert(btp->type() == t);
383 parent = dynamic_cast<Constructor*>(btp);
384 assert(parent);
385 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
386 if (member_of(variable_elements, child.name()))
387 process_variable(dmr, group, parent, child);
388 }
389 }
390 }
391
392 dc(btp)->set_xml_node(var_node);
393}
394
402BaseType *DMZ::build_variable(DMR *dmr, D4Group *group, Type t, const xml_node &var_node)
403{
404 assert(dmr->factory());
405
406 string name_value;
407 string enum_value;
408 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
409 if (is_eq(attr.name(), "name")) {
410 name_value = attr.value();
411 }
412 if (is_eq(attr.name(), "enum")) {
413 enum_value = attr.value();
414 }
415 }
416
417 if (name_value.empty())
418 throw BESInternalError("The variable 'name' attribute was missing.", __FILE__, __LINE__);
419
420 BaseType *btp = dmr->factory()->NewVariable(t, name_value);
421 if (!btp)
422 throw BESInternalError("Could not instantiate the variable ' "+ name_value +"'.", __FILE__, __LINE__);
423
424 btp->set_is_dap4(true);
425
426 if (t == dods_enum_c) {
427 if (enum_value.empty())
428 throw BESInternalError("The variable ' " + name_value + "' lacks an 'enum' attribute.", __FILE__, __LINE__);
429
430 D4EnumDef *enum_def;
431 if (enum_value[0] == '/')
432 enum_def = dmr->root()->find_enum_def(enum_value);
433 else
434 enum_def = group->find_enum_def(enum_value);
435
436 if (!enum_def)
437 throw BESInternalError("Could not find the Enumeration definition '" + enum_value + "'.", __FILE__, __LINE__);
438
439 dynamic_cast<D4Enum&>(*btp).set_enumeration(enum_def);
440 }
441
442 return btp;
443}
444
455BaseType *DMZ::add_scalar_variable(DMR *dmr, D4Group *group, Constructor *parent, Type t, const xml_node &var_node)
456{
457 assert(group);
458
459 BaseType *btp = build_variable(dmr, group, t, var_node);
460
461 // if parent is non-null, the code should add the new var to a constructor,
462 // else add the new var to the group.
463 if (parent)
464 parent->add_var_nocopy(btp);
465 else
466 group->add_var_nocopy(btp);
467
468 return btp;
469}
470
485BaseType *DMZ::add_array_variable(DMR *dmr, D4Group *group, Constructor *parent, Type t, const xml_node &var_node)
486{
487 assert(group);
488
489 BaseType *btp = build_variable(dmr, group, t, var_node);
490
491 // Transform the scalar to an array
492 auto *array = static_cast<Array*>(dmr->factory()->NewVariable(dods_array_c, btp->name()));
493 array->set_is_dap4(true);
494 array->add_var_nocopy(btp);
495
496 // The SAX parser set up the parse of attributes here. For the thin DMR, we won't
497 // parse those from the DMR now. jhrg 10/21/21
498
499 // Now grab the dimension elements
500 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
501 if (is_eq(child.name(), "Dim")) {
502 process_dim(dmr, group, array, child);
503 }
504 else if (is_eq(child.name(), "Map")) {
505 process_map(dmr, group, array, child);
506 }
507 }
508
509 if (parent)
510 parent->add_var_nocopy(array);
511 else
512 group->add_var_nocopy(array);
513
514 return array;
515}
516
525void DMZ::process_group(DMR *dmr, D4Group *parent, const xml_node &var_node)
526{
527 string name_value;
528 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
529 if (is_eq(attr.name(), "name")) {
530 name_value = attr.value();
531 }
532 }
533
534 if (name_value.empty())
535 throw BESInternalError("The required attribute 'name' was missing from a Group element.", __FILE__, __LINE__);
536
537 BaseType *btp = dmr->factory()->NewVariable(dods_group_c, name_value);
538 if (!btp)
539 throw BESInternalError("Could not instantiate the Group '" + name_value + "'.", __FILE__, __LINE__);
540
541 auto new_group = dynamic_cast<DmrppD4Group*>(btp);
542
543 // Need to set this to get the D4Attribute behavior in the type classes
544 // shared between DAP2 and DAP4. jhrg 4/18/13
545 new_group->set_is_dap4(true);
546
547 // link it up and change the current group
548 new_group->set_parent(parent);
549 parent->add_group_nocopy(new_group);
550
551 // Save the xml_node so that we can later find unprocessed XML without searching
552 new_group->set_xml_node(var_node);
553
554 // Now parse all the child nodes of the Group.
555 // NB: this is the same block of code as in build_thin_dmr(); refactor. jhrg 10/21/21
556 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
557 if (is_eq(child.name(), "Dimension")) {
558 process_dimension(new_group, child);
559 }
560 else if (is_eq(child.name(), "Group")) {
561 process_group(dmr, new_group, child);
562 }
563 else if (member_of(variable_elements, child.name())) {
564 process_variable(dmr, new_group, nullptr, child);
565 }
566 }
567}
568
575{
576 auto xml_root_node = d_xml_doc.first_child();
577
578 process_dataset(dmr, xml_root_node);
579
580 auto root_group = dmr->root();
581
582 auto *dg = dynamic_cast<DmrppD4Group*>(root_group);
583 if (!dg)
584 throw BESInternalError("Expected the root group to also be an instance of DmrppD4Group.", __FILE__, __LINE__);
585
586 dg->set_xml_node(xml_root_node);
587
588 for (auto child = xml_root_node.first_child(); child; child = child.next_sibling()) {
589 if (is_eq(child.name(), "Dimension")) {
590 process_dimension(dg, child);
591 }
592 else if (is_eq(child.name(), "Group")) {
593 process_group(dmr, dg, child);
594 }
595 // TODO Add EnumDef
596 else if (member_of(variable_elements, child.name())) {
597 process_variable(dmr, dg, nullptr, child);
598 }
599 }
600}
601
610void DMZ::process_attribute(D4Attributes *attributes, const xml_node &dap_attr_node)
611{
612 string name_value;
613 string type_value;
614 for (xml_attribute attr = dap_attr_node.first_attribute(); attr; attr = attr.next_attribute()) {
615 if (is_eq(attr.name(), "name")) {
616 name_value = attr.value();
617 }
618 if (is_eq(attr.name(), "type")) {
619 type_value = attr.value();
620 }
621 }
622
623 if (name_value.empty() || type_value.empty())
624 throw BESInternalError("The required attribute 'name' or 'type' was missing from an Attribute element.", __FILE__, __LINE__);
625
626 if (type_value == "Container") {
627 // Make the new attribute container and add it to current container
628 auto *dap_attr_cont = new D4Attribute(name_value, attr_container_c);
629 attributes->add_attribute_nocopy(dap_attr_cont);
630 // In this call, 'attributes()' will allocate the D4Attributes object
631 // that will hold the container's attributes.
632 // Test to see if there really are child "Attribute" nodes - empty containers
633 // are allowed. jhrg 11/4/21
634 if (dap_attr_node.first_child()) {
635 for (auto attr_node: dap_attr_node.children("Attribute")) {
636 process_attribute(dap_attr_cont->attributes(), attr_node);
637 }
638 }
639 }
640 else if (type_value == "OtherXML") {
641 // TODO Add support for OtherXML
642 }
643 else {
644 // Make the D4Attribute and add it to the D4Attributes attribute container
645 auto *attribute = new D4Attribute(name_value, StringToD4AttributeType(type_value));
646 attributes->add_attribute_nocopy(attribute);
647 // Process one or more Value elements
648 for (auto value_elem = dap_attr_node.first_child(); value_elem; value_elem = value_elem.next_sibling()) {
649 if (is_eq(value_elem.name(), "Value")) {
650 attribute->add_value(value_elem.child_value()); // returns the text of the first data node
651 }
652 }
653 }
654}
655
673void DMZ::build_basetype_chain(BaseType *btp, stack<BaseType*> &bt)
674{
675 auto parent = btp->get_parent();
676 bt.push(btp);
677
678 // The parent must be non-null and not the root group (the root group has no parent).
679 if (parent && !(parent->type() == dods_group_c && parent->get_parent() == nullptr))
680 build_basetype_chain(parent, bt);
681}
682
683xml_node DMZ::get_variable_xml_node_helper(const xml_node &/*parent_node*/, stack<BaseType*> &/*bt*/)
684{
685#if !USE_CACHED_XML_NODE
686 // When we have an array of Structure or Sequence, both the Array and the
687 // Structure BaseType are pushed on the stack. This happens because, for
688 // constructors, other variables reference them as a parent node (while that's
689 // not the case for the cardinal types held by an array). Here we pop the
690 // Array off the stack. A better solution might be to better control what gets
691 // pushed by build_basetype_chain(). jhrg 10/24/21
692 if (bt.top()->type() == dods_array_c && bt.top()->var()->is_constructor_type())
693 bt.pop();
694
695 // The DMR XML stores both scalar and array variables using XML elements
696 // named for the cardinal type. For an array, that is the type of the
697 // element, so we use BaseType->var()->type_name() for an Array.
698 string type_name = bt.top()->type() == dods_array_c ? bt.top()->var()->type_name(): bt.top()->type_name();
699 string var_name = bt.top()->name();
700 bt.pop();
701
702 // Now look for the node with the correct element type and matching name
703 for (auto node = parent_node.child(type_name.c_str()); node; node = node.next_sibling()) {
704 for (xml_attribute attr = node.first_attribute(); attr; attr = attr.next_attribute()) {
705 if (is_eq(attr.name(), "name") && is_eq(attr.value(), var_name.c_str())) {
706 // if this is the last BaseType on the stack, return the node
707 if (bt.empty())
708 return node;
709 else
710 return get_variable_xml_node_helper(node, bt);
711 }
712 }
713 }
714
715 return xml_node(); // return an empty node
716#else
717 return xml_node(); // return an empty node
718#endif
719}
720
727xml_node DMZ::get_variable_xml_node(BaseType *btp) const
728{
729#if USE_CACHED_XML_NODE
730 auto node = dc(btp)->get_xml_node();
731 if (node == nullptr)
732 throw BESInternalError(string("The xml_node for '").append(btp->name()).append("' was not recorded."), __FILE__, __LINE__);
733
734 return node;
735#else
736 // load the BaseType objects onto a stack, since we start at the leaf and
737 // go backward using its 'parent' pointer, the order of BaseTypes on the
738 // stack will match the order in the hierarchy of the DOM tree.
739 stack<BaseType*> bt;
740 build_basetype_chain(btp, bt);
741
742 xml_node dataset = d_xml_doc.first_child();
743 if (!dataset || !is_eq(dataset.name(), "Dataset"))
744 throw BESInternalError("No DMR++ has been parsed.", __FILE__, __LINE__);
745
746 auto node = get_variable_xml_node_helper(dataset, bt);
747 return node;
748#endif
749}
750
756
768void
769DMZ::load_attributes(BaseType *btp)
770{
771 if (dc(btp)->get_attributes_loaded())
772 return;
773
774 load_attributes(btp, get_variable_xml_node(btp));
775
776 // TODO Remove redundant
777 dc(btp)->set_attributes_loaded(true);
778
779 switch (btp->type()) {
780 // When we load attributes for an Array, the set_send_p() method
781 // is called for its 'template' variable, but that call fails (and
782 // the attributes are already loaded). This block marks the attributes
783 // as loaded so the 'var_node == nullptr' exception above does not
784 // get thrown. Maybe a better fix would be to mark 'child variables'
785 // as having their attributes loaded. jhrg 11/16/21
786 case dods_array_c: {
787 dc(btp->var())->set_attributes_loaded(true);
788 break;
789 }
790
791 // FIXME There are no tests for this code. The above bock for Array
792 // was needed, so it seems likely that this will be too, but ...
793 // jhrg 11/16/21
794 case dods_structure_c:
795 case dods_sequence_c:
796 case dods_grid_c: {
797 auto *c = dynamic_cast<Constructor*>(btp);
798 if (c) {
799 for (auto i = c->var_begin(), e = c->var_end(); i != e; i++) {
800 dc(btp->var())->set_attributes_loaded(true);
801 }
802 break;
803 }
804 }
805
806 default:
807 break;
808 }
809}
810
816void
817DMZ::load_attributes(BaseType *btp, xml_node var_node) const
818{
819 if (dc(btp)->get_attributes_loaded())
820 return;
821
822 // Attributes for this node will be held in the var_node siblings.
823 // NB: Make an explict call to the BaseType implementation in case
824 // the attributes() method is specialized for this DMR++ code to
825 // trigger a lazy-load of the variables' attributes. jhrg 10/24/21
826 // Could also use BaseType::set_attributes(). jhrg
827 auto attributes = btp->BaseType::attributes();
828 for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
829 if (is_eq(child.name(), "Attribute")) {
830 process_attribute(attributes, child);
831 }
832 }
833
834 dc(btp)->set_attributes_loaded(true);
835}
836
841void
842DMZ::load_attributes(Constructor *constructor)
843{
844 load_attributes(constructor, get_variable_xml_node(constructor));
845 for (auto i = constructor->var_begin(), e = constructor->var_end(); i != e; ++i) {
846 // Groups are not allowed inside a Constructor
847 assert((*i)->type() != dods_group_c);
848 load_attributes(*i);
849 }
850}
851
852void
853DMZ::load_attributes(D4Group *group) {
854 // The root group is special; look for its DAP Attributes in the Dataset element
855 if (group->get_parent() == nullptr) {
856 xml_node dataset = d_xml_doc.child("Dataset");
857 if (!dataset)
858 throw BESInternalError("Could not find the 'Dataset' element in the DMR++ XML document.", __FILE__, __LINE__);
859 load_attributes(group, dataset);
860 }
861 else {
862 load_attributes(group, get_variable_xml_node(group));
863 }
864
865 for (auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
866 // Even though is_constructor_type() returns true for instances of D4Group,
867 // Groups are kept under a separate container from variables because they
868 // have a different function than the Structure and Sequence types (Groups
869 // never hold data).
870 assert((*i)->type() != dods_group_c);
871 load_attributes(*i);
872 }
873
874 for (auto i = group->grp_begin(), e = group->grp_end(); i != e; ++i) {
875 load_attributes(*i);
876 }
877}
878
879void DMZ::load_all_attributes(libdap::DMR *dmr)
880{
881 assert(d_xml_doc != nullptr);
882 load_attributes(dmr->root());
883}
884
886
891
901void
902DMZ::process_compact(BaseType *btp, const xml_node &compact)
903{
904 dc(btp)->set_compact(true);
905
906 auto char_data = compact.child_value();
907 if (!char_data)
908 throw BESInternalError("The dmrpp::compact is missing data values.",__FILE__,__LINE__);
909
910 std::vector <u_int8_t> decoded = base64::Base64::decode(char_data);
911
912 if (btp->type() != dods_array_c)
913 throw BESInternalError("The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
914
915 // We know from the above that this is an Array, so accessing btp->var() is OK.
916 switch (btp->var()->type()) {
917 case dods_array_c:
918 throw BESInternalError("DMR++ document fail: An Array may not be the template for an Array.", __FILE__, __LINE__);
919
920 case dods_byte_c:
921 case dods_char_c:
922 case dods_int8_c:
923 case dods_uint8_c:
924 case dods_int16_c:
925 case dods_uint16_c:
926 case dods_int32_c:
927 case dods_uint32_c:
928 case dods_int64_c:
929 case dods_uint64_c:
930
931 case dods_enum_c:
932
933 case dods_float32_c:
934 case dods_float64_c:
935 btp->val2buf(reinterpret_cast<void *>(&decoded[0]));
936 btp->set_read_p(true);
937 break;
938
939 case dods_str_c:
940 case dods_url_c: {
941 std::string str(decoded.begin(), decoded.end());
942 auto *st = static_cast<DmrppArray *>(btp);
943 // Although val2buf() takes a void*, for DAP Str and Url types, it casts
944 // that to std::string*. jhrg 11/4/21
945 st->val2buf(&str);
946 st->set_read_p(true);
947 break;
948 }
949
950 default:
951 throw BESInternalError("Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
952 }
953}
954
962void DMZ::process_chunk(DmrppCommon *dc, const xml_node &chunk) const
963{
964 string href;
965 string trust;
966 string offset;
967 string size;
968 string chunk_position_in_array;
969
970 bool href_trusted = false;
971
972 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
973 if (is_eq(attr.name(), "href")) {
974 href = attr.value();
975 }
976 else if (is_eq(attr.name(), "trust")) {
977 href_trusted = is_eq(attr.value(), "true");
978 }
979 else if (is_eq(attr.name(), "offset")) {
980 offset = attr.value();
981 }
982 else if (is_eq(attr.name(), "nBytes")) {
983 size = attr.value();
984 }
985 else if (is_eq(attr.name(), "chunkPositionInArray")) {
986 chunk_position_in_array = attr.value();
987 }
988 }
989
990 if (offset.empty() || size.empty())
991 throw BESInternalError("Both size and offset are required for a chunk node.", __FILE__, __LINE__);
992
993 if (!href.empty()) {
994 // TODO For many cases, there are many chunks that share a URL. We could store
995 // a hash_map of known URLs and cut down on the total number of shared pointers.
996 // jhrg 11/22/21
997 shared_ptr<http::url> data_url(new http::url(href, href_trusted));
998 dc->add_chunk(data_url, dc->get_byte_order(), stoi(size), stoi(offset), chunk_position_in_array);
999 }
1000 else {
1001 dc->add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoi(size), stoi(offset), chunk_position_in_array);
1002 }
1003}
1004
1011void DMZ::process_cds_node(DmrppCommon *dc, const xml_node &chunks)
1012{
1013 for (auto child = chunks.child("dmrpp:chunkDimensionSizes"); child /*&& !cds_found*/; child = child.next_sibling()) {
1014 if (is_eq(child.name(), "dmrpp:chunkDimensionSizes")) {
1015 string sizes = child.child_value();
1016 dc->parse_chunk_dimension_sizes(sizes);
1017 }
1018 }
1019}
1020
1021// a 'dmrpp:chunks' node has a chunkDimensionSizes node and then one or more chunks
1022// nodes, and they have to be in that order.
1023void DMZ::process_chunks(DmrppCommon *dc, const xml_node &chunks)
1024{
1025 for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
1026 if (is_eq(attr.name(), "compressionType")) {
1027 dc->set_filter(attr.value());
1028 }
1029 }
1030
1031 // Look for the chunksDimensionSizes element - it will not be present for contiguous data
1032 process_cds_node(dc, chunks);
1033
1034 // Chunks for this node will be held in the var_node siblings.
1035 for (auto chunk = chunks.child("dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
1036 if (is_eq(chunk.name(), "dmrpp:chunk")) {
1037 process_chunk(dc, chunk);
1038 }
1039 }
1040}
1041
1050void DMZ::load_chunks(BaseType *btp)
1051{
1052 if (dc(btp)->get_chunks_loaded())
1053 return;
1054
1055 // goto the DOM tree node for this variable
1056 xml_node var_node = get_variable_xml_node(btp);
1057 if (var_node == nullptr)
1058 throw BESInternalError("Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
1059
1060 // Chunks for this node will be held in the var_node siblings. For a given BaseType, there should
1061 // be only one chunks node xor one chunk node.
1062 int chunks_found = 0;
1063 int chunk_found = 0;
1064 int compact_found = 0;
1065 auto child = var_node.child("dmrpp:chunks");
1066 if (child) {
1067 chunks_found = 1;
1068 process_chunks(dc(btp), child);
1069 }
1070
1071 auto chunk = var_node.child("dmrpp:chunk");
1072 if (chunk) {
1073 chunk_found = 1;
1074 process_chunk(dc(btp), chunk);
1075
1076 }
1077
1078 auto compact = var_node.child("dmrpp:compact");
1079 if (compact) {
1080 compact_found = 1;
1081 process_compact(btp, compact);
1082 }
1083
1084 // Here we (optionally) check that exactly one of the three types of node was found
1085 if (DmrppRequestHandler::d_require_chunks) {
1086 int elements_found = chunks_found + chunk_found + compact_found;
1087 if (elements_found != 1) {
1088 ostringstream oss;
1089 oss << "Expected chunk, chunks or compact information in the DMR++ data. Found " << elements_found
1090 << " types of nodes.";
1091 throw BESInternalError(oss.str(), __FILE__, __LINE__);
1092 }
1093 }
1094
1095 dc(btp)->set_chunks_loaded(true);
1096}
1097
1099
1100} // namespace dmrpp
exception thrown if internal error encountered
DMZ()=default
Build a DMZ without simultaneously parsing an XML document.
virtual void load_chunks(libdap::BaseType *btp)
Load the chunk information into a variable.
Definition: DMZ.cc:1050
void parse_xml_doc(const std::string &filename)
Build the DOM tree for a DMR++ XML document.
Definition: DMZ.cc:142
virtual void build_thin_dmr(libdap::DMR *dmr)
populate the DMR instance as a 'thin DMR'
Definition: DMZ.cc:574
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
Definition: DmrppCommon.cc:134
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
void set_filter(const std::string &value)
Set the value of the filters property.
Definition: DmrppCommon.cc:108
void set_compact(bool value)
Set the value of the compact property.
Definition: DmrppCommon.h:147
Type
Type of JSON value.
Definition: rapidjson.h:664