41#include "BESCatalogList.h"
47using std::chrono::system_clock;
// Debug channel passed to BESDEBUG by the code in this file.
49#define MODULE HTTP_MODULE
// Log-message prefix of the form "url::<function>() - ", built from __func__.
50#define prolog string("url::").append(__func__).append("() - ")
// Map keys under which a url's fields are exchanged as plain strings.
// url::kvp() writes all six; the map-based constructor visibly reads the
// first five (PROTOCOL, HOST, PATH, QUERY, SOURCE_URL).
52#define PROTOCOL_KEY "http_url_protocol"
53#define HOST_KEY "http_url_host"
54#define PATH_KEY "http_url_path"
55#define QUERY_KEY "http_url_query"
56#define SOURCE_URL_KEY "http_url_target_url"
57#define INGEST_TIME_KEY "http_url_ingest_time"
// Builds a url from a flat string->string map.
//
// The well-known keys (PROTOCOL_KEY, HOST_KEY, PATH_KEY, QUERY_KEY,
// SOURCE_URL_KEY) are looked up and erased from a working copy of the map;
// every entry still left in the copy is then stored in d_query_kvp.
//
// NOTE(review): this listing has gaps (braces and some statements are not
// shown). In particular the HOST/PATH/QUERY branches show no assignment to
// d_host/d_path/d_query, although the debug output prints them — presumably
// those assignment lines were dropped from the listing; confirm against the
// full source.
// NOTE(review): no lookup of INGEST_TIME_KEY is visible here, so such an entry
// would appear to fall through to the "extra keys" loop below — TODO confirm.
67url::url(
const map<string,string> &kvp)
// Working copy: recognised keys are erased from it so only extra entries remain.
69 map<string,string> kvp_copy = kvp;
70 map<string,string>::const_iterator it;
71 map<string,string>::const_iterator itc;
// Protocol: copy the value and drop the key from the working copy.
73 it = kvp.find(PROTOCOL_KEY);
74 itc = kvp_copy.find(PROTOCOL_KEY);
75 if(it != kvp.end() && itc != kvp_copy.end()){
76 d_protocol = it->second;
// erase() is given the key string, so using an iterator into kvp here is safe.
77 kvp_copy.erase(it->first);
78 BESDEBUG(MODULE, prolog <<
"Located PROTOCOL_KEY(" << PROTOCOL_KEY <<
") value: " << d_protocol << endl);
// Host.
80 it = kvp.find(HOST_KEY);
81 itc = kvp_copy.find(HOST_KEY);
82 if(it != kvp.end() && itc != kvp_copy.end()){
84 kvp_copy.erase(it->first);
85 BESDEBUG(MODULE, prolog <<
"Located HOST_KEY(" << HOST_KEY <<
") value: " << d_host << endl);
// Path.
87 it = kvp.find(PATH_KEY);
88 itc = kvp_copy.find(PATH_KEY);
89 if(it != kvp.end() && itc != kvp_copy.end()){
91 kvp_copy.erase(it->first);
92 BESDEBUG(MODULE, prolog <<
"Located PATH_KEY(" << PATH_KEY <<
") value: " << d_path << endl);
// Query string.
94 it = kvp.find(QUERY_KEY);
95 itc = kvp_copy.find(QUERY_KEY);
96 if(it != kvp.end() && itc != kvp_copy.end()){
98 kvp_copy.erase(it->first);
99 BESDEBUG(MODULE, prolog <<
"Located QUERY_KEY(" << QUERY_KEY <<
") value: " << d_query << endl);
// Original source URL string.
101 it = kvp.find(SOURCE_URL_KEY);
102 itc = kvp_copy.find(SOURCE_URL_KEY);
103 if(it != kvp.end() && itc != kvp_copy.end()){
104 d_source_url_str = it->second;
105 kvp_copy.erase(it->first);
106 BESDEBUG(MODULE, prolog <<
"Located SOURCE_URL_KEY(" << SOURCE_URL_KEY <<
") value: " << d_source_url_str << endl);
// Everything left in the working copy is stored in d_query_kvp, which maps a
// key to a heap-allocated list of values.
109 for(itc = kvp_copy.begin(); itc != kvp_copy.end(); itc++){
110 string key = itc->first;
111 string value = itc->second;
112 map<string, vector<string>* >::const_iterator record_it;
113 record_it = d_query_kvp.find(key);
// Key already known: append to its existing value list.
114 if(record_it != d_query_kvp.end()){
115 vector<string> *values = record_it->second;
116 values->push_back(value);
// Key not seen before: allocate a new value list and register it.
// NOTE(review): raw owning pointer; the code that frees these vectors is not
// visible in this block — confirm it exists.
119 vector<string> *values =
new vector<string>();
120 values->push_back(value);
121 d_query_kvp.insert(pair<
string, vector<string>*>(key, values));
// Walks every entry of d_query_kvp when the map is non-empty.
// NOTE(review): the enclosing function header and the loop body are not shown
// in this listing. Presumably this is where the heap-allocated value vectors
// are released — confirm against the full source.
133 if(!d_query_kvp.empty()){
134 map<string, vector<string>* >::const_iterator it;
135 for(it = d_query_kvp.begin() ; it != d_query_kvp.end(); it++){
// URL parsing body (the function header is not shown in this listing).
// Splits d_source_url_str into d_protocol, d_host, d_path and d_query, and
// fills d_query_kvp from the query string.
// NOTE(review): several statements and all closing braces are missing from
// this listing; comments below describe only what the visible lines do.
//
// Delimiter that separates the protocol from the rest of the URL.
150 const string protocol_end(
"://");
151 BESDEBUG(MODULE, prolog <<
"BEGIN (parsing: '" << d_source_url_str <<
"')" << endl);
// No "://" present: the string is handled via the default catalog and rewritten
// below as a FILE_PROTOCOL URL.
157 if(d_source_url_str.find(protocol_end) == string::npos){
162 BESDEBUG(MODULE, prolog <<
"Searching for catalog: " << default_catalog_name << endl);
163 BESCatalog *bcat = bcl->find_catalog(default_catalog_name);
165 BESDEBUG(MODULE, prolog <<
"Found catalog: " << bcat->
get_catalog_name() << endl);
// Failure message for a missing default catalog; what is done with it beyond
// logging (presumably an exception is thrown) is not shown here.
167 string msg =
"OUCH! Unable to locate default catalog!";
168 BESDEBUG(MODULE, prolog << msg << endl);
171 string catalog_root = bcat->
get_root();
172 BESDEBUG(MODULE, prolog <<
"Catalog root: " << catalog_root << endl);
// Make sure the path is absolute before prefixing the file protocol.
// (How file_path is derived is not shown; presumably catalog_root joined with
// the source string — TODO confirm.)
175 if(file_path[0] !=
'/')
176 file_path =
"/" + file_path;
177 d_source_url_str = FILE_PROTOCOL + file_path;
// Immutable snapshot of the (possibly rewritten) source string; all iterators
// below point into it.
180 const string parse_url_target(d_source_url_str);
// Locate the protocol delimiter.
182 string::const_iterator prot_i = search(parse_url_target.begin(), parse_url_target.end(),
183 protocol_end.begin(), protocol_end.end());
// Step past the delimiter so that the protocol text copied below includes "://".
185 if (prot_i != parse_url_target.end())
186 advance(prot_i, protocol_end.length());
// Append a lower-cased copy of [begin, prot_i) to d_protocol.
// NOTE(review): std::ptr_fun was deprecated in C++11 and removed in C++17.
188 d_protocol.reserve(distance(parse_url_target.begin(), prot_i));
189 transform(parse_url_target.begin(), prot_i,
190 back_inserter(d_protocol),
191 ptr_fun<int, int>(tolower));
// Nothing follows the protocol; the statement guarded by this test is not shown
// in this listing (presumably an early return).
192 if (prot_i == parse_url_target.end())
// file URLs: the path is everything after the protocol prefix.
195 if (d_protocol == FILE_PROTOCOL) {
196 d_path = parse_url_target.substr(d_protocol.length());
197 BESDEBUG(MODULE, prolog <<
"FILE_PROTOCOL d_path: " << d_path << endl);
// http/https URLs: split into host, path and query.
199 else if( d_protocol == HTTP_PROTOCOL || d_protocol == HTTPS_PROTOCOL){
// Host runs from just after the protocol up to the first '/', lower-cased.
200 string::const_iterator path_i = find(prot_i, parse_url_target.end(),
'/');
201 d_host.reserve(distance(prot_i, path_i));
202 transform(prot_i, path_i,
203 back_inserter(d_host),
204 ptr_fun<int, int>(tolower));
// Path runs from that '/' up to the first '?' (or the end of the string).
205 string::const_iterator query_i = find(path_i, parse_url_target.end(),
'?');
206 d_path.assign(path_i, query_i);
// NOTE(review): the statement guarded by this test is not shown; presumably it
// advances query_i past the '?' so that d_query excludes it — confirm.
207 if (query_i != parse_url_target.end())
209 d_query.assign(query_i, parse_url_target.end());
// Break the query string into key/value pairs.
211 if (!d_query.empty()) {
212 vector<string> records;
213 string delimiters =
"&";
// NOTE(review): the call that fills `records` is not shown in this listing;
// presumably the query is tokenized on `delimiters` — confirm.
215 vector<string>::iterator i = records.begin();
216 for (; i != records.end(); i++) {
// Split each record at the first '='; only records containing '=' are handled
// in the visible code.
217 size_t index = i->find(
'=');
218 if (index != string::npos) {
219 string key = i->substr(0, index);
220 string value = i->substr(index + 1);
221 BESDEBUG(MODULE, prolog <<
"key: " << key <<
" value: " << value << endl);
222 map<string, vector<string> *>::const_iterator record_it;
223 record_it = d_query_kvp.find(key);
// Repeated key: append to the existing value list.
224 if (record_it != d_query_kvp.end()) {
225 vector<string> *values = record_it->second;
226 values->push_back(value);
// First occurrence of the key: allocate and register a new value list.
// NOTE(review): raw owning pointer; the release code is not visible here.
228 vector<string> *values =
new vector<string>();
229 values->push_back(value);
230 d_query_kvp.insert(pair<
string, vector<string> *>(key, values));
// Any other protocol: compose an "unsupported protocol" message. What happens
// to it beyond the debug output is not shown in this listing.
238 msg << prolog <<
"Unsupported URL protocol " << d_protocol <<
" found in URL: " << d_source_url_str;
239 BESDEBUG(MODULE, msg.str() << endl);
242 BESDEBUG(MODULE, prolog <<
"END (parsing: '" << d_source_url_str <<
"')" << endl);
// Single-value lookup in d_query_kvp (function header not shown in this
// listing; presumably url::query_parameter_value(key) — confirm).
// Sets `value` to the first entry recorded for `key`, if there is one.
255 map<string, vector<string>* >::const_iterator it;
256 it = d_query_kvp.find(key);
257 if(it != d_query_kvp.end()){
258 vector<string> *values = it->second;
// Guard against an empty value list before reading element 0.
259 if(!values->empty()){
260 value = (*values)[0];
// Multi-value lookup in d_query_kvp (function header not shown in this
// listing; presumably url::query_parameter_values(key, values) — confirm).
// When `key` is present, `values` is overwritten with a copy of its whole
// value list; otherwise the visible code leaves `values` untouched.
273 map<string, vector<string>* >::const_iterator it;
274 it = d_query_kvp.find(key);
275 if(it != d_query_kvp.end()){
276 values = *it->second;
// Exports this url's fields into `kvp` as plain strings under the *_KEY names,
// followed by one entry per query parameter.
//
// std::map::insert leaves an existing entry untouched, so any key already
// present in `kvp` keeps its previous value.
// NOTE(review): closing braces and the declaration of `ss` are missing from
// this listing.
286void url::kvp(map<string,string> &kvp){
290 kvp.insert(pair<string,string>(PROTOCOL_KEY, d_protocol));
291 kvp.insert(pair<string,string>(HOST_KEY, d_host));
292 kvp.insert(pair<string,string>(PATH_KEY, d_path));
293 kvp.insert(pair<string,string>(QUERY_KEY, d_query));
294 kvp.insert(pair<string,string>(SOURCE_URL_KEY, d_source_url_str));
// `ss` is set up on a line not shown here; presumably it holds the ingest time
// rendered as text — confirm.
296 kvp.insert(pair<string,string>(INGEST_TIME_KEY,ss.str()));
// Only the first value of each query parameter is exported; additional values
// for the same key are not written to `kvp`.
// NOTE(review): element 0 is read without an empty() check, so this relies on
// every stored vector holding at least one value.
299 map<string, vector<string>* >::const_iterator it;
300 for(it=d_query_kvp.begin(); it != d_query_kvp.end(); it++){
301 kvp.insert(pair<string,string>(it->first,(*it->second)[0]));
// Expiry check body (function header not shown in this listing; presumably
// url::is_expired() — confirm).
// Computes an expiry time, compares the time remaining against
// HTTP_URL_REFRESH_THRESHOLD and records the outcome in `stale`.
// NOTE(review): the lines that obtain cf_expires, aws_expires_str, aws_date and
// old_now, and all closing braces, are missing from this listing.
316 std::time_t now = system_clock::to_time_t(system_clock::now());
318 BESDEBUG(MODULE, prolog <<
"now: " << now << endl);
// Default: the URL expires a fixed interval after it was ingested.
320 std::time_t expires_time = ingest_time() + HTTP_EFFECTIVE_URL_DEFAULT_EXPIRES_INTERVAL;
// First override: cf_expires, when non-empty, is parsed directly as the expiry
// time.
325 if(!cf_expires.empty()){
326 std::istringstream(cf_expires) >> expires_time;
327 BESDEBUG(MODULE, prolog <<
"Using "<< CLOUDFRONT_EXPIRES_HEADER_KEY <<
": " << expires_time << endl);
// Second override: aws_expires_str is parsed as a duration that is added to a
// start time.
329 else if(!aws_expires_str.empty()){
331 long long aws_expires;
332 std::istringstream(aws_expires_str) >> aws_expires;
// The start time defaults to the ingest time unless aws_date supplies one.
336 std::time_t aws_start_time = ingest_time();
342 if(!aws_date.empty()){
// Fixed-width field extraction: the offsets fit a YYYYMMDD?HHMMSS layout, with
// the character at index 8 skipped.
// NOTE(review): no length or format validation is visible. substr() throws
// std::out_of_range when the start position is past the end of the string, and
// stoll() throws std::invalid_argument on non-numeric text.
344 string date = aws_date;
345 string year = date.substr(0,4);
346 string month = date.substr(4,2);
347 string day = date.substr(6,2);
348 string hour = date.substr(9,2);
349 string minute = date.substr(11,2);
350 string second = date.substr(13,2);
352 BESDEBUG(MODULE, prolog <<
"date: "<< date <<
353 " year: " << year <<
" month: " << month <<
" day: " << day <<
354 " hour: " << hour <<
" minute: " << minute <<
" second: " << second << endl);
358 BESDEBUG(MODULE, prolog <<
"old_now: " << old_now << endl);
// Obtain a struct tm to fill in. gmtime() returns a pointer to storage shared
// with other calls, so the result is not safe to hold across threads.
359 struct tm *ti = gmtime(&old_now);
// Overwrite the calendar fields with the parsed values (tm_year counts from
// 1900, tm_mon from 0).
360 ti->tm_year = stoll(year) - 1900;
361 ti->tm_mon = stoll(month) - 1;
362 ti->tm_mday = stoll(day);
363 ti->tm_hour = stoll(hour);
364 ti->tm_min = stoll(minute);
365 ti->tm_sec = stoll(second);
367 BESDEBUG(MODULE, prolog <<
"ti->tm_year: "<< ti->tm_year <<
368 " ti->tm_mon: " << ti->tm_mon <<
369 " ti->tm_mday: " << ti->tm_mday <<
370 " ti->tm_hour: " << ti->tm_hour <<
371 " ti->tm_min: " << ti->tm_min <<
372 " ti->tm_sec: " << ti->tm_sec << endl);
// NOTE(review): mktime() interprets the struct tm as local time. If aws_date is
// a UTC timestamp (as the use of gmtime() above suggests), the result is off by
// the local zone offset — confirm whether timegm() or an equivalent is intended.
375 aws_start_time = mktime(ti);
376 BESDEBUG(MODULE, prolog <<
"AWS start_time (computed): " << aws_start_time << endl);
379 expires_time = aws_start_time + aws_expires;
380 BESDEBUG(MODULE, prolog <<
"Using "<< AMS_EXPIRES_HEADER_KEY <<
": " << aws_expires <<
381 " (expires_time: " << expires_time <<
")" << endl);
// Time left until expiry; negative once the expiry time has passed.
383 std::time_t remaining = expires_time - now;
384 BESDEBUG(MODULE, prolog <<
"expires_time: " << expires_time <<
385 " remaining: " << remaining <<
386 " threshold: " << HTTP_URL_REFRESH_THRESHOLD << endl);
// Stale as soon as less than the refresh threshold remains, i.e. before the
// actual expiry time is reached.
388 stale = remaining < HTTP_URL_REFRESH_THRESHOLD;
389 BESDEBUG(MODULE, prolog <<
"stale: " << (stale?
"true":
"false") << endl);
// Diagnostic dump body (function header not shown in this listing; presumably
// url::dump() — confirm).
// Writes a multi-line, indented description of this object to `ss`, whose
// declaration is on a line not shown here.
400 string indent_inc =
" ";
401 string indent = indent_inc;
// Heading line containing the object's address.
403 ss <<
"http::url [" <<
this <<
"] " << endl;
// One line per scalar member.
404 ss << indent <<
"d_source_url_str: " << d_source_url_str << endl;
405 ss << indent <<
"d_protocol: " << d_protocol << endl;
406 ss << indent <<
"d_host: " << d_host << endl;
407 ss << indent <<
"d_path: " << d_path << endl;
408 ss << indent <<
"d_query: " << d_query << endl;
// Query parameters: a heading per key, then each value one indent level deeper.
410 std::map<std::string, std::vector<std::string>* >::iterator it;
412 string idt = indent+indent_inc;
413 for(it=d_query_kvp.begin(); it !=d_query_kvp.end(); it++){
414 ss << indent <<
"d_query_kvp["<<it->first<<
"]: " << endl;
415 std::vector<std::string> *values = it->second;
416 for(
size_t i=0; i<values->size(); i++){
417 ss << idt <<
"value[" << i <<
"]: " << (*values)[i] << endl;
// Ingest time is printed as the raw tick count of its clock's duration type,
// not as a formatted date.
420 ss << indent <<
"d_ingest_time: " << d_ingest_time.time_since_epoch().count() << endl;
List of all registered catalogs.
virtual std::string default_catalog_name() const
The name of the default catalog.
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Catalogs provide a hierarchical organization for data.
virtual std::string get_root() const =0
virtual std::string get_catalog_name() const
Get the name for this catalog.
exception thrown if internal error encountered
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
virtual void query_parameter_values(const std::string &key, std::vector< std::string > &values) const
virtual std::string query_parameter_value(const std::string &key) const
virtual std::string dump()
virtual bool is_expired()
utility class for the HTTP catalog module