1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22 /*
23 A brief summary of the date string formats this parser groks:
24
25 RFC 2616 3.3.1
26
27 Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
28 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
29 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
30
31 we support dates without week day name:
32
33 06 Nov 1994 08:49:37 GMT
34 06-Nov-94 08:49:37 GMT
35 Nov 6 08:49:37 1994
36
37 without the time zone:
38
39 06 Nov 1994 08:49:37
40 06-Nov-94 08:49:37
41
42 weird order:
43
44 1994 Nov 6 08:49:37 (GNU date fails)
45 GMT 08:49:37 06-Nov-94 Sunday
46 94 6 Nov 08:49:37 (GNU date fails)
47
48 time left out:
49
50 1994 Nov 6
51 06-Nov-94
52 Sun Nov 6 94
53
54 unusual separators:
55
56 1994.Nov.6
57 Sun/Nov/6/94/GMT
58
59 commonly used time zone names:
60
61 Sun, 06 Nov 1994 08:49:37 CET
62 06 Nov 1994 08:49:37 EST
63
64 time zones specified using RFC822 style:
65
66 Sun, 12 Sep 2004 15:05:58 -0700
67 Sat, 11 Sep 2004 21:32:11 +0200
68
69 compact numerical date strings:
70
71 20040912 15:05:58 -0700
72 20040911 +0200
73
74 */
75
76 #include "parsedate.hpp"
77
78
79
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83
84 #include <limits.h>
85 #include <stdbool.h>
86 #include <errno.h>
87 #include <string.h>
88 #include <ctype.h>
89 #include <stdlib.h>
90 #include <stdio.h>
91
92
93 #define ERRNO (errno)
94 #define SET_ERRNO(x) (errno = (x))
95
96
/* Portable upper-casing of the 26 letters 'a'-'z'. toupper() is avoided
   because its behavior is altered by the current locale; matching against
   character literals also keeps this correct on non-ASCII systems such as
   EBCDIC. Every other character is returned unchanged. */
char raw_toupper(char in)
{
  static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
  static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  int idx;

  /* the loop stops at the table's terminator, so a NUL input never matches */
  for(idx = 0; lower[idx] != '\0'; idx++) {
    if(lower[idx] == in)
      return upper[idx];
  }
  return in;
}
157
/*
 * raw_equal() compares two strings case insensitively in a "raw",
 * locale independent way: only the letters a-z are folded, via
 * raw_toupper(). See
 * http://daniel.haxx.se/blog/2008/10/15/strcasecmp-in-turkish/ for some
 * further explanation to why this function is necessary.
 *
 * Returns 1 when the strings are equal (including equal length), 0 otherwise.
 */

int raw_equal(const char *first, const char *second)
{
  for(; *first != '\0' && *second != '\0'; first++, second++) {
    if(raw_toupper(*first) != raw_toupper(*second))
      return 0; /* mismatch inside the common prefix */
  }
  /* at least one string ended here; they only match if both did, so a
     string is never equal to a longer string it is a prefix of */
  return (*first == '\0') && (*second == '\0');
}
182
/* Wrappers around the <ctype.h> classification functions. The argument is
   converted to unsigned char first because those functions have undefined
   behavior for negative values other than EOF (plain char may be signed).
   The macro parameter is parenthesized so that a compound expression such
   as ISDIGIT(a + b) is converted as a whole rather than only its first
   operand. */
#define ISSPACE(x) (isspace((int) ((unsigned char)(x))))
#define ISDIGIT(x) (isdigit((int) ((unsigned char)(x))))
#define ISALNUM(x) (isalnum((int) ((unsigned char)(x))))
#define ISALPHA(x) (isalpha((int) ((unsigned char)(x))))
187
188
/*
 * Provide TRUE and FALSE unless something included earlier already defined
 * them. Here they map onto the <stdbool.h> constants 'true' and 'false', so
 * they can be assigned to 'bool' variables directly.
 *
 * Historical note from the original source: with this change,
 * 'bool found = 1' will give a warning on MIPSPro, but 'bool found = TRUE'
 * will not. Change tested on IRIX/MIPSPro, AIX 5.1/Xlc, Tru64 5.1/cc,
 * w/make test too.
 */

#ifndef TRUE
#define TRUE true
#endif
#ifndef FALSE
#define FALSE false
#endif
202
203
204
/*
** Convert a signed long to a signed int, saturating at INT_MAX / INT_MIN
** when the value does not fit.
*/

int clamp_to_int(long slnum)
{
  if(slnum > INT_MAX)
    return INT_MAX;
  if(slnum < INT_MIN)
    return INT_MIN;
  return (int)slnum;
}
213
214
/* Abbreviated weekday names, Monday first. The array index is the weekday
   number returned by checkday() (0 = Monday ... 6 = Sunday). */
const char * const wkday[] =
{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
/* Full weekday names in the same order; checkday() uses this table for
   names longer than three characters. */
static const char * const weekday[] =
{ "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday" };
/* Abbreviated month names; the array index is the month number returned by
   checkmonth() (0 = January ... 11 = December). */
const char * const month[]=
{ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
223
/* One entry of the time zone name table: a zone name and its offset. */
struct tzinfo {
  char name[5]; /* zone name; room for four characters plus the NUL */
  int offset; /* +/- in minutes */
};
228
/*
 * parsedate()
 *
 * Forward declaration; the definition follows further down in this file.
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */

static int parsedate(const char *date, time_t *output);

/* Result codes of parsedate(). The negative value is parenthesized so the
   macro always expands to a single, self-contained expression. */
#define PARSEDATE_OK     0
#define PARSEDATE_FAIL   (-1)
#define PARSEDATE_LATER  1
#define PARSEDATE_SOONER 2
246
/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser.

   Offsets are in minutes. checktz() converts the matching offset to seconds
   and parsedate() adds it to the parsed time, so zones that are behind GMT
   (e.g. EST) carry positive values and zones ahead of GMT (e.g. CET) carry
   negative values. */

/* NOTE: tDAYZONE is deliberately NOT parenthesized. It is written directly
   after a base offset in the table below, so that e.g. "300 tDAYZONE"
   expands to the expression "300 -60". Wrapping it in parentheses would
   break every entry that uses it. */
#define tDAYZONE -60 /* offset for daylight savings time */
static const struct tzinfo tz[]= {
  {"GMT", 0}, /* Greenwich Mean */
  {"UTC", 0}, /* Universal (Coordinated) */
  {"WET", 0}, /* Western European */
  {"BST", 0 tDAYZONE}, /* British Summer */
  {"WAT", 60}, /* West Africa */
  {"AST", 240}, /* Atlantic Standard */
  {"ADT", 240 tDAYZONE}, /* Atlantic Daylight */
  {"EST", 300}, /* Eastern Standard */
  {"EDT", 300 tDAYZONE}, /* Eastern Daylight */
  {"CST", 360}, /* Central Standard */
  {"CDT", 360 tDAYZONE}, /* Central Daylight */
  {"MST", 420}, /* Mountain Standard */
  {"MDT", 420 tDAYZONE}, /* Mountain Daylight */
  {"PST", 480}, /* Pacific Standard */
  {"PDT", 480 tDAYZONE}, /* Pacific Daylight */
  {"YST", 540}, /* Yukon Standard */
  {"YDT", 540 tDAYZONE}, /* Yukon Daylight */
  {"HST", 600}, /* Hawaii Standard */
  {"HDT", 600 tDAYZONE}, /* Hawaii Daylight */
  {"CAT", 600}, /* Central Alaska */
  {"AHST", 600}, /* Alaska-Hawaii Standard */
  {"NT", 660}, /* Nome */
  {"IDLW", 720}, /* International Date Line West */
  {"CET", -60}, /* Central European */
  {"MET", -60}, /* Middle European */
  {"MEWT", -60}, /* Middle European Winter */
  {"MEST", -60 tDAYZONE}, /* Middle European Summer */
  {"CEST", -60 tDAYZONE}, /* Central European Summer */
  {"MESZ", -60 tDAYZONE}, /* Middle European Summer */
  {"FWT", -60}, /* French Winter */
  {"FST", -60 tDAYZONE}, /* French Summer */
  {"EET", -120}, /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420}, /* West Australian Standard */
  {"WADT", -420 tDAYZONE}, /* West Australian Daylight */
  {"CCT", -480}, /* China Coast, USSR Zone 7 */
  {"JST", -540}, /* Japan Standard, USSR Zone 8 */
  {"EAST", -600}, /* Eastern Australian Standard */
  {"EADT", -600 tDAYZONE}, /* Eastern Australian Daylight */
  {"GST", -600}, /* Guam Standard, USSR Zone 9 */
  {"NZT", -720}, /* New Zealand */
  {"NZST", -720}, /* New Zealand Standard */
  {"NZDT", -720 tDAYZONE}, /* New Zealand Daylight */
  {"IDLE", -720}, /* International Date Line East */
  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.

     NOTE(review): the entries below give "A" a positive offset, while the
     named zones above use negative offsets for zones ahead of GMT (CET is
     -60). Confirm that the signs of the military zones follow the same
     convention as the rest of this table.
  */
  {"A", +1 * 60}, /* Alpha */
  {"B", +2 * 60}, /* Bravo */
  {"C", +3 * 60}, /* Charlie */
  {"D", +4 * 60}, /* Delta */
  {"E", +5 * 60}, /* Echo */
  {"F", +6 * 60}, /* Foxtrot */
  {"G", +7 * 60}, /* Golf */
  {"H", +8 * 60}, /* Hotel */
  {"I", +9 * 60}, /* India */
  /* "J", Juliet is not used as a timezone, to indicate the observer's local
     time */
  {"K", +10 * 60}, /* Kilo */
  {"L", +11 * 60}, /* Lima */
  {"M", +12 * 60}, /* Mike */
  {"N", -1 * 60}, /* November */
  {"O", -2 * 60}, /* Oscar */
  {"P", -3 * 60}, /* Papa */
  {"Q", -4 * 60}, /* Quebec */
  {"R", -5 * 60}, /* Romeo */
  {"S", -6 * 60}, /* Sierra */
  {"T", -7 * 60}, /* Tango */
  {"U", -8 * 60}, /* Uniform */
  {"V", -9 * 60}, /* Victor */
  {"W", -10 * 60}, /* Whiskey */
  {"X", -11 * 60}, /* X-ray */
  {"Y", -12 * 60}, /* Yankee */
  {"Z", 0}, /* Zulu, zero meridian, a.k.a. UTC */
};
326
327 /* returns:
328 -1 no day
329 0 monday - 6 sunday
330 */
331
checkday(const char * check,size_t len)332 static int checkday(const char *check, size_t len)
333 {
334 int i;
335 const char * const *what;
336 bool found= FALSE;
337 if(len > 3)
338 what = &weekday[0];
339 else
340 what = &wkday[0];
341 for(i=0; i<7; i++) {
342 if(raw_equal(check, what[0])) {
343 found=TRUE;
344 break;
345 }
346 what++;
347 }
348 return found?i:-1;
349 }
350
checkmonth(const char * check)351 static int checkmonth(const char *check)
352 {
353 int i;
354 const char * const *what;
355 bool found= FALSE;
356
357 what = &month[0];
358 for(i=0; i<12; i++) {
359 if(raw_equal(check, what[0])) {
360 found=TRUE;
361 break;
362 }
363 what++;
364 }
365 return found?i:-1; /* return the offset or -1, no real offset is -1 */
366 }
367
368 /* return the time zone offset between GMT and the input one, in number
369 of seconds or -1 if the timezone wasn't found/legal */
370
checktz(const char * check)371 static int checktz(const char *check)
372 {
373 unsigned int i;
374 const struct tzinfo *what;
375 bool found= FALSE;
376
377 what = tz;
378 for(i=0; i< sizeof(tz)/sizeof(tz[0]); i++) {
379 if(raw_equal(check, what->name)) {
380 found=TRUE;
381 break;
382 }
383 what++;
384 }
385 return found?what->offset*60:-1;
386 }
387
/* Advance *date past everything that isn't a letter or a digit, stopping at
   the first alphanumeric character or at the end of the string. */
static void skip(const char **date)
{
  const char *cursor = *date;

  while(*cursor != '\0' && !ISALNUM(*cursor))
    cursor++;

  *date = cursor;
}
394
/* What parsedate() should take the next plain number in the input to be. */
enum assume {
  DATE_MDAY, /* day of month */
  DATE_YEAR, /* year */
  DATE_TIME /* not referenced by the code visible in this file */
};
400
/* this is a clone of 'struct tm' but with all fields we don't need or use
   cut out */
struct my_tm {
  int tm_sec;  /* seconds */
  int tm_min;  /* minutes */
  int tm_hour; /* hours */
  int tm_mday; /* day of month, starting at 1 */
  int tm_mon;  /* month, 0 = January */
  int tm_year; /* years since 1900 */
};

/* struct tm to time since epoch in GMT time zone.
 * This is similar to the standard mktime function but for GMT only, and
 * doesn't suffer from the various bugs and portability problems that
 * some systems' implementations have.
 *
 * A tm_mon outside 0..11 is normalized by carrying whole years into (or
 * borrowing them from) the year, like mktime() does. The other fields are
 * used as given, without range checking.
 *
 * Returns the number of seconds since 1970-01-01 00:00:00 GMT, or -1 when
 * the date lies before 1970.
 */
static time_t my_timegm(struct my_tm *tm)
{
  /* days in a non-leap year before the first day of each month */
  static const int month_days_cumulative [12] =
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
  int month_, year, leap_days;

  if(tm->tm_year < 70)
    /* we don't support years before 1970 as they will cause this function
       to return a negative value */
    return -1;

  year = tm->tm_year + 1900;
  month_ = tm->tm_mon;
  if(month_ < 0) {
    /* a negative month belongs to an earlier year: borrow whole years */
    year -= (11 - month_) / 12;
    month_ = 11 - (11 - month_) % 12;
  }
  else if(month_ >= 12) {
    /* a month past December belongs to a later year: carry whole years */
    year += month_ / 12;
    month_ = month_ % 12;
  }

  if(year < 1970)
    /* borrowing years for a negative month moved the date before 1970 */
    return -1;

  /* Count the leap days between 1970 and the date. In January and February
     the current year's leap day has not happened yet, so only count up to
     the previous year. This must use the normalized month. */
  leap_days = year - (month_ <= 1);
  leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400)
               - (1969 / 4) + (1969 / 100) - (1969 / 400));

  return ((((time_t) (year - 1970) * 365
            + leap_days + month_days_cumulative [month_] + tm->tm_mday - 1) * 24
           + tm->tm_hour) * 60 + tm->tm_min) * 60 + tm->tm_sec;
}
447
448 /*
449 * parsedate()
450 *
451 * Returns:
452 *
453 * PARSEDATE_OK - a fine conversion
454 * PARSEDATE_FAIL - failed to convert
455 * PARSEDATE_LATER - time overflow at the far end of time_t
456 * PARSEDATE_SOONER - time underflow at the low end of time_t
457 */
458
parsedate(const char * date,time_t * output)459 static int parsedate(const char *date, time_t *output)
460 {
461 time_t t = 0;
462 int wdaynum=-1; /* day of the week number, 0-6 (mon-sun) */
463 int monnum=-1; /* month of the year number, 0-11 */
464 int mdaynum=-1; /* day of month, 1 - 31 */
465 int hournum=-1;
466 int minnum=-1;
467 int secnum=-1;
468 int yearnum=-1;
469 int tzoff=-1;
470 struct my_tm tm;
471 enum assume dignext = DATE_MDAY;
472 const char *indate = date; /* save the original pointer */
473 int part = 0; /* max 6 parts */
474
475 while(*date && (part < 6)) {
476 bool found=FALSE;
477
478 skip(&date);
479
480 if(ISALPHA(*date)) {
481 /* a name coming up */
482 char buf[32]="";
483 size_t len;
484 if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ"
485 "abcdefghijklmnopqrstuvwxyz]", buf))
486 len = strlen(buf);
487 else
488 len = 0;
489
490 if(wdaynum == -1) {
491 wdaynum = checkday(buf, len);
492 if(wdaynum != -1)
493 found = TRUE;
494 }
495 if(!found && (monnum == -1)) {
496 monnum = checkmonth(buf);
497 if(monnum != -1)
498 found = TRUE;
499 }
500
501 if(!found && (tzoff == -1)) {
502 /* this just must be a time zone string */
503 tzoff = checktz(buf);
504 if(tzoff != -1)
505 found = TRUE;
506 }
507
508 if(!found)
509 return PARSEDATE_FAIL; /* bad string */
510
511 date += len;
512 }
513 else if(ISDIGIT(*date)) {
514 /* a digit */
515 int val;
516 char *end;
517 if((secnum == -1) &&
518 (3 == sscanf(date, "%02d:%02d:%02d", &hournum, &minnum, &secnum))) {
519 /* time stamp! */
520 date += 8;
521 }
522 else if((secnum == -1) &&
523 (2 == sscanf(date, "%02d:%02d", &hournum, &minnum))) {
524 /* time stamp without seconds */
525 date += 5;
526 secnum = 0;
527 }
528 else {
529 long lval;
530 int error;
531 int old_errno;
532
533 old_errno = ERRNO;
534 SET_ERRNO(0);
535 lval = strtol(date, &end, 10);
536 error = ERRNO;
537 if(error != old_errno)
538 SET_ERRNO(old_errno);
539
540 if(error)
541 return PARSEDATE_FAIL;
542
543 #if LONG_MAX != INT_MAX
544 if((lval > (long)INT_MAX) || (lval < (long)INT_MIN))
545 return PARSEDATE_FAIL;
546 #endif
547
548 val = clamp_to_int(lval);
549
550 if((tzoff == -1) &&
551 ((end - date) == 4) &&
552 (val <= 1400) &&
553 (indate< date) &&
554 ((date[-1] == '+' || date[-1] == '-'))) {
555 /* four digits and a value less than or equal to 1400 (to take into
556 account all sorts of funny time zone diffs) and it is preceded
557 with a plus or minus. This is a time zone indication. 1400 is
558 picked since +1300 is frequently used and +1400 is mentioned as
559 an edge number in the document "ISO C 200X Proposal: Timezone
560 Functions" at http://david.tribble.com/text/c0xtimezone.html If
561 anyone has a more authoritative source for the exact maximum time
562 zone offsets, please speak up! */
563 found = TRUE;
564 tzoff = (val/100 * 60 + val%100)*60;
565
566 /* the + and - prefix indicates the local time compared to GMT,
567 this we need ther reversed math to get what we want */
568 tzoff = date[-1]=='+'?-tzoff:tzoff;
569 }
570
571 if(((end - date) == 8) &&
572 (yearnum == -1) &&
573 (monnum == -1) &&
574 (mdaynum == -1)) {
575 /* 8 digits, no year, month or day yet. This is YYYYMMDD */
576 found = TRUE;
577 yearnum = val/10000;
578 monnum = (val%10000)/100-1; /* month is 0 - 11 */
579 mdaynum = val%100;
580 }
581
582 if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) {
583 if((val > 0) && (val<32)) {
584 mdaynum = val;
585 found = TRUE;
586 }
587 dignext = DATE_YEAR;
588 }
589
590 if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) {
591 yearnum = val;
592 found = TRUE;
593 if(yearnum < 1900) {
594 if(yearnum > 70)
595 yearnum += 1900;
596 else
597 yearnum += 2000;
598 }
599 if(mdaynum == -1)
600 dignext = DATE_MDAY;
601 }
602
603 if(!found)
604 return PARSEDATE_FAIL;
605
606 date = end;
607 }
608 }
609
610 part++;
611 }
612
613 if(-1 == secnum)
614 secnum = minnum = hournum = 0; /* no time, make it zero */
615
616 if((-1 == mdaynum) ||
617 (-1 == monnum) ||
618 (-1 == yearnum))
619 /* lacks vital info, fail */
620 return PARSEDATE_FAIL;
621
622 #if SIZEOF_TIME_T < 5
623 /* 32 bit time_t can only hold dates to the beginning of 2038 */
624 if(yearnum > 2037) {
625 *output = 0x7fffffff;
626 return PARSEDATE_LATER;
627 }
628 #endif
629
630 if(yearnum < 1970) {
631 *output = 0;
632 return PARSEDATE_SOONER;
633 }
634
635 if((mdaynum > 31) || (monnum > 11) ||
636 (hournum > 23) || (minnum > 59) || (secnum > 60))
637 return PARSEDATE_FAIL; /* clearly an illegal date */
638
639 tm.tm_sec = secnum;
640 tm.tm_min = minnum;
641 tm.tm_hour = hournum;
642 tm.tm_mday = mdaynum;
643 tm.tm_mon = monnum;
644 tm.tm_year = yearnum - 1900;
645
646 /* my_timegm() returns a time_t. time_t is often 32 bits, even on many
647 architectures that feature 64 bit 'long'.
648
649 Some systems have 64 bit time_t and deal with years beyond 2038. However,
650 even on some of the systems with 64 bit time_t mktime() returns -1 for
651 dates beyond 03:14:07 UTC, January 19, 2038. (Such as AIX 5100-06)
652 */
653 t = my_timegm(&tm);
654
655 /* time zone adjust (cast t to int to compare to negative one) */
656 if(-1 != (int)t) {
657
658 /* Add the time zone diff between local time zone and GMT. */
659 long delta = (long)(tzoff!=-1?tzoff:0);
660
661 if((delta>0) && (t > LONG_MAX - delta))
662 return -1; /* time_t overflow */
663
664 t += delta;
665 }
666
667 *output = t;
668
669 return PARSEDATE_OK;
670 }
671
parse_date(const char * p)672 time_t parse_date(const char *p)
673 {
674 time_t parsed = -1;
675 int rc = parsedate(p, &parsed);
676
677 switch(rc) {
678 case PARSEDATE_OK:
679 case PARSEDATE_LATER:
680 case PARSEDATE_SOONER:
681 return parsed;
682 }
683 /* everything else is fail */
684 return -1;
685 }
686
687 #ifdef __cplusplus
688 }
689 #endif
690