1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at http://curl.haxx.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  ***************************************************************************/
22 /*
23   A brief summary of the date string formats this parser groks:
24 
25   RFC 2616 3.3.1
26 
27   Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
28   Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
29   Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
30 
31   we support dates without week day name:
32 
33   06 Nov 1994 08:49:37 GMT
34   06-Nov-94 08:49:37 GMT
35   Nov  6 08:49:37 1994
36 
37   without the time zone:
38 
39   06 Nov 1994 08:49:37
40   06-Nov-94 08:49:37
41 
42   weird order:
43 
44   1994 Nov 6 08:49:37  (GNU date fails)
45   GMT 08:49:37 06-Nov-94 Sunday
46   94 6 Nov 08:49:37    (GNU date fails)
47 
48   time left out:
49 
50   1994 Nov 6
51   06-Nov-94
52   Sun Nov 6 94
53 
54   unusual separators:
55 
56   1994.Nov.6
57   Sun/Nov/6/94/GMT
58 
59   commonly used time zone names:
60 
61   Sun, 06 Nov 1994 08:49:37 CET
62   06 Nov 1994 08:49:37 EST
63 
64   time zones specified using RFC822 style:
65 
66   Sun, 12 Sep 2004 15:05:58 -0700
67   Sat, 11 Sep 2004 21:32:11 +0200
68 
69   compact numerical date strings:
70 
71   20040912 15:05:58 -0700
72   20040911 +0200
73 
74 */
75 
76 #include "parsedate.hpp"
77 
78 
79 
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83 
84 #include <limits.h>
85 #include <stdbool.h>
86 #include <errno.h>
87 #include <string.h>
88 #include <ctype.h>
89 #include <stdlib.h>
90 #include <stdio.h>
91 
92 
/* Single spelling for reading and writing the C library error indicator.
   ERRNO yields the current value of errno; SET_ERRNO(x) assigns x to it. */
#define ERRNO         (errno)
#define SET_ERRNO(x)  (errno = (x))
95 
96 
/* Locale-independent upper-casing of a single character.

   Only the 26 unaccented letters 'a'..'z' are mapped, each to its
   counterpart in 'A'..'Z'; every other value is returned unchanged.
   toupper() is deliberately not used because its result depends on the
   current locale, and no arithmetic is done on character codes, so the
   mapping also holds on character sets (remember EBCDIC) in which the
   alphabet is not contiguous. */
char raw_toupper(char in)
{
  static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
  static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  const char *hit;

  if(in == '\0')
    return in; /* strchr() would match the terminator; nothing to map */

  hit = strchr(lower, in);
  if(hit)
    return upper[hit - lower];

  return in;
}
157 
/*
 * raw_equal() compares two strings for equality, ignoring the case of the
 * letters a-z only. It is locale independent on purpose and is meant for
 * strings we know are safe for this kind of comparison. See
 * http://daniel.haxx.se/blog/2008/10/15/strcasecmp-in-turkish/ for the
 * background on why a locale-aware comparison is not good enough here.
 *
 * Because the case folding is done by raw_toupper(), a-z are compared case
 * insensitively even on non-ASCII character sets.
 *
 * Returns 1 when the strings match in full, 0 otherwise.
 */

int raw_equal(const char *first, const char *second)
{
  const char *a = first;
  const char *b = second;

  /* walk both strings for as long as each still has a character left */
  for(; *a && *b; a++, b++) {
    if(raw_toupper(*a) != raw_toupper(*b))
      return 0; /* first differing position decides */
  }

  /* at least one string has ended; it is only a match when both ended at
     the same position, so a mere prefix is not reported as equal */
  return (raw_toupper(*a) == raw_toupper(*b));
}
182 
/* Wrappers around the <ctype.h> classification functions. The argument is
   first converted to unsigned char and then to int, so a plain char holding
   a negative value is never handed to the ctype function as a negative
   int. Note that the macro argument itself is not parenthesized inside the
   cast, so pass a simple expression such as a variable or a dereference. */
#define ISSPACE(x)  (isspace((int)  ((unsigned char)x)))
#define ISDIGIT(x)  (isdigit((int)  ((unsigned char)x)))
#define ISALNUM(x)  (isalnum((int)  ((unsigned char)x)))
#define ISALPHA(x)  (isalpha((int)  ((unsigned char)x)))
187 
188 
/*
 * Provide TRUE and FALSE in terms of the <stdbool.h> constants, unless
 * something included earlier has already defined them.
 *
 * Historical note carried over from the original comment: with this
 * definition, 'bool found = 1' will give a warning on MIPSPro, but
 * 'bool found = TRUE' will not. Change tested on IRIX/MIPSPro,
 * AIX 5.1/Xlc, Tru64 5.1/cc, w/make test too.
 */

#ifndef TRUE
#define TRUE true
#endif
#ifndef FALSE
#define FALSE false
#endif
202 
203 
204 
/*
** signed long to signed int
**
** Values that fit in an int are returned unchanged; anything above INT_MAX
** becomes INT_MAX and anything below INT_MIN becomes INT_MIN.
*/

int clamp_to_int(long slnum)
{
  if(slnum > INT_MAX)
    return INT_MAX;

  if(slnum < INT_MIN)
    return INT_MIN;

  return (int)slnum;
}
213 
214 
/* Three-letter day-of-week abbreviations, Monday first (index 0 - 6). */
const char * const wkday[] = {
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat",
  "Sun"
};

/* Full day-of-week names, in the same order as wkday[]. */
static const char * const weekday[] = {
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
  "Sunday"
};

/* Three-letter month abbreviations, January first (index 0 - 11). */
const char * const month[] = {
  "Jan",
  "Feb",
  "Mar",
  "Apr",
  "May",
  "Jun",
  "Jul",
  "Aug",
  "Sep",
  "Oct",
  "Nov",
  "Dec"
};
223 
/* One entry of the time zone name table. */
struct tzinfo {
  char name[5]; /* zone name; room for four characters plus the
                   terminating NUL */
  int offset; /* +/- in minutes */
};
228 
229 /*
230  * parsedate()
231  *
232  * Returns:
233  *
234  * PARSEDATE_OK     - a fine conversion
235  * PARSEDATE_FAIL   - failed to convert
236  * PARSEDATE_LATER  - time overflow at the far end of time_t
237  * PARSEDATE_SOONER - time underflow at the low end of time_t
238  */
239 
/* Forward declaration; the definition appears further down in this file. */
static int parsedate(const char *date, time_t *output);

/* Result codes returned by parsedate(). The negative value is wrapped in
   parentheses so that it always expands as a single operand, whatever
   expression the macro is used in. */
#define PARSEDATE_OK     0
#define PARSEDATE_FAIL   (-1)
#define PARSEDATE_LATER  1
#define PARSEDATE_SOONER 2
246 
/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser.

   Each offset is given in minutes. For the named zones, positive values are
   zones west of Greenwich (EST is 300) and negative values are zones east
   of it (CET is -60). checktz() scans this table from the top and returns
   the first matching entry's offset multiplied by 60.

   tDAYZONE is deliberately NOT parenthesized: it is written directly after
   a standard-time offset, so that "300 tDAYZONE" expands to the expression
   "300 -60". */
#define tDAYZONE -60       /* offset for daylight savings time */
static const struct tzinfo tz[]= {
  {"GMT", 0},              /* Greenwich Mean */
  {"UTC", 0},              /* Universal (Coordinated) */
  {"WET", 0},              /* Western European */
  {"BST", 0 tDAYZONE},     /* British Summer */
  {"WAT", 60},             /* West Africa */
  {"AST", 240},            /* Atlantic Standard */
  {"ADT", 240 tDAYZONE},   /* Atlantic Daylight */
  {"EST", 300},            /* Eastern Standard */
  {"EDT", 300 tDAYZONE},   /* Eastern Daylight */
  {"CST", 360},            /* Central Standard */
  {"CDT", 360 tDAYZONE},   /* Central Daylight */
  {"MST", 420},            /* Mountain Standard */
  {"MDT", 420 tDAYZONE},   /* Mountain Daylight */
  {"PST", 480},            /* Pacific Standard */
  {"PDT", 480 tDAYZONE},   /* Pacific Daylight */
  {"YST", 540},            /* Yukon Standard */
  {"YDT", 540 tDAYZONE},   /* Yukon Daylight */
  {"HST", 600},            /* Hawaii Standard */
  {"HDT", 600 tDAYZONE},   /* Hawaii Daylight */
  {"CAT", 600},            /* Central Alaska */
  {"AHST", 600},           /* Alaska-Hawaii Standard */
  {"NT",  660},            /* Nome */
  {"IDLW", 720},           /* International Date Line West */
  {"CET", -60},            /* Central European */
  {"MET", -60},            /* Middle European */
  {"MEWT", -60},           /* Middle European Winter */
  {"MEST", -60 tDAYZONE},  /* Middle European Summer */
  {"CEST", -60 tDAYZONE},  /* Central European Summer */
  {"MESZ", -60 tDAYZONE},  /* Middle European Summer */
  {"FWT", -60},            /* French Winter */
  {"FST", -60 tDAYZONE},   /* French Summer */
  {"EET", -120},           /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420},          /* West Australian Standard */
  {"WADT", -420 tDAYZONE}, /* West Australian Daylight */
  {"CCT", -480},           /* China Coast, USSR Zone 7 */
  {"JST", -540},           /* Japan Standard, USSR Zone 8 */
  {"EAST", -600},          /* Eastern Australian Standard */
  {"EADT", -600 tDAYZONE}, /* Eastern Australian Daylight */
  {"GST", -600},           /* Guam Standard, USSR Zone 9 */
  {"NZT", -720},           /* New Zealand */
  {"NZST", -720},          /* New Zealand Standard */
  {"NZDT", -720 tDAYZONE}, /* New Zealand Daylight */
  {"IDLE", -720},          /* International Date Line East */
  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.

     NOTE(review): the named zones above use negative offsets for zones east
     of Greenwich (CET, one hour east, is -60), while "A" below is +60 and
     "N" is -60. Please confirm that the signs of the single-letter zones
     really follow the same convention as the rest of the table.
   */
  {"A",  +1 * 60},         /* Alpha */
  {"B",  +2 * 60},         /* Bravo */
  {"C",  +3 * 60},         /* Charlie */
  {"D",  +4 * 60},         /* Delta */
  {"E",  +5 * 60},         /* Echo */
  {"F",  +6 * 60},         /* Foxtrot */
  {"G",  +7 * 60},         /* Golf */
  {"H",  +8 * 60},         /* Hotel */
  {"I",  +9 * 60},         /* India */
  /* "J", Juliet is not used as a timezone, to indicate the observer's local
     time */
  {"K", +10 * 60},         /* Kilo */
  {"L", +11 * 60},         /* Lima */
  {"M", +12 * 60},         /* Mike */
  {"N",  -1 * 60},         /* November */
  {"O",  -2 * 60},         /* Oscar */
  {"P",  -3 * 60},         /* Papa */
  {"Q",  -4 * 60},         /* Quebec */
  {"R",  -5 * 60},         /* Romeo */
  {"S",  -6 * 60},         /* Sierra */
  {"T",  -7 * 60},         /* Tango */
  {"U",  -8 * 60},         /* Uniform */
  {"V",  -9 * 60},         /* Victor */
  {"W", -10 * 60},         /* Whiskey */
  {"X", -11 * 60},         /* X-ray */
  {"Y", -12 * 60},         /* Yankee */
  {"Z", 0},                /* Zulu, zero meridian, a.k.a. UTC */
};
326 
327 /* returns:
328    -1 no day
329    0 monday - 6 sunday
330 */
331 
checkday(const char * check,size_t len)332 static int checkday(const char *check, size_t len)
333 {
334   int i;
335   const char * const *what;
336   bool found= FALSE;
337   if(len > 3)
338     what = &weekday[0];
339   else
340     what = &wkday[0];
341   for(i=0; i<7; i++) {
342     if(raw_equal(check, what[0])) {
343       found=TRUE;
344       break;
345     }
346     what++;
347   }
348   return found?i:-1;
349 }
350 
checkmonth(const char * check)351 static int checkmonth(const char *check)
352 {
353   int i;
354   const char * const *what;
355   bool found= FALSE;
356 
357   what = &month[0];
358   for(i=0; i<12; i++) {
359     if(raw_equal(check, what[0])) {
360       found=TRUE;
361       break;
362     }
363     what++;
364   }
365   return found?i:-1; /* return the offset or -1, no real offset is -1 */
366 }
367 
368 /* return the time zone offset between GMT and the input one, in number
369    of seconds or -1 if the timezone wasn't found/legal */
370 
checktz(const char * check)371 static int checktz(const char *check)
372 {
373   unsigned int i;
374   const struct tzinfo *what;
375   bool found= FALSE;
376 
377   what = tz;
378   for(i=0; i< sizeof(tz)/sizeof(tz[0]); i++) {
379     if(raw_equal(check, what->name)) {
380       found=TRUE;
381       break;
382     }
383     what++;
384   }
385   return found?what->offset*60:-1;
386 }
387 
/* Advance *date past every leading character that is neither a letter nor
   a digit. On return *date points at the first alphanumeric character, or
   at the terminating NUL if there is none. */
static void skip(const char **date)
{
  const char *cursor = *date;

  while(*cursor && !ISALNUM(*cursor))
    cursor++;

  *date = cursor;
}
394 
/* What parsedate() assumes the next bare number in the input stands for. */
enum assume {
  DATE_MDAY, /* day of the month; the parser starts out in this state */
  DATE_YEAR, /* year */
  DATE_TIME  /* not referenced by the code visible in this file */
};
400 
/* this is a clone of 'struct tm' but with all fields we don't need or use
   cut out */
struct my_tm {
  int tm_sec;  /* seconds */
  int tm_min;  /* minutes */
  int tm_hour; /* hours */
  int tm_mday; /* day of the month, 1-based */
  int tm_mon;  /* month, 0 = January; out-of-range values are normalised
                  by my_timegm() */
  int tm_year; /* years since 1900 */
};

/* struct tm to time since epoch in GMT time zone.
 * This is similar to the standard mktime function but for GMT only, and
 * doesn't suffer from the various bugs and portability problems that
 * some systems' implementations have.
 *
 * A month outside 0-11 is folded into that range and the surplus is carried
 * into the year (month 12 of year Y is January of Y+1, month -1 is December
 * of Y-1). The other fields are used as given, without range checks.
 *
 * Returns the number of seconds since 1970-01-01 00:00:00, or -1 if the
 * date lies before 1970.
 */
static time_t my_timegm(struct my_tm *tm)
{
  static const int month_days_cumulative [12] =
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
  int month_, year, leap_days;

  if(tm->tm_year < 70)
    /* we don't support years before 1970 as they will cause this function
       to return a negative value */
    return -1;

  year = tm->tm_year + 1900;
  month_ = tm->tm_mon;
  if(month_ < 0) {
    /* negative months borrow whole years from the year count */
    year -= (11 - month_) / 12;
    month_ = 11 - (11 - month_) % 12;
  }
  else if(month_ >= 12) {
    /* months past December carry whole years into the year count */
    year += month_ / 12;
    month_ = month_ % 12;
  }

  if(year < 1970)
    /* borrowing for a negative month can move the date before the epoch */
    return -1;

  /* Count leap days up to the date. In January and February the current
     year's leap day has not happened yet, so only years up to the previous
     one count. The normalised month decides this, not the raw tm_mon. */
  leap_days = year - (month_ <= 1);
  leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400)
               - (1969 / 4) + (1969 / 100) - (1969 / 400));

  return ((((time_t) (year - 1970) * 365
            + leap_days + month_days_cumulative [month_] + tm->tm_mday - 1) * 24
           + tm->tm_hour) * 60 + tm->tm_min) * 60 + tm->tm_sec;
}
447 
448 /*
449  * parsedate()
450  *
451  * Returns:
452  *
453  * PARSEDATE_OK     - a fine conversion
454  * PARSEDATE_FAIL   - failed to convert
455  * PARSEDATE_LATER  - time overflow at the far end of time_t
456  * PARSEDATE_SOONER - time underflow at the low end of time_t
457  */
458 
parsedate(const char * date,time_t * output)459 static int parsedate(const char *date, time_t *output)
460 {
461   time_t t = 0;
462   int wdaynum=-1;  /* day of the week number, 0-6 (mon-sun) */
463   int monnum=-1;   /* month of the year number, 0-11 */
464   int mdaynum=-1; /* day of month, 1 - 31 */
465   int hournum=-1;
466   int minnum=-1;
467   int secnum=-1;
468   int yearnum=-1;
469   int tzoff=-1;
470   struct my_tm tm;
471   enum assume dignext = DATE_MDAY;
472   const char *indate = date; /* save the original pointer */
473   int part = 0; /* max 6 parts */
474 
475   while(*date && (part < 6)) {
476     bool found=FALSE;
477 
478     skip(&date);
479 
480     if(ISALPHA(*date)) {
481       /* a name coming up */
482       char buf[32]="";
483       size_t len;
484       if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ"
485                           "abcdefghijklmnopqrstuvwxyz]", buf))
486         len = strlen(buf);
487       else
488         len = 0;
489 
490       if(wdaynum == -1) {
491         wdaynum = checkday(buf, len);
492         if(wdaynum != -1)
493           found = TRUE;
494       }
495       if(!found && (monnum == -1)) {
496         monnum = checkmonth(buf);
497         if(monnum != -1)
498           found = TRUE;
499       }
500 
501       if(!found && (tzoff == -1)) {
502         /* this just must be a time zone string */
503         tzoff = checktz(buf);
504         if(tzoff != -1)
505           found = TRUE;
506       }
507 
508       if(!found)
509         return PARSEDATE_FAIL; /* bad string */
510 
511       date += len;
512     }
513     else if(ISDIGIT(*date)) {
514       /* a digit */
515       int val;
516       char *end;
517       if((secnum == -1) &&
518          (3 == sscanf(date, "%02d:%02d:%02d", &hournum, &minnum, &secnum))) {
519         /* time stamp! */
520         date += 8;
521       }
522       else if((secnum == -1) &&
523               (2 == sscanf(date, "%02d:%02d", &hournum, &minnum))) {
524         /* time stamp without seconds */
525         date += 5;
526         secnum = 0;
527       }
528       else {
529         long lval;
530         int error;
531         int old_errno;
532 
533         old_errno = ERRNO;
534         SET_ERRNO(0);
535         lval = strtol(date, &end, 10);
536         error = ERRNO;
537         if(error != old_errno)
538           SET_ERRNO(old_errno);
539 
540         if(error)
541           return PARSEDATE_FAIL;
542 
543 #if LONG_MAX != INT_MAX
544         if((lval > (long)INT_MAX) || (lval < (long)INT_MIN))
545           return PARSEDATE_FAIL;
546 #endif
547 
548         val = clamp_to_int(lval);
549 
550         if((tzoff == -1) &&
551            ((end - date) == 4) &&
552            (val <= 1400) &&
553            (indate< date) &&
554            ((date[-1] == '+' || date[-1] == '-'))) {
555           /* four digits and a value less than or equal to 1400 (to take into
556              account all sorts of funny time zone diffs) and it is preceded
557              with a plus or minus. This is a time zone indication.  1400 is
558              picked since +1300 is frequently used and +1400 is mentioned as
559              an edge number in the document "ISO C 200X Proposal: Timezone
560              Functions" at http://david.tribble.com/text/c0xtimezone.html If
561              anyone has a more authoritative source for the exact maximum time
562              zone offsets, please speak up! */
563           found = TRUE;
564           tzoff = (val/100 * 60 + val%100)*60;
565 
566           /* the + and - prefix indicates the local time compared to GMT,
567              this we need ther reversed math to get what we want */
568           tzoff = date[-1]=='+'?-tzoff:tzoff;
569         }
570 
571         if(((end - date) == 8) &&
572            (yearnum == -1) &&
573            (monnum == -1) &&
574            (mdaynum == -1)) {
575           /* 8 digits, no year, month or day yet. This is YYYYMMDD */
576           found = TRUE;
577           yearnum = val/10000;
578           monnum = (val%10000)/100-1; /* month is 0 - 11 */
579           mdaynum = val%100;
580         }
581 
582         if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) {
583           if((val > 0) && (val<32)) {
584             mdaynum = val;
585             found = TRUE;
586           }
587           dignext = DATE_YEAR;
588         }
589 
590         if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) {
591           yearnum = val;
592           found = TRUE;
593           if(yearnum < 1900) {
594             if(yearnum > 70)
595               yearnum += 1900;
596             else
597               yearnum += 2000;
598           }
599           if(mdaynum == -1)
600             dignext = DATE_MDAY;
601         }
602 
603         if(!found)
604           return PARSEDATE_FAIL;
605 
606         date = end;
607       }
608     }
609 
610     part++;
611   }
612 
613   if(-1 == secnum)
614     secnum = minnum = hournum = 0; /* no time, make it zero */
615 
616   if((-1 == mdaynum) ||
617      (-1 == monnum) ||
618      (-1 == yearnum))
619     /* lacks vital info, fail */
620     return PARSEDATE_FAIL;
621 
622 #if SIZEOF_TIME_T < 5
623   /* 32 bit time_t can only hold dates to the beginning of 2038 */
624   if(yearnum > 2037) {
625     *output = 0x7fffffff;
626     return PARSEDATE_LATER;
627   }
628 #endif
629 
630   if(yearnum < 1970) {
631     *output = 0;
632     return PARSEDATE_SOONER;
633   }
634 
635   if((mdaynum > 31) || (monnum > 11) ||
636      (hournum > 23) || (minnum > 59) || (secnum > 60))
637     return PARSEDATE_FAIL; /* clearly an illegal date */
638 
639   tm.tm_sec = secnum;
640   tm.tm_min = minnum;
641   tm.tm_hour = hournum;
642   tm.tm_mday = mdaynum;
643   tm.tm_mon = monnum;
644   tm.tm_year = yearnum - 1900;
645 
646   /* my_timegm() returns a time_t. time_t is often 32 bits, even on many
647      architectures that feature 64 bit 'long'.
648 
649      Some systems have 64 bit time_t and deal with years beyond 2038. However,
650      even on some of the systems with 64 bit time_t mktime() returns -1 for
651      dates beyond 03:14:07 UTC, January 19, 2038. (Such as AIX 5100-06)
652   */
653   t = my_timegm(&tm);
654 
655   /* time zone adjust (cast t to int to compare to negative one) */
656   if(-1 != (int)t) {
657 
658     /* Add the time zone diff between local time zone and GMT. */
659     long delta = (long)(tzoff!=-1?tzoff:0);
660 
661     if((delta>0) && (t > LONG_MAX  - delta))
662       return -1; /* time_t overflow */
663 
664     t += delta;
665   }
666 
667   *output = t;
668 
669   return PARSEDATE_OK;
670 }
671 
parse_date(const char * p)672 time_t parse_date(const char *p)
673 {
674   time_t parsed = -1;
675   int rc = parsedate(p, &parsed);
676 
677   switch(rc) {
678   case PARSEDATE_OK:
679   case PARSEDATE_LATER:
680   case PARSEDATE_SOONER:
681     return parsed;
682   }
683   /* everything else is fail */
684   return -1;
685 }
686 
687 #ifdef __cplusplus
688 }
689 #endif
690