mirror of
https://github.com/Ranchero-Software/NetNewsWire.git
synced 2025-01-10 17:02:54 +01:00
462 lines
14 KiB
Objective-C
Executable File
462 lines
14 KiB
Objective-C
Executable File
//
|
|
// RSDateParser.m
|
|
// RSParser
|
|
//
|
|
// Created by Brent Simmons on 3/25/15.
|
|
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
|
//
|
|
|
|
|
|
#import "RSDateParser.h"
#import <strings.h>
#import <time.h>
|
|
|
|
|
|
/// One row of the time zone lookup table: an abbreviation plus its offset from GMT.
/// offsetMinutes is always stored as a non-negative magnitude; the sign of the
/// whole offset comes from offsetHours (so {"NT", -3, 30} means -03:30).
typedef struct {
	const char *abbreviation;
	const NSInteger offsetHours;
	const NSInteger offsetMinutes;
} RSTimeZoneAbbreviationAndOffset;

/* See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list.
   The array is deliberately unsized: its length is derived below, so adding or
   removing a row can never leave the count out of sync with the data. */
static const RSTimeZoneAbbreviationAndOffset timeZoneTable[] = {
	{"GMT", 0, 0}, //Most common at top, for performance
	{"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0},
	{"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0},
	{"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0},
	{"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0},
	{"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0},
	{"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0},
	{"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30},
	{"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0},
	{"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0},
	{"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0},
	{"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0},
	{"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0},
	{"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 0},
	{"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0},
	{"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0},
	{"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30},
	{"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0},
	{"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45},
	{"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0},
	{"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0},
	{"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0},
	{"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0},
	{"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0},
	{"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0}
};

/// Number of rows in timeZoneTable, computed from the table itself.
#define kNumberOfTimeZones (sizeof(timeZoneTable) / sizeof(timeZoneTable[0]))
|
|
|
|
|
|
|
|
#pragma mark - Parser
|
|
|
|
/// Month numbers as returned by nextMonthValue. 1-based: January is 1, December is 12.
enum {
	RSJanuary = 1,
	RSFebruary,
	RSMarch,
	RSApril,
	RSMay,
	RSJune,
	RSJuly,
	RSAugust,
	RSSeptember,
	RSOctober,
	RSNovember,
	RSDecember
};

/// Finds the next run of letters at or after startingIndex and maps it to a month number.
///
/// Not strict (garbage in, garbage out): only as many letters as are needed to tell
/// the English month names apart are examined, and unrecognized words map to a month anyway.
///
/// @param bytes          The characters to scan. Need not be NUL-terminated.
/// @param numberOfBytes  Number of valid characters in bytes.
/// @param startingIndex  Index at which scanning starts.
/// @param finalIndex     On return, the index of the last character examined. Left
///                       untouched when startingIndex >= numberOfBytes.
/// @return RSJanuary...RSDecember, or NSNotFound when fewer than two letters were found
///         and the first letter did not identify a month on its own.
static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) {

	NSUInteger numberOfAlphaCharactersFound = 0;
	char monthCharacters[3] = {0, 0, 0};

	for (NSUInteger i = startingIndex; i < numberOfBytes; i++) {

		*finalIndex = i;
		const char character = bytes[i];

		// <ctype.h> functions are only defined for values representable as unsigned char
		// (or EOF); bytes of a non-ASCII UTF-8 sequence are negative as plain char.
		// Comparing against 0 also avoids truncating the int result into a BOOL.
		const BOOL isAlphaCharacter = (isalpha((unsigned char)character) != 0);
		if (!isAlphaCharacter) {
			if (numberOfAlphaCharactersFound < 1) {
				continue; // Still skipping leading separators.
			}
			break; // The word ended.
		}

		numberOfAlphaCharactersFound++;
		if (numberOfAlphaCharactersFound == 1) {
			// These initials are unique among month names, so stop right away.
			switch (character) {
				case 'F': case 'f':
					return RSFebruary;
				case 'S': case 's':
					return RSSeptember;
				case 'O': case 'o':
					return RSOctober;
				case 'N': case 'n':
					return RSNovember;
				case 'D': case 'd':
					return RSDecember;
				default:
					break;
			}
		}

		monthCharacters[numberOfAlphaCharactersFound - 1] = character;
		if (numberOfAlphaCharactersFound >= 3) {
			break;
		}
	}

	if (numberOfAlphaCharactersFound < 2) {
		return NSNotFound;
	}

	const char first = monthCharacters[0];
	const char secondLetter = monthCharacters[1];
	const char third = monthCharacters[2]; // 0 when only two letters were found

	if (first == 'J' || first == 'j') { //Jan, Jun, Jul
		if (secondLetter == 'a' || secondLetter == 'A') {
			return RSJanuary;
		}
		if (secondLetter == 'u' || secondLetter == 'U') {
			return (third == 'n' || third == 'N') ? RSJune : RSJuly;
		}
		return RSJanuary;
	}

	if (first == 'M' || first == 'm') { //March, May
		return (third == 'y' || third == 'Y') ? RSMay : RSMarch;
	}

	if (first == 'A' || first == 'a') { //April, August
		return (secondLetter == 'u' || secondLetter == 'U') ? RSAugust : RSApril;
	}

	return RSJanuary; // Unrecognized word; not strict, so fall back to January.
}
|
|
|
|
|
|
/// Parses the next run of decimal digits at or after startingIndex.
///
/// Leading non-digit characters are skipped. Parsing stops at the first non-digit
/// after the run, or as soon as maximumNumberOfDigits digits have been read.
///
/// @param bytes                  The characters to scan. Need not be NUL-terminated.
/// @param numberOfBytes          Number of valid characters in bytes.
/// @param startingIndex          Index at which scanning starts.
/// @param maximumNumberOfDigits  Upper bound on digits consumed; capped at 4
///                               (enough for years and time zone offsets).
/// @param finalIndex             On return, the index of the last character looked at.
///                               Left untouched when startingIndex >= numberOfBytes.
/// @return The parsed non-negative value, or NSNotFound when no digit was found.
static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) {

	if (maximumNumberOfDigits > 4) {
		maximumNumberOfDigits = 4;
	}

	NSUInteger numberOfDigitsFound = 0;
	NSInteger value = 0;

	for (NSUInteger i = startingIndex; i < numberOfBytes; i++) {

		*finalIndex = i;

		// <ctype.h> functions are only defined for values representable as unsigned char
		// (or EOF); bytes of a non-ASCII UTF-8 sequence are negative as plain char.
		// Comparing against 0 also avoids truncating the int result into a BOOL.
		const BOOL isDigit = (isdigit((unsigned char)bytes[i]) != 0);
		if (!isDigit) {
			if (numberOfDigitsFound < 1) {
				continue; // Still skipping leading separators.
			}
			break; // The number ended.
		}

		value = (value * 10) + (bytes[i] - '0');
		numberOfDigitsFound++;
		if (numberOfDigitsFound >= maximumNumberOfDigits) {
			break;
		}
	}

	return (numberOfDigitsFound < 1) ? NSNotFound : value;
}
|
|
|
|
|
|
/// Returns YES when the NUL-terminated string s contains at least one character
/// for which isalpha() is true; returns NO otherwise (including for an empty string).
static BOOL hasAtLeastOneAlphaCharacter(const char *s) {

	for (const char *cursor = s; *cursor != '\0'; cursor++) {
		if (isalpha(*cursor)) {
			return YES;
		}
	}

	return NO;
}
|
|
|
|
|
|
#pragma mark - Time Zones and offsets
|
|
|
|
/// Looks up a time zone abbreviation (such as "PDT") in timeZoneTable.
///
/// Linear search should be fine: it's a short C array (under 100 items) with the
/// most common zones first. The comparison ignores case, matching the
/// case-insensitive handling of GMT and UTC elsewhere in this file, so "pdt" and
/// "Pdt" resolve the same way as "PDT".
///
/// @param abbreviation  NUL-terminated abbreviation to look up.
/// @return The offset from GMT in seconds, or 0 when the abbreviation is unknown.
static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) {

	for (NSUInteger i = 0; i < kNumberOfTimeZones; i++) {

		const RSTimeZoneAbbreviationAndOffset *zone = &timeZoneTable[i];
		if (strcasecmp(abbreviation, zone->abbreviation) != 0) {
			continue;
		}

		const NSInteger hourSeconds = zone->offsetHours * 60 * 60;
		const NSInteger minuteSeconds = zone->offsetMinutes * 60;

		// Minutes are stored as a magnitude, so they push a negative offset further from zero.
		return (zone->offsetHours < 0) ? (hourSeconds - minuteSeconds) : (hourSeconds + minuteSeconds);
	}

	return 0;
}
|
|
|
|
|
|
/// Converts a numeric offset such as "+0700" or "-0530" (colons already removed)
/// into seconds from GMT.
///
/// The first two digits are read as hours and the next two as minutes; a missing
/// part counts as 0. The result is positive only when the first character is '+';
/// any other first character yields a negative (or zero) result.
static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) {

	const NSUInteger length = strlen(timeZoneCharacters);
	NSUInteger lastIndex = 0;

	NSInteger hours = nextNumericValue(timeZoneCharacters, length, 0, 2, &lastIndex);
	NSInteger minutes = nextNumericValue(timeZoneCharacters, length, lastIndex + 1, 2, &lastIndex);

	if (hours == NSNotFound) {
		hours = 0;
	}
	if (minutes == NSNotFound) {
		minutes = 0;
	}

	const NSInteger magnitude = (hours * 60 * 60) + (minutes * 60);
	if (magnitude == 0) {
		return 0;
	}

	return (timeZoneCharacters[0] == '+') ? magnitude : -magnitude;
}
|
|
|
|
|
|
static const char *rs_GMT = "GMT";
static const char *rs_UTC = "UTC";

/// Parses the time zone found at or after startingIndex and returns its offset
/// from GMT in seconds.
///
/// Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST
///
/// Up to five significant characters (digits, letters, '+', '-') are collected;
/// colons, spaces and any other characters are dropped. "Z", GMT and UTC yield 0.
/// Anything else containing a letter is looked up as an abbreviation; purely
/// numeric input is treated as a +/-hhmm offset.
///
/// @param bytes          The characters to scan. Need not be NUL-terminated.
/// @param numberOfBytes  Number of valid characters in bytes.
/// @param startingIndex  Index at which scanning starts.
/// @return Seconds from GMT; 0 when nothing usable was found.
static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) {

	char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; // Sixth byte stays 0 as the terminator.
	NSUInteger numberOfCharactersFound = 0;

	for (NSUInteger i = startingIndex; i < numberOfBytes && numberOfCharactersFound < 5; i++) {

		const char ch = bytes[i];
		if (ch == ':' || ch == ' ') {
			continue;
		}

		// <ctype.h> functions are only defined for values representable as unsigned char
		// (or EOF); bytes of a non-ASCII UTF-8 sequence are negative as plain char.
		const int unsignedValue = (unsigned char)ch;
		if (isdigit(unsignedValue) || isalpha(unsignedValue) || ch == '+' || ch == '-') {
			timeZoneCharacters[numberOfCharactersFound] = ch;
			numberOfCharactersFound++;
		}
	}

	if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z') {
		return 0;
	}
	if (strcasestr(timeZoneCharacters, rs_GMT) != NULL || strcasestr(timeZoneCharacters, rs_UTC) != NULL) {
		return 0;
	}

	if (hasAtLeastOneAlphaCharacter(timeZoneCharacters)) {
		return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters);
	}
	return offsetInSecondsForOffsetCharacters(timeZoneCharacters);
}
|
|
|
|
|
|
#pragma mark - Date Creation
|
|
|
|
/// Builds an NSDate from broken-down Gregorian components expressed in a zone that
/// is timeZoneOffset seconds east of GMT.
///
/// @param year            Full year (e.g. 2010).
/// @param month           1-based month (January is 1).
/// @param day             Day of the month.
/// @param hour            Hour of the day.
/// @param minute          Minute of the hour.
/// @param second          Second of the minute.
/// @param milliseconds    Fractional part of the second, in milliseconds.
/// @param timeZoneOffset  Offset of the components' zone from GMT, in seconds.
/// @return The corresponding date.
static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) {

	struct tm timeInfo;
	timeInfo.tm_sec = (int)second;
	timeInfo.tm_min = (int)minute;
	timeInfo.tm_hour = (int)hour;
	timeInfo.tm_mday = (int)day;
	timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in
	timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900
	timeInfo.tm_wday = -1;
	timeInfo.tm_yday = -1;
	timeInfo.tm_isdst = -1;
	timeInfo.tm_gmtoff = 0;
	timeInfo.tm_zone = NULL;

	// timegm instead of mktime (which uses the local time zone). Check for failure
	// BEFORE applying the zone offset: once the offset has been subtracted, the
	// (time_t)-1 error value can no longer be recognized.
	const time_t utcSeconds = timegm(&timeInfo);
	if (utcSeconds == (time_t)-1) {

		/*NSCalendar is much slower than timegm (which is partly why RSDateParser exists), so it is used only when timegm reports failure -- for instance a date past 19 January 2038 03:14:08Z on systems with a 32-bit time_t. (time_t)-1 is also the legitimate result for 1969-12-31 23:59:59Z; taking this path for that instant is harmless because it produces the same date.*/

		NSDateComponents *dateComponents = [[NSDateComponents alloc] init];
		dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset];
		dateComponents.year = year;
		dateComponents.month = month;
		dateComponents.day = day;
		dateComponents.hour = hour;
		dateComponents.minute = minute;
		dateComponents.second = second + (milliseconds / 1000);
		dateComponents.nanosecond = (milliseconds % 1000) * 1000000; // keep the sub-second part

		// The components are Gregorian by definition, so don't interpret them with
		// whatever calendar the user happens to have selected.
		NSCalendar *gregorianCalendar = [[NSCalendar alloc] initWithCalendarIdentifier:NSCalendarIdentifierGregorian];
		return [gregorianCalendar dateFromComponents:dateComponents];
	}

	NSTimeInterval rawTime = (NSTimeInterval)utcSeconds - (NSTimeInterval)timeZoneOffset;
	if (milliseconds > 0) {
		// Double arithmetic: a float fraction would carry only about 7 significant digits.
		rawTime += (NSTimeInterval)milliseconds / 1000.0;
	}

	return [NSDate dateWithTimeIntervalSince1970:rawTime];
}
|
|
|
|
|
|
#pragma mark - Standard Formats
|
|
|
|
/// Parses an RFC 822-style date ("pubDate"), for example:
///
///   @"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ"
///   @"EEE, dd MMM yyyy HH:mm:ss zzz"
///   @"dd MMM yyyy HH:mm zzz"
///   @"dd MMM yyyy HH:mm ZZZ"
///   @"EEE, dd MMM yyyy"
///   @"EEE, dd MMM yyyy HH:mm zzz"
///   etc.
///
/// Not strict: every component that cannot be found falls back to a default
/// (1 January 1970, 00:00:00, GMT) instead of being passed on as NSNotFound.
///
/// @param bytes          The characters to parse. Need not be NUL-terminated.
/// @param numberOfBytes  Number of valid characters in bytes.
/// @return The parsed date.
static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {

	NSUInteger finalIndex = 0;

	NSInteger day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex);
	if (day < 1 || day == NSNotFound) {
		day = 1;
	}

	NSInteger month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex);
	if (month == NSNotFound) {
		month = RSJanuary;
	}

	NSInteger year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex);
	if (year == NSNotFound) {
		year = 1970;
	}

	NSInteger hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (hour == NSNotFound) {
		hour = 0;
	}

	NSInteger minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (minute == NSNotFound) {
		minute = 0;
	}

	// Seconds are optional: they are present only when a colon follows the minutes.
	NSUInteger currentIndex = finalIndex + 1;
	NSInteger second = 0;
	const BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':');
	if (hasSeconds) {
		second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex);
		if (second == NSNotFound) {
			second = 0;
		}
	}

	// The time zone is optional too, and is introduced by a space.
	currentIndex = finalIndex + 1;
	NSInteger timeZoneOffset = 0;
	const BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' ');
	if (hasTimeZone) {
		timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);
	}

	return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset);
}
|
|
|
|
|
|
/// Parses a W3C / ISO 8601-style date, for example:
///
///   @"yyyy'-'MM'-'dd'T'HH':'mm':'ss"
///   @"yyyy-MM-dd'T'HH:mm:sszzz"
///   @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz"
///   etc.
///
/// Reduced-precision forms are accepted as well: date only ("2010-11-17"),
/// no seconds ("2010-11-17T08:40-05:00"), and fractional seconds with any
/// number of digits. Not strict: every component that cannot be found falls
/// back to a default (1 January 1970, 00:00:00.000, GMT) instead of being
/// passed on as NSNotFound.
///
/// @param bytes          The characters to parse. Need not be NUL-terminated.
/// @param numberOfBytes  Number of valid characters in bytes.
/// @return The parsed date.
static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) {

	NSUInteger finalIndex = 0;

	NSInteger year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex);
	if (year == NSNotFound) {
		year = 1970;
	}

	NSInteger month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (month == NSNotFound) {
		month = RSJanuary;
	}

	NSInteger day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (day == NSNotFound) {
		day = 1;
	}

	NSInteger hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (hour == NSNotFound) {
		hour = 0;
	}

	NSInteger minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (minute == NSNotFound) {
		minute = 0;
	}

	// Seconds are optional ("hh:mmTZD" is valid). Only read them when the minutes
	// are followed by a colon or directly by a digit (compact "hhmmss" form);
	// otherwise the hours of the time zone would be mistaken for seconds.
	NSUInteger currentIndex = finalIndex + 1;
	NSInteger second = 0;
	const BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':' || isdigit((unsigned char)bytes[currentIndex]));
	if (hasSeconds) {
		second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex);
		if (second == NSNotFound) {
			second = 0;
		}
		currentIndex = finalIndex + 1;
	}

	// Fractional seconds: the digits after '.' are a decimal fraction, so ".5" is
	// 500 ms and ".25" is 250 ms. Digits beyond millisecond precision are consumed
	// but ignored, so that the time zone parse starts right after the fraction.
	NSInteger milliseconds = 0;
	const BOOL hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.');
	if (hasMilliseconds) {
		NSInteger placeValue = 100;
		currentIndex++;
		while (currentIndex < numberOfBytes && isdigit((unsigned char)bytes[currentIndex])) {
			milliseconds += (bytes[currentIndex] - '0') * placeValue;
			placeValue /= 10; // Reaches 0 after three digits; later digits add nothing.
			currentIndex++;
		}
	}

	const NSInteger timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);

	return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset);
}
|
|
|
|
|
|
/// Heuristic for RFC 822-style dates: returns YES when any of the first
/// numberOfBytes characters is a space or a comma, NO otherwise.
static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) {

	const char *end = bytes + numberOfBytes;

	for (const char *cursor = bytes; cursor < end; cursor++) {
		switch (*cursor) {
			case ' ':
			case ',':
				return YES;
			default:
				break;
		}
	}

	return NO;
}
|
|
|
|
|
|
/// Heuristic for W3C-style dates, something like 2010-11-17T08:40:07-05:00
/// (the T in the middle may be missing).
///
/// Skips leading spaces, carriage returns, newlines and tabs, then checks for
/// four digits in a row followed by a '-'.
///
/// @param bytes          The characters to inspect. Need not be NUL-terminated.
/// @param numberOfBytes  Number of valid characters in bytes.
/// @return YES when the first non-whitespace characters look like "dddd-".
static BOOL dateIsW3CDate(const char *bytes, NSUInteger numberOfBytes) {

	NSUInteger start = 0;
	while (start < numberOfBytes) {
		const char ch = bytes[start];
		if (ch != ' ' && ch != '\r' && ch != '\n' && ch != '\t') {
			break;
		}
		start++;
	}

	// Need room for four digits and the dash (also covers all-whitespace input).
	if (numberOfBytes - start < 5) {
		return NO;
	}

	for (NSUInteger offset = 0; offset < 4; offset++) {
		// <ctype.h> functions are only defined for values representable as unsigned char
		// (or EOF); bytes of a non-ASCII UTF-8 sequence are negative as plain char.
		if (!isdigit((unsigned char)bytes[start + offset])) {
			return NO;
		}
	}

	return bytes[start + 4] == '-';
}
|
|
|
|
/// Returns YES when a string of numberOfBytes characters is too short (under 6)
/// or too long (over 150) to be worth parsing as a date.
static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) {

	const NSUInteger shortestReasonableLength = 6;
	const NSUInteger longestReasonableLength = 150;

	const BOOL isWithinRange = (numberOfBytes >= shortestReasonableLength) && (numberOfBytes <= longestReasonableLength);
	return !isWithinRange;
}
|
|
|
|
|
|
#pragma mark - API
|
|
|
|
/// Parses a date from raw characters, choosing the parser by sniffing the format.
///
/// Returns nil when numberOfBytes is outside the reasonable range for a date.
/// Input that looks like a W3C date, or like neither known format, goes to the
/// W3C parser; input that only looks like an RFC 822 pubDate goes to the pubDate parser.
NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {

	if (numberOfBytesIsOutsideReasonableRange(numberOfBytes)) {
		return nil;
	}

	// W3C detection takes precedence; the W3C parser also serves as the fallback
	// in case detection fails.
	const BOOL shouldParseAsPubDate = !dateIsW3CDate(bytes, numberOfBytes) && dateIsPubDate(bytes, numberOfBytes);
	if (shouldParseAsPubDate) {
		return RSParsePubDateWithBytes(bytes, numberOfBytes);
	}

	return RSParseW3CWithBytes(bytes, numberOfBytes);
}
|
|
|
|
|
|
/// Parses a date from an NSString by handing its UTF-8 bytes to RSDateWithBytes.
///
/// @param dateString  The string to parse.
/// @return The parsed date, or nil when dateString is nil, has no UTF-8
///         representation, or is rejected by RSDateWithBytes.
NSDate *RSDateWithString(NSString *dateString) {

	const char *utf8String = [dateString UTF8String];
	if (utf8String == NULL) {
		// Messaging nil yields NULL here; strlen(NULL) would crash.
		return nil;
	}

	return RSDateWithBytes(utf8String, strlen(utf8String));
}
|
|
|