From 46a2c63f8e1c3819cceff2d61fe9106051e8ecee Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Sat, 8 Apr 2017 19:42:50 +0200 Subject: [PATCH] [fix] yahoo news date parsing --- searx/engines/yahoo_news.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 3e4cf02e..1a0fd28f 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -80,16 +80,19 @@ def response(resp): # still useful ? if re.match("^[0-9]+ minute(s|) ago$", publishedDate): - publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group())) # noqa + publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group())) + elif re.match("^[0-9]+ days? ago$", publishedDate): + publishedDate = datetime.now() - timedelta(days=int(re.match(r'\d+', publishedDate).group())) + elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate): + timeNumbers = re.findall(r'\d+', publishedDate) + publishedDate = datetime.now()\ + - timedelta(hours=int(timeNumbers[0]))\ + - timedelta(minutes=int(timeNumbers[1])) else: - if re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", - publishedDate): - timeNumbers = re.findall(r'\d+', publishedDate) - publishedDate = datetime.now()\ - - timedelta(hours=int(timeNumbers[0]))\ - - timedelta(minutes=int(timeNumbers[1])) - else: + try: publishedDate = parser.parse(publishedDate) + except: + publishedDate = datetime.now() if publishedDate.year == 1900: publishedDate = publishedDate.replace(year=datetime.now().year)