# Patch summary (explanatory header text placed before the first `diff --git` line).
#
# Files touched:
#   lib/services/dom-parser.js
#   test/services/dom-parser.spec.js
#
# lib/services/dom-parser.js
#   * Adds a module-level TITLE_BLACKLIST array with a single entry, 'Content Not Found'.
#   * In parseUsingDOM, the text of the <title> element is now read into `titleText`;
#     `title` is set to null when TITLE_BLACKLIST.includes(titleText) is true and to
#     `titleText` otherwise. `includes` is an exact match: the title text is not
#     trimmed or case-folded before the comparison.
#
# test/services/dom-parser.spec.js
#   * Adds the spec 'should return null if title was blacklisted', which expects
#     parseUsingDOM(html, { logger }) to be null for a page titled 'Content Not Found'.
#
# NOTE(review): the code that turns a null `title` into a null return value is not
# part of these hunks - presumably parseUsingDOM already bails out when it has no
# title; confirm against the full file.
# NOTE(review): this patch appears to have had its line breaks and indentation
# collapsed onto one line, and the HTML inside the spec's template literal looks
# like it lost its markup - verify against the original commit before applying.
diff --git a/lib/services/dom-parser.js b/lib/services/dom-parser.js index 666210f..808ac1f 100644 --- a/lib/services/dom-parser.js +++ b/lib/services/dom-parser.js @@ -2,6 +2,10 @@ const cheerio = require('cheerio') const dayjs = require('dayjs') const { parseDates } = require('../parser-utils') +const TITLE_BLACKLIST = [ + 'Content Not Found', +] + const parseDate = (timeText = '') => { const parts = timeText.split('at') const datePart = parts[0] || null @@ -48,7 +52,8 @@ const parseUsingDOM = (html, { logger }) => { } const $ = cheerio.load(html) - const title = $('title').text() + const titleText = $('title').text() + const title = TITLE_BLACKLIST.includes(titleText) ? null : titleText const $eventSummary = $('#event_summary') const $eventNode = $eventSummary ? $eventSummary.children()[1] : null diff --git a/test/services/dom-parser.spec.js b/test/services/dom-parser.spec.js index d110c71..05c9f48 100644 --- a/test/services/dom-parser.spec.js +++ b/test/services/dom-parser.spec.js @@ -258,6 +258,20 @@ describe(parseUsingDOM, () => { }) + it('should return null if title was blacklisted', () => { + const html = ` + + + Content Not Found + + + ` + const eventData = parseUsingDOM(html, { logger }) + + expect(eventData).to.be.null + }) + + it('should NOT return start time without title', () => { const html = `