Facebook-Events-iCal-Converter/lib/services/dom-parser.js

const cheerio = require('cheerio')
const dayjs = require('dayjs')
const { parseDates } = require('../parser-utils')

// Page titles that must not be used as an event title; a page whose <title>
// exactly matches one of these entries is treated as having no title.
const TITLE_BLACKLIST = [ 'Content Not Found' ]

/**
 * Turn a human-readable time string into a start value and a duration.
 *
 * The text is read as "<date> at <start time>[–<end time>]": the date is
 * prepended to each time before handing the result to dayjs. Anything that
 * cannot be parsed falls back to the current time.
 *
 * @param {string} [timeText=''] - Raw time text taken from the event page.
 * @returns {{ start: *, duration: Object }} `start` as produced by
 *   `parseDates`, and `duration` with `minutes` defaulted to 120.
 */
const parseDate = (timeText = '') => {
  // Split on the standalone word "at" only. A plain `split('at')` also cuts
  // inside words that merely contain those letters (e.g. "Saturday"), which
  // corrupts the date part and loses the time part.
  const [ rawDatePart = '', rawTimePart = '' ] = timeText.split(/\s+at\s+/)
  const datePart = rawDatePart.trim()

  // The time part may be a range separated by an en dash.
  const [ rangeStart = '', rangeEnd = '' ] = rawTimePart.split('–')

  // The regex split consumed the whitespace around "at", so re-join the date
  // and the time with a single space.
  const withDate = (time) => `${datePart} ${time.trim()}`.trim()
  const startTimePart = withDate(rangeStart)
  const endTimePart = withDate(rangeEnd)

  const startTime = startTimePart ?
    dayjs(startTimePart) :
    dayjs(new Date())
  const endTime = dayjs(endTimePart)

  // Unparseable values are replaced by "now" so parseDates always receives
  // valid dayjs instances.
  const normalizedStartTime = startTime.isValid() ? startTime : dayjs(new Date())
  const normalizedEndTime = endTime.isValid() ? endTime : dayjs(new Date())
  const { start, duration } = parseDates(normalizedStartTime, normalizedEndTime)

  // A missing or zero `minutes` value is replaced by 120.
  // NOTE(review): presumably `duration` is expressed in minutes only; confirm
  // against parseDates before relying on other fields.
  const minimumDuration = { ...duration, minutes: duration.minutes || 120 }

  return {
    start,
    duration: minimumDuration,
  }
}

/**
 * Build a single-line location string from a street and an area.
 *
 * Falsy parts are skipped, the remaining parts are joined with ", ", and any
 * line break (CRLF, LF or CR) is replaced by a space.
 *
 * @param {string} streetText - Street part of the location, may be empty.
 * @param {string} areaText - Area part of the location, may be empty.
 * @returns {string} The combined location, or '' when both parts are falsy.
 */
const createLocationData = (streetText, areaText) => {
  const parts = []

  for (const text of [ streetText, areaText ]) {
    if (text) {
      parts.push(text)
    }
  }

  const joined = parts.join(', ')

  return joined.replace(/\r?\n|\r/g, ' ')
}

/**
 * Fallback parser: attempt to read event data directly from the DOM.
 *
 * Reads the page <title> as the event title, then looks inside the
 * `#event_summary` element for a time node (its `title` attribute holds the
 * time text) and a location node (street and area are read from the children
 * of its `td`).
 *
 * @param {string} html - Markup of the event page.
 * @param {Object} [options] - Optional settings.
 * @param {Object} [options.logger] - Logger exposing `log({ message, level,
 *   service })`; logging is skipped when absent.
 * @returns {?{ location: string, start: *, duration: Object, title: string }}
 *   The event data, or null when the title is empty or blacklisted, or when
 *   no start value could be produced.
 */
const parseUsingDOM = (html, { logger } = {}) => {
  if (logger) {
    logger.log({
      message: 'Using fallback DOM parser',
      level: 'info',
      service: 'parser',
    })
  }

  const $ = cheerio.load(html)
  const titleText = $('title').text()
  const title = TITLE_BLACKLIST.includes(titleText) ? null : titleText

  // The second child of the summary holds the time and location nodes; it is
  // undefined when the summary is missing or has fewer than two children.
  const $eventNode = $('#event_summary').children()[1] || null

  const $timeNode = $eventNode ? $eventNode.childNodes[0] : null
  const $locationNode = $eventNode ? $eventNode.childNodes[1] : null

  // `childNodes` holds raw nodes, so the first one may be a text or comment
  // node without `attribs`; guard the lookup instead of throwing.
  const timeAttribs = ($timeNode && $timeNode.attribs) || {}
  const timeText = timeAttribs.title || ''

  const $locationBlock = $locationNode ? $($locationNode).find('td') : null
  const $locationBlockTDs = $locationBlock ? $locationBlock.children() : []
  const $streetBlock = $locationBlockTDs[1] || null
  const $areaBlock = $locationBlockTDs[2] || null

  const streetText = $streetBlock ? $($streetBlock).text() : ''
  const areaText = $areaBlock ? $($areaBlock).text() : ''

  const location = createLocationData(streetText, areaText)
  const { start, duration } = parseDate(timeText)

  const eventData = {
    location,
    start,
    duration,
    title,
  }

  // Without a usable title or start there is nothing worth returning.
  if (!eventData.title || !eventData.start) {
    return null
  }

  return eventData
}

module.exports = parseUsingDOM