Facebook-Events-iCal-Converter/lib/services/dom-parser.js

92 lines
2.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const cheerio = require('cheerio')
const dayjs = require('dayjs')
const { parseDates } = require('../parser-utils')
// Page <title> texts that must not be used as an event title.
// parseUsingDOM compares the document title against these entries with an
// exact match; on a hit the title becomes null and no event data is returned.
// NOTE(review): presumably the title served for unavailable events — confirm.
const TITLE_BLACKLIST = [
'Content Not Found',
]
/**
 * Turn an event's human-readable time text into a start value and a duration.
 *
 * The text is expected to look like
 * "Saturday, June 1, 2019 at 8:00 PM <dash> 11:00 PM": a date, the word "at",
 * and a start time optionally followed by a dash and an end time.
 *
 * @param {string} [timeText=''] - Raw time text; null/undefined/empty are
 *   treated as "no information".
 * @returns {{start: *, duration: Object}} `start` and `duration` as produced
 *   by `parseDates`, with `duration.minutes` defaulted to 120 when falsy.
 */
const parseDate = (timeText = '') => {
  const text = String(timeText || '').trim()

  // Split on the standalone word "at" only. Splitting on the bare substring
  // 'at' would also cut through words such as "Saturday".
  const [ datePart = '', timePart = '' ] = text.split(/\s+at\s+/)

  // Start and end times are separated by an en dash (U+2013) or em dash
  // (U+2014), or by a hyphen surrounded by whitespace. A bare hyphen is not
  // treated as a separator so offsets like "UTC-05" stay intact.
  const [ startClock = '', endClock = '' ] = timePart
    .split(/\s*[\u2013\u2014]\s*|\s+-\s+/)

  const startTimePart = [ datePart, startClock ].filter(Boolean).join(' ')
  // Without an end time this is the date alone, as before.
  const endTimePart = [ datePart, endClock ].filter(Boolean).join(' ')

  const startTime = startTimePart ?
    dayjs(startTimePart) :
    dayjs(new Date())
  const endTime = dayjs(endTimePart)

  // Anything that cannot be parsed falls back to the current time.
  const normalizedStartTime = startTime.isValid() ? startTime : dayjs(new Date())
  const normalizedEndTime = endTime.isValid() ? endTime : dayjs(new Date())

  const { start, duration } = parseDates(normalizedStartTime, normalizedEndTime)

  // NOTE(review): only `minutes` is defaulted; the full shape of `duration`
  // is defined by parseDates, which is not visible here — confirm that a
  // falsy `minutes` always means "no usable duration".
  const minimumDuration = { ...duration, minutes: duration.minutes || 120 }

  return {
    start,
    duration: minimumDuration,
  }
}
/**
 * Build a single-line location string from a street and an area text.
 *
 * Falsy pieces are skipped, the rest are joined with ", " and every line
 * break (\r\n, \n or \r) is replaced by a single space.
 *
 * @param {string} streetText - Street part of the location, may be empty.
 * @param {string} areaText - Area part of the location, may be empty.
 * @returns {string} The combined location, or '' when both parts are falsy.
 */
const createLocationData = (streetText, areaText) => {
  const pieces = []

  for (const piece of [ streetText, areaText ]) {
    if (piece) {
      pieces.push(piece)
    }
  }

  const joined = pieces.join(', ')

  return joined.replace(/\r?\n|\r/g, ' ')
}
/**
 * Fallback parser: attempt to read event data directly from the page DOM.
 *
 * The title comes from the document's <title>. Time and location come from
 * the second child element of `#event_summary`: its first child node is read
 * for a `title` attribute holding the time text, and its second child node
 * is searched for `td` cells holding street and area.
 *
 * @param {string} html - Markup of the event page.
 * @param {Object} [options] - Optional settings.
 * @param {Object} [options.logger] - When given, `logger.log` is called once
 *   with an info-level entry.
 * @returns {?{location: string, start: *, duration: Object, title: string}}
 *   The event data, or null when no usable title or start was found.
 */
const parseUsingDOM = (html, { logger } = {}) => {
  if (logger) {
    logger.log({
      message: 'Using fallback DOM parser',
      level: 'info',
      service: 'parser',
    })
  }

  const $ = cheerio.load(html)

  const titleText = $('title').text()
  const title = TITLE_BLACKLIST.includes(titleText) ? null : titleText

  const eventNode = $('#event_summary').children()[1] || null
  const eventChildNodes = eventNode ? eventNode.childNodes : []
  const timeNode = eventChildNodes[0] || null
  const locationNode = eventChildNodes[1] || null

  // childNodes also contains text and comment nodes, which have no
  // `attribs`; guard so such a node yields "no time text" instead of throwing.
  const timeText = timeNode && timeNode.attribs ? timeNode.attribs.title : ''

  // Children of the location's <td> cells: index 1 is read as the street,
  // index 2 as the area.
  const locationParts = locationNode ? $(locationNode).find('td').children() : []
  const streetBlock = locationParts[1] || null
  const areaBlock = locationParts[2] || null
  const streetText = streetBlock ? $(streetBlock).text() : ''
  const areaText = areaBlock ? $(areaBlock).text() : ''

  const location = createLocationData(streetText, areaText)
  const { start, duration } = parseDate(timeText)

  const eventData = {
    location,
    start,
    duration,
    title,
  }

  // Without a title or a start there is nothing worth exporting.
  if (!eventData.title || !eventData.start) {
    return null
  }

  return eventData
}
module.exports = parseUsingDOM