// Facebook-Events-iCal-Converter/lib/services/ldjson-parser.js
// (87 lines, 2.0 KiB, JavaScript)

const cheerio = require('cheerio')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const { parseDates } = require('../parser-utils')
// 2019-10-15 22:45:03 +02:00
// Register the UTC plugin; parseEventData below relies on dayjs.utc().
dayjs.extend(utc)
/**
 * Maps a parsed JSON-LD event object onto the flat event shape returned by
 * this parser.
 *
 * Fields read from `eventData`: `startDate`, `endDate`, `name`, `url`,
 * `description` and `location` (its `name` and `address`).
 *
 * @param {Object} [eventData] - Parsed JSON-LD event payload. A missing
 *   payload is treated as an empty object.
 * @returns {{start: *, duration: *, location: string, title: string,
 *   url: string, description: string}} `start` and `duration` are whatever
 *   `parseDates` returns; the remaining fields are strings ('' when absent).
 */
const parseEventData = (eventData) => {
  // Apply the empty-object fallback up front so every access below is safe.
  const event = eventData || {}

  // NOTE(review): the start date is parsed with dayjs() while the end date
  // and the "now" fallback use dayjs.utc() - confirm parseDates expects
  // this mix before unifying it.
  const startDate = event.startDate ?
    dayjs(event.startDate) :
    dayjs.utc(new Date())
  const endDate = event.endDate && dayjs.utc(event.endDate)
  const { start, duration } = parseDates(startDate, endDate)

  // `location` and `address` may be objects or plain text; keep the text
  // instead of silently dropping it.
  const { location } = event
  const place = typeof location === 'string' ?
    { name: location } :
    (location || {})
  const address = place.address || {}
  const addressParts = typeof address === 'string' ? [address] : [
    address.streetAddress,
    address.addressLocality,
    address.postalCode,
    address.addressCountry,
  ]

  // Drop missing parts before joining so absent fields do not leave runs of
  // spaces, then flatten line breaks and other whitespace to single spaces.
  const cleanedLocationStr = [place.name, ...addressParts]
    .filter(Boolean)
    .join(' ')
    .replace(/\s+/g, ' ')
    .trim()

  const title = event.name || ''
  const url = event.url || ''
  const description = event.description || ''

  return {
    start,
    duration,
    location: cleanedLocationStr,
    title,
    url,
    description,
  }
}
// 2019-10-15 22:45:03 +02:00
/**
 * Extracts event data from the JSON-LD (`application/ld+json`) script tags
 * found in the `<head>` of an HTML document.
 *
 * When several script tags carry content, the last non-empty one is used.
 *
 * @param {string} html - HTML source of the event page.
 * @param {Object} [options]
 * @param {Object} [options.logger] - Optional logger; its `log` method is
 *   called once with an info-level entry.
 * @returns {Object|null} Result of `parseEventData`, or `null` when no
 *   JSON-LD payload is present.
 * @throws {SyntaxError} When the selected payload is not valid JSON.
 */
const parseUsingLDJSONData = (html, { logger } = {}) => {
  if (logger) {
    logger.log({
      message: 'Parsing using LDJSON parser',
      level: 'info',
      service: 'parser',
    })
  }

  // NOTE: Mobile web should have serialized
  // event info in one of the script tags
  const $ = cheerio.load(html)
  const $scripts = $('head script[type="application/ld+json"]')

  const rawData = $scripts.toArray().reduce((data, node) => {
    const firstNode = node.children[0]

    if (!firstNode || !firstNode.data) {
      return data
    }

    // NOTE: Handle prefix. The payload may be wrapped in a commented-out
    // CDATA section (`//<![CDATA[ ... //]]>`); strip the markers by pattern
    // so surrounding whitespace cannot defeat the match.
    const payload = firstNode.data
      .trim()
      .replace(/^\/\/\s*<!\[CDATA\[/, '')
      .replace(/\/\/\s*\]\]>$/, '')
      .trim()

    // A script holding only whitespace must not discard earlier data.
    return payload || data
  }, null)

  if (!rawData) {
    return null
  }

  const eventData = JSON.parse(rawData)
  const data = parseEventData(eventData)

  return data
}
// The parser function is this module's only export.
module.exports = parseUsingLDJSONData