2019-10-17 21:43:53 +02:00
|
|
|
const cheerio = require('cheerio')
|
2019-10-23 22:03:20 +02:00
|
|
|
const dayjs = require('dayjs')
|
|
|
|
const { parseDates } = require('../parser-utils')
|
2019-10-15 22:45:03 +02:00
|
|
|
|
2019-10-17 21:43:53 +02:00
|
|
|
/**
 * Converts a decoded LD+JSON event object into the flat event shape
 * used by the rest of the parser.
 *
 * Missing pieces are tolerated: an absent payload, location or address
 * yields empty strings, and a missing start date falls back to "now".
 *
 * @param {Object} [eventData] - Decoded LD+JSON event object.
 * @param {string} [eventData.startDate] - Start date; defaults to the current time.
 * @param {string} [eventData.endDate] - End date; left undefined when absent.
 * @param {Object|string} [eventData.location] - Place object (`name`, `address`) or plain text.
 * @param {string} [eventData.name] - Event title.
 * @param {string} [eventData.url] - Event URL.
 * @param {string} [eventData.description] - Event description.
 * @returns {{start: *, duration: *, location: string, title: string, url: string, description: string}}
 *   `start` and `duration` are whatever `parseDates` returns for the given dates.
 */
const parseEventData = (eventData) => {
  // Guard once, up front, so every property read below is safe.
  const event = eventData || {}

  const startDate = event.startDate ?
    dayjs(event.startDate) :
    dayjs(new Date())
  const endDate = event.endDate && dayjs(event.endDate)
  const { start, duration } = parseDates(startDate, endDate)

  const location = event.location || {}
  const address = location.address || {}

  // schema.org allows both `location` and `address` to be plain text
  // instead of structured objects, so accept either form.
  const locationName = typeof location === 'string' ? location : location.name
  const addressParts = typeof address === 'string' ? [ address ] : [
    address.streetAddress,
    address.addressLocality,
    address.postalCode,
    address.addressCountry,
  ]

  // Drop missing parts before joining so absent fields
  // do not leave double spaces in the result.
  const locationStr = [ locationName, ...addressParts ]
    .filter(Boolean)
    .join(' ')
  // Line breaks inside the text are flattened to single spaces.
  const cleanedLocationStr = locationStr.trim().replace(/\r?\n|\r/g, ' ')

  const title = event.name || ''
  const url = event.url || ''
  const description = event.description || ''

  return {
    start,
    duration,
    location: cleanedLocationStr,
    title,
    url,
    description,
  }
}
|
2019-10-15 22:45:03 +02:00
|
|
|
|
2019-10-24 10:21:41 +02:00
|
|
|
// Optional `//<![CDATA[ ... //]]>` wrapper that some pages put around the JSON.
const CDATA_PREFIX = /^\/\/\s*<!\[CDATA\[/
const CDATA_SUFFIX = /\/\/\s*\]\]>$/

/**
 * Trims the text content of a script tag and removes an optional
 * CDATA comment wrapper around it.
 *
 * @param {string} text - Raw text content of the script tag.
 * @returns {string} The unwrapped, trimmed payload (may be empty).
 */
const unwrapScriptText = (text) => {
  const trimmed = text.trim()

  if (!CDATA_PREFIX.test(trimmed)) {
    return trimmed
  }

  return trimmed
    .replace(CDATA_PREFIX, '')
    .replace(CDATA_SUFFIX, '')
    .trim()
}

/**
 * Extracts event data from the `application/ld+json` script tags
 * found in the `<head>` of an HTML page.
 *
 * When several script tags carry content, the last non-empty one is used.
 *
 * @param {string} html - HTML source of the event page.
 * @param {Object} [options] - Optional settings.
 * @param {Object} [options.logger] - Logger exposing `log({ message, level, service })`.
 * @returns {Object|null} Parsed event data, or `null` when no LD+JSON payload was found.
 * @throws {SyntaxError} When the payload that was found is not valid JSON.
 */
const parseUsingLDJSONData = (html, options) => {
  // The options bag is optional; callers may omit it entirely.
  const { logger } = options || {}

  if (logger) {
    logger.log({
      message: 'Parsing using LDJSON parser',
      level: 'info',
      service: 'parser',
    })
  }

  // NOTE: Mobile web should have serialized
  // event info in one of the script tags
  const $ = cheerio.load(html)
  const $scripts = $('head script[type="application/ld+json"]')
  const rawData = $scripts.toArray().reduce((data, node) => {
    const firstNode = node.children[0]

    if (!firstNode || !firstNode.data) {
      return data
    }

    // NOTE: Handle prefix. Tags that turn out to be empty after
    // cleaning must not override a payload found earlier.
    return unwrapScriptText(firstNode.data) || data
  }, null)

  if (!rawData) {
    return null
  }

  const eventData = JSON.parse(rawData)

  return parseEventData(eventData)
}
|
|
|
|
|
2019-10-23 22:03:20 +02:00
|
|
|
// The module's single export: the LD+JSON parser function itself
// (consumers receive the function directly, not an object wrapping it).
module.exports = parseUsingLDJSONData
|