add specs for LD JSON parser

This commit is contained in:
Ondřej Synáček 2020-07-15 08:46:03 +02:00
parent 59eb0191bf
commit 48e012bbc0
2 changed files with 462 additions and 24 deletions

View File

@ -22,7 +22,7 @@ const parseEventData = (eventData) => {
locationName,
addressStr,
].join(' ')
const cleanedLocationStr = locationStr.replace(/\r?\n|\r/g, ' ')
const cleanedLocationStr = locationStr.trim().replace(/\r?\n|\r/g, ' ')
const title = eventData.name || ''
const url = eventData.url || ''
const description = eventData.description || ''
@ -46,36 +46,37 @@ const parseUsingLDJSONData = (html, { logger }) => {
})
}
try {
// NOTE: Mobile web should have serialized
// event info in one of the script tags
const $ = cheerio.load(html)
const $scripts = $('head script[type="application/ld+json"]')
const rawData = $scripts.toArray().reduce((data, node) => {
const firstNode = node.children[0]
if (!firstNode || !firstNode.data) {
return data
}
if (firstNode.data.startsWith('//<![CDATA')) {
return firstNode.data
}
// NOTE: Mobile web should have serialized
// event info in one of the script tags
const $ = cheerio.load(html)
const $scripts = $('head script[type="application/ld+json"]')
const rawData = $scripts.toArray().reduce((data, node) => {
const firstNode = node.children[0]
if (!firstNode || !firstNode.data) {
return data
}, null)
if (!rawData) {
return null
}
const eventData = JSON.parse(rawData.slice(12, -5))
const data = parseEventData(eventData)
// NOTE: Handle prefix
if (firstNode.data.startsWith('//<![CDATA')) {
return firstNode.data.slice(12, -5)
}
if (firstNode.data) {
return firstNode.data.trim()
}
return data
} catch (err) {
throw err
}, null)
if (!rawData) {
return null
}
const eventData = JSON.parse(rawData)
const data = parseEventData(eventData)
return data
}
module.exports = parseUsingLDJSONData

View File

@ -0,0 +1,437 @@
const { expect } = require('chai')
const parseUsingLDJSONData = require('../../lib/services/ldjson-parser')
const MockLogger = require('../../mocks/logger.mock')
describe(parseUsingLDJSONData, () => {
describe('results', () => {
describe('detect LDJSON', () => {
it('should detect json with CDATA prefix', () => {
const html = `
<html>
<head>
<script type="application/ld+json">//<![CDATA[\n{"name":"Event"}\n]\n]></script>
</head>
</html>
`
const { title } = parseUsingLDJSONData(html, { logger: null })
expect(title).to.equal('Event')
})
it('should detect json', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"name":"Event"}</script>
</head>
</html>
`
const { title } = parseUsingLDJSONData(html, { logger: null })
expect(title).to.equal('Event')
})
it('should throw when JSON parsing fails', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"name":"Event"</script>
</head>
</html>
`
expect(() => {
parseUsingLDJSONData(html, { logger: null })
}).to.throw('Unexpected end of JSON input')
})
})
describe('time', () => {
it('should get start time', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"startDate":"2020-03-02T15:35:00"}</script>
</head>
</html>
`
const { start } = parseUsingLDJSONData(html, { logger: null })
expect(start).to.deep.equal([ 2020, 3, 2, 15, 35 ])
})
it('should return current time for start time if not present', () => {
const now = new Date('2020-01-01 12:00:00')
const spy = jest
.spyOn(global, 'Date')
.mockImplementation(() => now)
const html = `
<html>
<head>
<script type="application/ld+json">{}</script>
</head>
</html>
`
const { start } = parseUsingLDJSONData(html, { logger: null })
expect(start).to.deep.equal([ 2020, 1, 1, 12, 0 ])
spy.mockRestore()
})
it('should get duration based on end and start time', () => {
const html = `
<html>
<head>
<script type="application/ld+json">
{
"startDate":"2020-03-02T15:35:00",
"endDate":"2020-03-02T18:35:00"
}
</script>
</head>
</html>
`
const { duration } = parseUsingLDJSONData(html, { logger: null })
expect(duration).to.deep.equal({ minutes: 180 })
})
it('should get default duration of 120 minutes if ' +
'end date is missing', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{}</script>
</head>
</html>
`
const { duration } = parseUsingLDJSONData(html, { logger: null })
expect(duration).to.deep.equal({ minutes: 120 })
})
})
describe('url', () => {
it('should get url', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"url":"https://abc.xyz"}</script>
</head>
</html>
`
const { url } = parseUsingLDJSONData(html, { logger: null })
expect(url).to.equal('https://abc.xyz')
})
it('should get empty string if url is missing', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{}</script>
</head>
</html>
`
const { url } = parseUsingLDJSONData(html, { logger: null })
expect(url).to.equal('')
})
})
describe('description', () => {
it('should get event description', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{"description":"This is event description."}</script>
</head>
</html>
`
const { description } = parseUsingLDJSONData(html, { logger: null })
expect(description).to.equal('This is event description.')
})
it('should get empty string for missing event description', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{}</script>
</head>
</html>
`
const { description } = parseUsingLDJSONData(html, { logger: null })
expect(description).to.equal('')
})
})
describe('location', () => {
it('should include name of the location', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": { "name": "Test Location" }
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('Test Location')
})
it('should include street address of the location', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"address": { "streetAddress": "132 Test st." }
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('132 Test st.')
})
it('should include address locality of the location', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"address": { "addressLocality": "South" }
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('South')
})
it('should include postal code of the location', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"address": { "postalCode": "113 AB" }
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('113 AB')
})
it('should include address country of the location', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"address": { "addressCountry": "Liberland" }
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('Liberland')
})
it('should concatenate address information', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"address": {
"streetAddress": "132 Test st.",
"addressLocality": "South",
"postalCode": "113 AB",
"addressCountry": "Liberland"
}
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('132 Test st. South 113 AB Liberland')
})
it('should concatenate address information and location name', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"name": "D0nut shop",
"address": {
"streetAddress": "132 Test st.",
"addressLocality": "South",
"postalCode": "113 AB",
"addressCountry": "Liberland"
}
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('D0nut shop 132 Test st. South 113 AB Liberland')
})
it('should concatenate and remove any new lines from location fields', () => {
const html = `
<html>
<head>
<script type="application/ld+json">{
"location": {
"name": "D0nut\\rshop",
"address": {
"streetAddress": "132 Test st.",
"addressLocality": "South\\nNorth",
"postalCode": "113\\nAB",
"addressCountry": "Liberland"
}
}
}
</script>
</head>
</html>
`
const { location } = parseUsingLDJSONData(html, { logger: null })
expect(location).to.equal('D0nut shop 132 Test st. South North 113 AB Liberland')
})
})
})
describe('null results', () => {
it('should return null if script with application\/ld+json ' +
'is not found', () => {
const html = `
<html>
<head>
<script type="application/ld+json">
</script>
</head>
</html>
`
const eventData = parseUsingLDJSONData(html, { logger: null })
expect(eventData).to.be.null
})
it('should return null if script with application\/ld+json ' +
'has no content', () => {
const html = `
<html>
<head>
<script type="text/javascript">alert('x')</script>
</head>
</html>
`
const eventData = parseUsingLDJSONData(html, { logger: null })
expect(eventData).to.be.null
})
})
describe('logging', () => {
it('should log with message', (callback) => {
const logger = new MockLogger()
logger.on('test:log', ({ message }) => {
expect(message).to.equal('Parsing using LDJSON parser')
callback()
})
parseUsingLDJSONData('<html></html>', { logger })
})
it('should log with level', (callback) => {
const logger = new MockLogger()
logger.on('test:log', ({ level }) => {
expect(level).to.equal('info')
callback()
})
parseUsingLDJSONData('<html></html>', { logger })
})
it('should log with service', (callback) => {
const logger = new MockLogger()
logger.on('test:log', ({ service }) => {
expect(service).to.equal('parser')
callback()
})
parseUsingLDJSONData('<html></html>', { logger })
})
})
})