diff --git a/lib/services/ldjson-parser.js b/lib/services/ldjson-parser.js index 219f3b0..12e7e58 100644 --- a/lib/services/ldjson-parser.js +++ b/lib/services/ldjson-parser.js @@ -22,7 +22,7 @@ const parseEventData = (eventData) => { locationName, addressStr, ].join(' ') - const cleanedLocationStr = locationStr.replace(/\r?\n|\r/g, ' ') + const cleanedLocationStr = locationStr.trim().replace(/\r?\n|\r/g, ' ') const title = eventData.name || '' const url = eventData.url || '' const description = eventData.description || '' @@ -46,36 +46,37 @@ const parseUsingLDJSONData = (html, { logger }) => { }) } - try { - // NOTE: Mobile web should have serialized - // event info in one of the script tags - const $ = cheerio.load(html) - const $scripts = $('head script[type="application/ld+json"]') - const rawData = $scripts.toArray().reduce((data, node) => { - const firstNode = node.children[0] - - if (!firstNode || !firstNode.data) { - return data - } - - if (firstNode.data.startsWith('// { + const firstNode = node.children[0] + if (!firstNode || !firstNode.data) { return data - }, null) - - if (!rawData) { - return null } - const eventData = JSON.parse(rawData.slice(12, -5)) - const data = parseEventData(eventData) + // NOTE: Handle prefix + if (firstNode.data.startsWith('// { + describe('results', () => { + describe('detect LDJSON', () => { + it('should detect json with CDATA prefix', () => { + const html = ` + + + + + + ` + + const { title } = parseUsingLDJSONData(html, { logger: null }) + + expect(title).to.equal('Event') + }) + + + it('should detect json', () => { + const html = ` + + + + + + ` + + const { title } = parseUsingLDJSONData(html, { logger: null }) + + expect(title).to.equal('Event') + }) + + + it('should throw when JSON parsing fails', () => { + const html = ` + + + + + + ` + + expect(() => { + parseUsingLDJSONData(html, { logger: null }) + }).to.throw('Unexpected end of JSON input') + }) + }) + + describe('time', () => { + it('should get start time', () => { + const html = ` + + + + + + ` + + const { start } = parseUsingLDJSONData(html, { logger: null }) + + expect(start).to.deep.equal([ 2020, 3, 2, 15, 35 ]) + }) + + + it('should return current time for start time if not present', () => { + const now = new Date('2020-01-01 12:00:00') + const spy = jest + .spyOn(global, 'Date') + .mockImplementation(() => now) + + const html = ` + + + + + + ` + + const { start } = parseUsingLDJSONData(html, { logger: null }) + + expect(start).to.deep.equal([ 2020, 1, 1, 12, 0 ]) + + spy.mockRestore() + }) + + + it('should get duration based on end and start time', () => { + const html = ` + + + + + + ` + + const { duration } = parseUsingLDJSONData(html, { logger: null }) + + expect(duration).to.deep.equal({ minutes: 180 }) + }) + + + it('should get default duration of 120 minutes if ' + + 'end date is missing', () => { + const html = ` + + + + + + ` + + const { duration } = parseUsingLDJSONData(html, { logger: null }) + + expect(duration).to.deep.equal({ minutes: 120 }) + }) + }) + + describe('url', () => { + it('should get url', () => { + const html = ` + + + + + + ` + + const { url } = parseUsingLDJSONData(html, { logger: null }) + + expect(url).to.equal('https://abc.xyz') + }) + + + it('should get empty string if url is missing', () => { + const html = ` + + + + + + ` + + const { url } = parseUsingLDJSONData(html, { logger: null }) + + expect(url).to.equal('') + }) + }) + + describe('description', () => { + it('should get event description', () => { + const html = ` + + + + + + ` + + const { description } = parseUsingLDJSONData(html, { logger: null }) + + expect(description).to.equal('This is event description.') + }) + + + it('should get empty string for missing event description', () => { + const html = ` + + + + + + ` + + const { description } = parseUsingLDJSONData(html, { logger: null }) + + expect(description).to.equal('') + }) + }) + + describe('location', () => { + it('should include name of the location', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('Test Location') + }) + + + it('should include street address of the location', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('132 Test st.') + }) + + + it('should include address locality of the location', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('South') + }) + + + it('should include postal code of the location', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('113 AB') + }) + + + it('should include address country of the location', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('Liberland') + }) + + + it('should concatenate address information', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('132 Test st. South 113 AB Liberland') + }) + + + it('should concatenate address information and location name', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('D0nut shop 132 Test st. South 113 AB Liberland') + }) + + + it('should concatenate and remove any new lines from location fields', () => { + const html = ` + + + + + + ` + + const { location } = parseUsingLDJSONData(html, { logger: null }) + + expect(location).to.equal('D0nut shop 132 Test st. South North 113 AB Liberland') + }) + }) + }) + + describe('null results', () => { + it('should return null if script with application\/ld+json ' + + 'is not found', () => { + const html = ` + + + + + + ` + + const eventData = parseUsingLDJSONData(html, { logger: null }) + + expect(eventData).to.be.null + }) + + + it('should return null if script with application\/ld+json ' + + 'has no content', () => { + const html = ` + + + + + + ` + + const eventData = parseUsingLDJSONData(html, { logger: null }) + + expect(eventData).to.be.null + }) + }) + + describe('logging', () => { + it('should log with message', (callback) => { + const logger = new MockLogger() + + logger.on('test:log', ({ message }) => { + expect(message).to.equal('Parsing using LDJSON parser') + callback() + }) + + parseUsingLDJSONData('', { logger }) + }) + + + it('should log with level', (callback) => { + const logger = new MockLogger() + + logger.on('test:log', ({ level }) => { + expect(level).to.equal('info') + callback() + }) + + parseUsingLDJSONData('', { logger }) + }) + + + it('should log with service', (callback) => { + const logger = new MockLogger() + + logger.on('test:log', ({ service }) => { + expect(service).to.equal('parser') + callback() + }) + + parseUsingLDJSONData('', { logger }) + }) + }) +})