diff --git a/lib/cli.js b/lib/cli.js index 420746c..daa23be 100755 --- a/lib/cli.js +++ b/lib/cli.js @@ -11,9 +11,7 @@ const parseHTMLString = (html, { verbose }) => { try { const LDJSONEventData = parseUsingLDJSONData(html, { logger }) - // TODO: adding empty string for URL argument since we don't know original - // URL of document - const eventData = LDJSONEventData || parseUsingDOM(html, '', { logger }) + const eventData = LDJSONEventData || parseUsingDOM(html, { logger }) if (!eventData) { throw createParserError() diff --git a/lib/services/dom-parser.js b/lib/services/dom-parser.js index bc9fed0..666210f 100644 --- a/lib/services/dom-parser.js +++ b/lib/services/dom-parser.js @@ -38,7 +38,7 @@ const createLocationData = (streetText, areaText) => { // NOTE: Fallback parser // Attempt reading event data directly from DOM -const parseUsingDOM = (html, url, { logger }) => { +const parseUsingDOM = (html, { logger }) => { if (logger) { logger.log({ message: 'Using fallback DOM parser', @@ -74,7 +74,6 @@ const parseUsingDOM = (html, url, { logger }) => { start, duration, title, - url, } if (!eventData.title || !eventData.start) { diff --git a/lib/services/ics-retriever.js b/lib/services/ics-retriever.js index 22a37f6..7885130 100644 --- a/lib/services/ics-retriever.js +++ b/lib/services/ics-retriever.js @@ -8,13 +8,18 @@ const retrieveICS = async (URLparameter, { logger }) => { const url = getNormalizedUrl(URLparameter) const html = await crawl(url, { logger }) const LDJSONEventData = parseUsingLDJSONData(html, { logger }) - const eventData = LDJSONEventData || parseUsingDOM(html, url, { logger }) + const rawEventData = LDJSONEventData || parseUsingDOM(html, { logger }) - if (!eventData) { + if (!rawEventData) { throw createParserError() return } + const eventData = { + ...rawEventData, + url: rawEventData.url || url, + } + const icsContent = await generateICS(eventData) return icsContent } diff --git a/test/services/dom-parser.spec.js b/test/services/dom-parser.spec.js index 1e1ba1c..d110c71 100644 --- a/test/services/dom-parser.spec.js +++ b/test/services/dom-parser.spec.js @@ -19,7 +19,7 @@ describe(parseUsingDOM, () => { ` - const { title } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { title } = parseUsingDOM(html, { logger }) expect(title).to.equal('Test') }) @@ -40,7 +40,7 @@ describe(parseUsingDOM, () => { ` - const { start } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { start } = parseUsingDOM(html, { logger }) expect(start).to.deep.equal([ 2020, 3, 2, 13, 30 ]) }) @@ -64,7 +64,7 @@ describe(parseUsingDOM, () => { ` - const { start } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { start } = parseUsingDOM(html, { logger }) spy.mockRestore() @@ -91,7 +91,7 @@ describe(parseUsingDOM, () => { ` - const { duration } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { duration } = parseUsingDOM(html, { logger }) spy.mockRestore() @@ -115,26 +115,12 @@ describe(parseUsingDOM, () => { ` - const { duration } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { duration } = parseUsingDOM(html, { logger }) expect(duration).to.deep.equal({ minutes: 120 }) }) }) - it('should return passed in url', () => { - const html = ` - - - Test - - - ` - const { url } = parseUsingDOM(html, 'abc.xyz', { logger }) - - expect(url).to.equal('abc.xyz') - }) - - describe('location', () => { it('should return approximated location and area', () => { const html = ` @@ -150,7 +136,7 @@ describe(parseUsingDOM, () => { ` - const { location } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { location } = parseUsingDOM(html, { logger }) expect(location).to.equal('123 Main St. AcmeTown, Main area') }) @@ -170,7 +156,7 @@ describe(parseUsingDOM, () => { ` - const { location } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { location } = parseUsingDOM(html, { logger }) expect(location).to.equal('123 Main St. AcmeTown') }) @@ -190,7 +176,7 @@ describe(parseUsingDOM, () => { ` - const { location } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { location } = parseUsingDOM(html, { logger }) expect(location).to.equal('Some area') }) @@ -210,7 +196,7 @@ describe(parseUsingDOM, () => { ` - const { location } = parseUsingDOM(html, 'abc.xyz', { logger }) + const { location } = parseUsingDOM(html, { logger }) expect(location).to.equal('') }) @@ -223,7 +209,7 @@ describe(parseUsingDOM, () => { callback() }) - parseUsingDOM('', '', { logger }) + parseUsingDOM('', { logger }) }) @@ -233,7 +219,7 @@ describe(parseUsingDOM, () => { callback() }) - parseUsingDOM('', '', { logger }) + parseUsingDOM('', { logger }) }) @@ -243,7 +229,7 @@ describe(parseUsingDOM, () => { callback() }) - parseUsingDOM('', '', { logger }) + parseUsingDOM('', { logger }) }) @@ -253,7 +239,7 @@ describe(parseUsingDOM, () => { callback() }) - parseUsingDOM('', '', { logger }) + parseUsingDOM('', { logger }) }) }) @@ -266,7 +252,7 @@ describe(parseUsingDOM, () => { ` - const eventData = parseUsingDOM(html, 'abc.xyz', { logger }) + const eventData = parseUsingDOM(html, { logger }) expect(eventData).to.be.null }) @@ -286,7 +272,7 @@ describe(parseUsingDOM, () => { ` - const eventData = parseUsingDOM(html, 'abc.xyz', { logger }) + const eventData = parseUsingDOM(html, { logger }) expect(eventData).to.be.null }) diff --git a/test/services/ics-retriever.spec.js b/test/services/ics-retriever.spec.js index 0c138b7..c6a350c 100644 --- a/test/services/ics-retriever.spec.js +++ b/test/services/ics-retriever.spec.js @@ -102,4 +102,23 @@ describe(retrieveICS, () => { callback() } }) + + + it('should contain normalized URL when using DOM parser', async () => { + const html = ` + + + Test + + + + + ` + + setMockCrawlResult(html) + + const icsContent = await retrieveICS('123', { logger }) + expect(icsContent).to.include('URL:https://mobile.facebook.com/events/123') + + }) })