remove URL requirement for DOM parser

This commit is contained in:
Ondřej Synáček 2020-07-16 16:48:05 +02:00
parent 456eaa1fbc
commit 9da4c33ffd
5 changed files with 43 additions and 36 deletions

View File

@ -11,9 +11,7 @@ const parseHTMLString = (html, { verbose }) => {
try {
const LDJSONEventData = parseUsingLDJSONData(html, { logger })
// TODO: adding empty string for URL argument since we don't know original
// URL of document
const eventData = LDJSONEventData || parseUsingDOM(html, '', { logger })
const eventData = LDJSONEventData || parseUsingDOM(html, { logger })
if (!eventData) {
throw createParserError()

View File

@ -38,7 +38,7 @@ const createLocationData = (streetText, areaText) => {
// NOTE: Fallback parser
// Attempt reading event data directly from DOM
const parseUsingDOM = (html, url, { logger }) => {
const parseUsingDOM = (html, { logger }) => {
if (logger) {
logger.log({
message: 'Using fallback DOM parser',
@ -74,7 +74,6 @@ const parseUsingDOM = (html, url, { logger }) => {
start,
duration,
title,
url,
}
if (!eventData.title || !eventData.start) {

View File

@ -8,13 +8,18 @@ const retrieveICS = async (URLparameter, { logger }) => {
const url = getNormalizedUrl(URLparameter)
const html = await crawl(url, { logger })
const LDJSONEventData = parseUsingLDJSONData(html, { logger })
const eventData = LDJSONEventData || parseUsingDOM(html, url, { logger })
const rawEventData = LDJSONEventData || parseUsingDOM(html, { logger })
if (!eventData) {
if (!rawEventData) {
throw createParserError()
return
}
const eventData = {
...rawEventData,
url: rawEventData.url || url,
}
const icsContent = await generateICS(eventData)
return icsContent
}

View File

@ -19,7 +19,7 @@ describe(parseUsingDOM, () => {
</head>
</html>
`
const { title } = parseUsingDOM(html, 'abc.xyz', { logger })
const { title } = parseUsingDOM(html, { logger })
expect(title).to.equal('Test')
})
@ -40,7 +40,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { start } = parseUsingDOM(html, 'abc.xyz', { logger })
const { start } = parseUsingDOM(html, { logger })
expect(start).to.deep.equal([ 2020, 3, 2, 13, 30 ])
})
@ -64,7 +64,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { start } = parseUsingDOM(html, 'abc.xyz', { logger })
const { start } = parseUsingDOM(html, { logger })
spy.mockRestore()
@ -91,7 +91,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { duration } = parseUsingDOM(html, 'abc.xyz', { logger })
const { duration } = parseUsingDOM(html, { logger })
spy.mockRestore()
@ -115,26 +115,12 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { duration } = parseUsingDOM(html, 'abc.xyz', { logger })
const { duration } = parseUsingDOM(html, { logger })
expect(duration).to.deep.equal({ minutes: 120 })
})
})
it('should return passed in url', () => {
const html = `
<html>
<head>
<title>Test</title>
</head>
</html>
`
const { url } = parseUsingDOM(html, 'abc.xyz', { logger })
expect(url).to.equal('abc.xyz')
})
describe('location', () => {
it('should return approximated location and area', () => {
const html = `
@ -150,7 +136,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { location } = parseUsingDOM(html, 'abc.xyz', { logger })
const { location } = parseUsingDOM(html, { logger })
expect(location).to.equal('123 Main St. AcmeTown, Main area')
})
@ -170,7 +156,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { location } = parseUsingDOM(html, 'abc.xyz', { logger })
const { location } = parseUsingDOM(html, { logger })
expect(location).to.equal('123 Main St. AcmeTown')
})
@ -190,7 +176,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { location } = parseUsingDOM(html, 'abc.xyz', { logger })
const { location } = parseUsingDOM(html, { logger })
expect(location).to.equal('Some area')
})
@ -210,7 +196,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const { location } = parseUsingDOM(html, 'abc.xyz', { logger })
const { location } = parseUsingDOM(html, { logger })
expect(location).to.equal('')
})
@ -223,7 +209,7 @@ describe(parseUsingDOM, () => {
callback()
})
parseUsingDOM('', '', { logger })
parseUsingDOM('', { logger })
})
@ -233,7 +219,7 @@ describe(parseUsingDOM, () => {
callback()
})
parseUsingDOM('', '', { logger })
parseUsingDOM('', { logger })
})
@ -243,7 +229,7 @@ describe(parseUsingDOM, () => {
callback()
})
parseUsingDOM('', '', { logger })
parseUsingDOM('', { logger })
})
@ -253,7 +239,7 @@ describe(parseUsingDOM, () => {
callback()
})
parseUsingDOM('', '', { logger })
parseUsingDOM('', { logger })
})
})
@ -266,7 +252,7 @@ describe(parseUsingDOM, () => {
</head>
</html>
`
const eventData = parseUsingDOM(html, 'abc.xyz', { logger })
const eventData = parseUsingDOM(html, { logger })
expect(eventData).to.be.null
})
@ -286,7 +272,7 @@ describe(parseUsingDOM, () => {
</body>
</html>
`
const eventData = parseUsingDOM(html, 'abc.xyz', { logger })
const eventData = parseUsingDOM(html, { logger })
expect(eventData).to.be.null
})

View File

@ -102,4 +102,23 @@ describe(retrieveICS, () => {
callback()
}
})
it('should contain normalized URL when using DOM parser', async () => {
const html = `
<html>
<head>
<title>Test</title>
</head>
<body>
</body>
</html>
`
setMockCrawlResult(html)
const icsContent = await retrieveICS('123', { logger })
expect(icsContent).to.include('URL:https://mobile.facebook.com/events/123')
})
})