const chai = require('chai')
const { expect } = chai
const chaiSinon = require('chai-sinon')

chai.use(chaiSinon)

const MockLogger = require('../../mocks/logger.mock')
const {
  mockCrawl,
  setMockCrawlResult,
  setMockCrawlErrorResult,
  clearMockCrawlResult,
} = require('../../mocks/crawler.mock')

const { retrieveICS, extractEventDataFromHTML } = require('../../lib/services/ics-retriever')

describe(retrieveICS, () => {
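  // retrieveICS is exercised through a stubbed crawl function; extractEventDataFromHTML is called directly on raw HTML.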
  let logger

  beforeEach(() => {
    logger = new MockLogger()
    clearMockCrawlResult()
  })

  it('should create ICS contents', async () => {
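    // Minimal JSON-LD payload; the crawl is stubbed, so retrieveICS only needs to produce something truthy.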
    const html = `
      <html>
        <head>
          <script type="application/ld+json">{"name":"Test Event"}</script>
        </head>
      </html>
    `

    setMockCrawlResult(html)

    const icsContent = await retrieveICS('https://facebook.com/events/123', {
      logger,
      crawl: mockCrawl,
    })

    expect(icsContent).to.be.ok
  })

  it('should create ICS contents based on LDJSON', async () => {
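    // The JSON-LD name and location should end up as SUMMARY and LOCATION lines in the generated ICS.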
    const html = `
      <html>
        <head>
          <script type="application/ld+json">{"name":"Test Event","location":{"name":"Location X"}}</script>
        </head>
      </html>
    `

    setMockCrawlResult(html)

    const icsContent = await retrieveICS('https://facebook.com/events/123', {
      logger,
      crawl: mockCrawl,
    })

    expect(icsContent).to.include('SUMMARY:Test Event')
    expect(icsContent).to.include('LOCATION:Location X')
  })

  it('should extract event data using LDJSON', () => {
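    // Same markup as above, but extractEventDataFromHTML is called directly instead of going through retrieveICS.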
    const html = `
      <html>
        <head>
          <script type="application/ld+json">{"name":"Test Event","location":{"name":"Location X"}}</script>
        </head>
      </html>
    `

    const { location } = extractEventDataFromHTML(
      html,
      'https://facebook.com/events/123', {
        logger,
      })

    expect(location).to.equal('Location X')
  })

  it('should create ICS contents based on DOM', async () => {
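    // No JSON-LD script tag here; the event data has to come from the #event_summary DOM nodes.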
    const html = `
      <html>
        <head>
          <title>Test</title>
        </head>
        <body>
          <div id="event_summary">
            <div class="test_eventNode1"></div>
            <div class="test_eventNode2"><div class="test_timeNode"></div><div class="test_locationNode"><table><tr><td><span></span><span>123 Main St.\nAcmeTown</span><span>Main area</span></td></tr></table></div></div>
          </div>
        </body>
      </html>
    `

    setMockCrawlResult(html)

    const icsContent = await retrieveICS('https://facebook.com/events/123', {
      logger,
      crawl: mockCrawl,
    })

    expect(icsContent).to.include('LOCATION:123 Main St. AcmeTown, Main area')
  })

  it('should extract event data based on DOM', () => {
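    // Direct extraction from the same DOM-based markup as the previous test.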
    const html = `
      <html>
        <head>
          <title>Test</title>
        </head>
        <body>
          <div id="event_summary">
            <div class="test_eventNode1"></div>
            <div class="test_eventNode2"><div class="test_timeNode"></div><div class="test_locationNode"><table><tr><td><span></span><span>123 Main St.\nAcmeTown</span><span>Main area</span></td></tr></table></div></div>
          </div>
        </body>
      </html>
    `

    const { location } = extractEventDataFromHTML(
      html,
      'https://facebook.com/events/123', {
        logger,
      })

    expect(location).to.equal('123 Main St. AcmeTown, Main area')
  })

  it('should normalize URL when parsing event data based on DOM', () => {
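    // A bare event ID is passed instead of a full URL; the extractor is expected to expand it to the mobile Facebook URL.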
    const html = `
      <html>
        <head>
          <title>Test</title>
        </head>
        <body>
          <div id="event_summary">
          </div>
        </body>
      </html>
    `

    const { url } = extractEventDataFromHTML(
      html,
      '123', {
        logger,
      })

    expect(url).to.equal('https://mobile.facebook.com/events/123')
  })

  it('should throw parser error if no event data is found', () => {
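    // Neither JSON-LD nor an #event_summary node is present, so extraction should throw.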
    const html = `
      <html>
        <head>
        </head>
        <body>
        </body>
      </html>
    `

    expect(() => {
      extractEventDataFromHTML(
        html,
        'https://facebook.com/events/132', {
          logger,
        })
    }).to.throw('Unable to parse event data.')
  })

  it('should contain normalized URL when using DOM parser', async () => {
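    // Same normalization as above, but asserted on the URL property of the ICS produced by retrieveICS.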
    const html = `
      <html>
        <head>
          <title>Test</title>
        </head>
        <body>
        </body>
      </html>
    `

    setMockCrawlResult(html)

    const icsContent = await retrieveICS('123', { logger, crawl: mockCrawl })

    expect(icsContent).to.include('URL:https://mobile.facebook.com/events/123')
  })
})