implement the same parsing logic on server and frontend

The server now downloads the HTML file via a new endpoint, while the
parsing logic happens in the browser. The reason for this change is to
share the same code between both environments.

If JavaScript is disabled, it is still possible to call the previous
endpoint and download the file from the server.
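
In practice, the new browser-side flow, condensed from the diff below, looks roughly like this. downloadEvent is a hypothetical wrapper, error handling and the download-history bookkeeping of the real handler are omitted, and the import paths follow the frontend entry module shown in the diff:

import { extractEventDataFromHTML } from '../../lib/services/ics-retriever'
import generateICS from '../../lib/services/ics-generator'
import logger from './app/logger'

const downloadEvent = async (eventUrl) => {
  // The server endpoint only crawls the event page and returns the raw HTML...
  const response = await fetch('/download/html', {
    method: 'POST',
    headers: {
      'Accept': 'text/html, application/json',
      'Content-Type': 'application/x-www-form-urlencoded',
    },
    body: new URLSearchParams({ url: eventUrl }),
  })
  const html = await response.text()

  // ...while parsing and ICS generation run in the browser, reusing the same
  // module the server uses for the no-JS /download fallback.
  const eventData = extractEventDataFromHTML(html, { logger })
  return generateICS(eventData) // resolves with the iCalendar text
}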
Ondřej Synáček 2020-07-17 22:20:48 +02:00
parent e2c839fb2f
commit 88daa02117
5 changed files with 97 additions and 30 deletions

View File

@@ -4,7 +4,7 @@ const path = require('path')
const favicon = require('serve-favicon')
const rateLimit = require('express-rate-limit')
const retrieveICS = require('./services/ics-retriever')
const { retrieveICS } = require('./services/ics-retriever')
const crawl = require('./services/crawler')
const {
  genericErrorHandler,
@@ -14,6 +14,7 @@ const {
  createErrorLogger,
} = require('./middlewares')
const { createAppLogger } = require('./log-utils')
const { getNormalizedUrl } = require('./utils')
const port = process.env.PORT
const certEndpoint = process.env.CERT_ENDPOINT || ''
@@ -76,6 +77,24 @@ app.get('*', (req, res) => {
  res.status(400).render('404')
})
app.use('/download/html', limiter)
app.use('/download/html', checkURLParameter)
app.post('/download/html', async (req, res, next) => {
  try {
    const { url } = req.body
    const facebookURL = getNormalizedUrl(url)
    const html = await crawl(facebookURL, { logger: appLogger })
    res
      .contentType('text/html')
      .status(200)
      .send(Buffer.from(html, 'utf8'))
  } catch (err) {
    next(err)
  }
})
app.use('/download', limiter)
app.use('/download', checkURLParameter)
app.post('/download', async (req, res, next) => {
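
The new handler reads req.body.url, so it assumes a URL-encoded body parser is registered earlier in the middleware chain. That registration is not part of this diff, but it is presumably already in place, since the existing /download route reads the posted URL the same way. For completeness, a minimal sketch of that assumption:

const express = require('express')

const app = express()

// Parse application/x-www-form-urlencoded bodies so req.body.url is populated.
// Assumption: the real app almost certainly registers an equivalent parser already.
app.use(express.urlencoded({ extended: false }))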

View File

@@ -3,9 +3,7 @@ const parseUsingDOM = require('./dom-parser')
const generateICS = require('./ics-generator')
const { createParserError, getNormalizedUrl } = require('../utils')
const retrieveICS = async (URLparameter, { logger, crawl }) => {
  const url = getNormalizedUrl(URLparameter)
  const html = await crawl(url, { logger })
const extractEventDataFromHTML = (html, { logger }) => {
  const LDJSONEventData = parseUsingLDJSONData(html, { logger })
  const rawEventData = LDJSONEventData || parseUsingDOM(html, { logger })
@@ -19,8 +17,19 @@ const retrieveICS = async (URLparameter, { logger, crawl }) => {
    url: rawEventData.url || url,
  }
  return eventData
}
const retrieveICS = async (URLparameter, { logger, crawl }) => {
  const url = getNormalizedUrl(URLparameter)
  const html = await crawl(url, { logger })
  const eventData = extractEventDataFromHTML(html, { logger })
  const icsContent = await generateICS(eventData)
  return icsContent
}
module.exports = retrieveICS
module.exports = {
  retrieveICS,
  extractEventDataFromHTML,
}

lib/static/app/crawler.js (new file, 20 additions)
View File

@@ -0,0 +1,20 @@
const crawl = async (url, { logger }) => {
  if (logger) {
    logger.log({
      message: `Crawl started for url: ${url}`,
      level: 'info',
      service: 'parser',
    })
  }
  return new Promise((resolve, reject) => {
    fetch(url, {
      method: 'GET',
    }).then((response) => {
      // Resolve with the response body so callers receive the raw HTML,
      // mirroring the server-side crawler.
      resolve(response.text())
    }).catch(reject)
  })
}

export default crawl
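
The client-side crawler mirrors the server-side one, but it does not appear to be wired into the submit flow anywhere in this diff. A hypothetical use could look like the sketch below; parseEventInBrowser is made up, the import paths assume a module sitting next to crawler.js, and direct browser requests to facebook.com would most likely be blocked by cross-origin restrictions, which is presumably why the HTML is still fetched through /download/html:

import crawl from './crawler'
import logger from './logger'
import { extractEventDataFromHTML } from '../../services/ics-retriever'

// Hypothetical: crawl and parse entirely in the browser, bypassing the server.
const parseEventInBrowser = async (eventUrl) => {
  const html = await crawl(eventUrl, { logger })
  return extractEventDataFromHTML(html, { logger })
}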

View File

@@ -7,6 +7,9 @@ import {
  saveRecord,
  deleteRecord,
} from './app/storage'
import logger from './app/logger'
import { extractEventDataFromHTML } from '../../lib/services/ics-retriever'
import generateICS from '../../lib/services/ics-generator'
(() => {
  if (!window.fetch || !window.Promise || !window.URLSearchParams || !window.crypto) {
@@ -168,10 +171,10 @@ import {
const postURL = (data) => {
  return new Promise((resolve, reject) => {
    fetch('/download', {
    fetch('/download/html', {
      method: 'POST',
      headers: {
        'Accept': 'text/calendar, application/json',
        'Accept': 'text/html, application/json',
        'Content-Type': 'application/x-www-form-urlencoded',
      },
      body: data,
@@ -254,6 +257,43 @@ import {
  localStorage.setItem('fb-to-ical-nojs', event.target.checked)
})

const handleHTMLResponse = (html) => {
  try {
    setStatusParsing()
    const eventData = extractEventDataFromHTML(html, { logger })
    generateICS(eventData)
      .then((text) => {
        const dataUri = encodeURIComponent(text)
        const uri = `data:text/calendar;charset=utf-8,${dataUri}`
        link.setAttribute('href', uri)
        link.setAttribute('download', 'download.ics')
        link.click()
        input.value = ''
        // Guard the regexp results so a missing SUMMARY/DTSTART line does not throw.
        const summaryMatch = (text.match(/SUMMARY:.*/) || [])[0]
        const summary = summaryMatch ? summaryMatch.replace(/SUMMARY:/, '') : ''
        const startTimeMatches = text.match(/DTSTART:.*/)
        const startTimeMatch = startTimeMatches ?
          (startTimeMatches[0] || '').replace(/DTSTART:/, '') :
          ''
        const startTime = parseStartTimeFromiCalString(startTimeMatch)
        createRecord(uri, summary, startTime)
        clearStatuses()
      })
      .catch((err) => {
        handleError(err)
      })
  } catch (err) {
    handleError(err)
  }
}

submitButton.addEventListener('click', (event) => {
  if (noJS()) {
    return
@@ -273,28 +313,7 @@ import {
  postURL(formData)
    .then((res) => {
      res.text()
        .then((text) => {
          setStatusParsing()
          const dataUri = encodeURIComponent(text)
          const uri = `data:text/calendar;charset=utf-8,${dataUri}`
          link.setAttribute('href', uri)
          link.setAttribute('download', 'download.ics')
          link.click()
          input.value = ''
          const summaryMatch = text.match(/SUMMARY:.*/)[0]
          const summary = summaryMatch ? summaryMatch.replace(/SUMMARY:/, '') : ''
          const startTimeMatches = text.match(/DTSTART:.*/)
          const startTimeMatch = text.length > 0 ?
            (startTimeMatches[0] || '').replace(/DTSTART:/, '') :
            ''
          const startTime = parseStartTimeFromiCalString(startTimeMatch)
          createRecord(uri, summary, startTime)
          clearStatuses()
        })
        .then(handleHTMLResponse)
        .catch((err) => {
          handleError(err)
        })

View File

@@ -12,7 +12,7 @@ const {
  clearMockCrawlResult,
} = require('../../mocks/crawler.mock')
const retrieveICS = require('../../lib/services/ics-retriever')
const { retrieveICS } = require('../../lib/services/ics-retriever')
jest.mock('../../lib/services/crawler', () => mockCrawl)