handle irrelevant hashtags

This commit is contained in:
wryk 2020-03-06 14:38:44 +01:00
parent 82f382099e
commit 1f866e370d
2 changed files with 70 additions and 45 deletions

View File

@ -1,3 +1,7 @@
import getUrls from 'get-urls'
import { asyncMap, execPipe, map, findOr } from 'iter-tools'
import { mapNullable } from '/services/misc.js'
const LINK_RE = /<(.+?)>; rel="(\w+)"/gi const LINK_RE = /<(.+?)>; rel="(\w+)"/gi
function parseLinkHeader(linkHeader) { function parseLinkHeader(linkHeader) {
@ -15,7 +19,11 @@ export const fetchStatus = (domain, id) => fetch(`https://${domain}/api/v1/statu
.then(status => processStatus(domain, status)) .then(status => processStatus(domain, status))
// statusIterator — each line below shows the previous revision followed by the current one.
// Current revision: awaits fetchStatus(domain, id) and yields the result only when it is
// not null, so the generator may finish without yielding anything. The previous revision
// yielded the awaited result unconditionally.
export async function* statusIterator({ domain, id }) { export async function* statusIterator({ domain, id }) {
yield await fetchStatus(domain, id) const partialTrack = await fetchStatus(domain, id)
if (partialTrack !== null) {
yield partialTrack
}
} }
export const hashtagStreamingObservable = (domain, hashtag) => { export const hashtagStreamingObservable = (domain, hashtag) => {
@ -63,7 +71,8 @@ export const hashtagsStreamingObservable = (domain, hashtags) => {
}) })
} }
export async function* hashtagTimelineIterator (domain, hashtag) {
export async function* hashtagTimelineStatusesIterator (domain, hashtag) {
let nextLink = `https://${domain}/api/v1/timelines/tag/${hashtag}?limit=40` let nextLink = `https://${domain}/api/v1/timelines/tag/${hashtag}?limit=40`
while (nextLink) { while (nextLink) {
@ -73,14 +82,30 @@ export async function* hashtagTimelineIterator (domain, hashtag) {
? parseLinkHeader(response.headers.get('link')).get('next') ? parseLinkHeader(response.headers.get('link')).get('next')
: null : null
const statuses = await response.json() yield* await response.json()
console.log(`Timeline ${domain} #${hashtag} : fetched ${statuses.length} statuses`)
yield* statuses.map(status => processStatus(domain, status))
} }
} }
/**
 * Lazily walks the timeline of one hashtag and yields only the statuses that
 * `processStatus` turned into a non-null value.
 *
 * Null results are skipped. As soon as more than `maxConsecutiveMisses` null
 * results arrive in a row, a message is logged and the iteration stops, so a
 * hashtag without usable statuses does not cause the whole timeline to be
 * paged through.
 *
 * @param {string} domain - host used to build the timeline URL; also passed to `processStatus`.
 * @param {string} hashtag - hashtag whose timeline is read; also used in the log message.
 * @param {number} [maxConsecutiveMisses=69] - number of consecutive null results
 *   tolerated before giving up (the iteration ends on the next one).
 * @returns {AsyncGenerator<object>} async generator of non-null `processStatus` results.
 */
export const hashtagTimelineIterator = (domain, hashtag, maxConsecutiveMisses = 69) => execPipe(
  hashtagTimelineStatusesIterator(domain, hashtag),
  asyncMap(status => processStatus(domain, status)),
  async function* (processedStatuses) {
    let consecutiveMisses = 0

    for await (const processed of processedStatuses) {
      if (processed !== null) {
        // A hit resets the streak: only an uninterrupted run of misses ends the iteration.
        consecutiveMisses = 0
        yield processed
        continue
      }

      consecutiveMisses += 1

      if (consecutiveMisses > maxConsecutiveMisses) {
        console.log(`Not found any viable media on #${hashtag}.`)
        break
      }
    }
  }
)
export async function* hashtagsTimelineIterator (domain, hashtags) { export async function* hashtagsTimelineIterator (domain, hashtags) {
const iterators = hashtags.map(hashtag => hashtagTimelineIterator(domain, hashtag)) const iterators = hashtags.map(hashtag => hashtagTimelineIterator(domain, hashtag))
const promises = iterators.map(iterator => iterator.next()) const promises = iterators.map(iterator => iterator.next())
@ -91,7 +116,7 @@ export async function* hashtagsTimelineIterator (domain, hashtags) {
.filter(({ result }) => !result.done) .filter(({ result }) => !result.done)
if (results.length > 0) { if (results.length > 0) {
const sorted = results.sort((a, b) => b.result.value.date - a.result.value.date) const sorted = results.sort((a, b) => b.result.value.referer.date - a.result.value.referer.date)
const { index, result: { value } } = sorted[0] const { index, result: { value } } = sorted[0]
promises[index] = iterators[index].next() promises[index] = iterators[index].next()
@ -134,11 +159,30 @@ export async function* hashtagsIterator(domain, hashtags) {
} }
} }
// processStatus — each line below shows the previous revision followed by the current one.
// Current revision: passes findMedia(status) through mapNullable and, in the callback,
// builds { referer, partialMedia }, where `referer` holds the fields the previous revision
// returned at the top level (username, content, date, url, credentials).
// NOTE(review): mapNullable is presumably the helper imported from '/services/misc.js',
// which returns null without calling the callback when its first argument is null —
// if so, this function returns null for statuses in which findMedia finds nothing; confirm.
const processStatus = (domain, status) => ({ const processStatus = (domain, status) => mapNullable(findMedia(status), partialMedia => ({
username: status.account.username, referer: {
content: status.content, username: status.account.username,
date: new Date(status.created_at), content: status.content,
url: status.url, date: new Date(status.created_at),
credentials: { type: 'mastodon', domain, id: status.id } url: status.url,
}) credentials: { type: 'mastodon', domain, id: status.id }
},
partialMedia
}))
/**
 * Finds the first YouTube link in a status.
 *
 * The URLs extracted from `status.content` by `getUrls` are inspected in order.
 * A URL matches when it is a watch link on one of the listed YouTube hosts with
 * a non-empty `v` query parameter, or a `youtu.be` link with a non-empty path.
 * URLs with an empty video id (for example `https://youtu.be/` or `?v=`) are
 * rejected, so a later, usable link in the same status can still be picked.
 *
 * @param {{ content: string }} status - status whose `content` is scanned for URLs.
 * @returns {{ url: string, credentials: { type: 'youtube', id: string } } | null}
 *   the first matching media, or null when no URL matches.
 */
const findMedia = status => execPipe(
  status.content,
  getUrls,
  map(url => {
    const { hostname, pathname, searchParams } = new URL(url)

    // NOTE(review): no `www.youtube.com` entry — presumably getUrls strips `www.`
    // while normalizing URLs; confirm against the get-urls options in use.
    const isWatchHost = ['youtube.com', 'm.youtube.com', 'music.youtube.com'].includes(hostname)

    let id = null
    if (isWatchHost) {
      // `get` returns null when the `v` parameter is absent.
      id = searchParams.get('v')
    } else if (hostname === 'youtu.be') {
      // Drop the leading "/" of the path; what remains is used as the video id.
      id = pathname.substring(1)
    }

    // Both null and the empty string mean "no usable video id".
    return id ? { url, credentials: { type: 'youtube', id } } : null
  }),
  findOr(null, media => media !== null)
)

View File

@ -1,11 +1,13 @@
import getUrls from 'get-urls' import { execPipe, asyncFilter, asyncMap } from 'iter-tools'
import { execPipe, asyncFilter, asyncMap, map, take, filter, asyncFlatMap, toArray } from 'iter-tools'
import { share } from '/routes.js' import { share } from '/routes.js'
// intersection(xs, ys): the elements of xs that ys also includes, in xs order.
// (The definition appears twice on this line: previous and current revision, unchanged.)
const intersection = (xs, ys) => xs.filter(x => ys.includes(x)) const intersection = (xs, ys) => xs.filter(x => ys.includes(x))
// difference(xs, ys): the elements of xs that ys does not include, in xs order.
// (The definition appears twice on this line: previous and current revision, unchanged.)
const difference = (xs, ys) => xs.filter(x => !ys.includes(x)) const difference = (xs, ys) => xs.filter(x => !ys.includes(x))
// symmetricDifference(xs, ys): elements found in exactly one of the two arrays —
// difference(xs, ys) followed by difference(ys, xs).
// (The definition appears twice on this line: previous and current revision, unchanged.)
const symmetricDifference = (xs, ys) => [...difference(xs, ys), ...difference(ys, xs)] const symmetricDifference = (xs, ys) => [...difference(xs, ys), ...difference(ys, xs)]
/**
 * Applies `f` to a value unless that value is exactly `null`.
 *
 * Only `null` short-circuits: `undefined` and other falsy values
 * (0, '', false, NaN) are passed to `f` like any other value.
 *
 * @param {*} nullable - value to transform, or null.
 * @param {function(*): *} f - transformation applied to a non-null value.
 * @returns {*} null when `nullable` is null, otherwise `f(nullable)`.
 */
export const mapNullable = (nullable, f) => {
  if (nullable !== null) {
    return f(nullable)
  }

  return nullable
}
export const tap = f => x => { export const tap = f => x => {
f(x) f(x)
return x return x
@ -62,40 +64,19 @@ export async function* tracksIterator(refererGenerator, cache) {
try { try {
yield* execPipe( yield* execPipe(
refererGenerator, refererGenerator,
asyncFilter(({ credentials: { domain, id } }) => notKnow(['referer', 'mastodon', domain, id])), asyncFilter(({ referer: { credentials: { domain, id } } }) => notKnow(['referer', 'mastodon', domain, id])),
asyncFlatMap(referer => { asyncFilter(({ partialMedia: { credentials: { id } } }) => notKnow(['media', 'youtube', id])),
return execPipe( asyncMap(async ({ referer, partialMedia }) => {
referer.content, const metadata = await fetchMetadata(partialMedia.credentials)
getUrls,
map(url => {
const { hostname, pathname, searchParams } = new URL(url)
if (['youtube.com', 'm.youtube.com', 'music.youtube.com'].includes(hostname) && searchParams.has('v')) {
return { url, credentials: { type: 'youtube', id: searchParams.get('v') } }
} else if (hostname === 'youtu.be') {
return { url, credentials: { type: 'youtube', id: pathname.substring(1) } }
} else {
return null
}
}),
filter(media => media !== null),
map(({ url, credentials }) => ({ referer, mediaUrl: url, mediaCredentials: credentials })),
take(1),
toArray
)
}),
asyncFilter(({ mediaCredentials: { id }}) => notKnow(['media', 'youtube', id])),
asyncMap(async ({ referer, mediaUrl, mediaCredentials }) => {
const metadata = await fetchMetadata(mediaCredentials)
return { return {
shareUrl: `${location.origin}${share.reverse({ domain: referer.credentials.domain, id: referer.credentials.id })}`, shareUrl: `${location.origin}${share.reverse({ domain: referer.credentials.domain, id: referer.credentials.id })}`,
referer, referer,
media: { media: {
title: metadata.title, title: metadata.title,
url: mediaUrl, url: partialMedia.url,
cover: `https://img.youtube.com/vi/${mediaCredentials.id}/mqdefault.jpg`, cover: `https://img.youtube.com/vi/${partialMedia.credentials.id}/mqdefault.jpg`,
credentials: mediaCredentials credentials: partialMedia.credentials
} }
} }
}) })