handle irrelevant hashtags

This commit is contained in:
wryk 2020-03-06 14:38:44 +01:00
parent 82f382099e
commit 1f866e370d
2 changed files with 70 additions and 45 deletions

View File

@ -1,3 +1,7 @@
import getUrls from 'get-urls'
import { asyncMap, execPipe, map, findOr } from 'iter-tools'
import { mapNullable } from '/services/misc.js'
// Matches one `<target>; rel="name"` entry, capturing the target and the rel name.
// The `g` flag allows every entry of a value to be matched; `i` ignores case.
// NOTE(review): presumably consumed by parseLinkHeader below — confirm.
const LINK_RE = /<(.+?)>; rel="(\w+)"/gi
function parseLinkHeader(linkHeader) {
@ -15,7 +19,11 @@ export const fetchStatus = (domain, id) => fetch(`https://${domain}/api/v1/statu
.then(status => processStatus(domain, status))
/**
 * Async generator over the single status identified by `{ domain, id }`.
 *
 * The status is fetched exactly once through `fetchStatus`. Nothing is yielded
 * when that resolves to `null`, so consumers never receive a `null` item.
 *
 * @param {Object} credentials
 * @param credentials.domain - Instance domain, forwarded to `fetchStatus`.
 * @param credentials.id - Status id, forwarded to `fetchStatus`.
 */
export async function* statusIterator({ domain, id }) {
  const partialTrack = await fetchStatus(domain, id)

  if (partialTrack !== null) {
    yield partialTrack
  }
}
export const hashtagStreamingObservable = (domain, hashtag) => {
@ -63,7 +71,8 @@ export const hashtagsStreamingObservable = (domain, hashtags) => {
})
}
export async function* hashtagTimelineIterator (domain, hashtag) {
export async function* hashtagTimelineStatusesIterator (domain, hashtag) {
let nextLink = `https://${domain}/api/v1/timelines/tag/${hashtag}?limit=40`
while (nextLink) {
@ -73,14 +82,30 @@ export async function* hashtagTimelineIterator (domain, hashtag) {
? parseLinkHeader(response.headers.get('link')).get('next')
: null
const statuses = await response.json()
console.log(`Timeline ${domain} #${hashtag} : fetched ${statuses.length} statuses`)
yield* statuses.map(status => processStatus(domain, status))
yield* await response.json()
}
}
/**
 * Async iterator over the processed statuses of a hashtag timeline.
 *
 * Raw statuses come from `hashtagTimelineStatusesIterator` and are mapped
 * through `processStatus`; `null` results are dropped. Iteration stops (with a
 * log line) once more than `maxConsecutiveMisses` statuses in a row produced
 * `null`. Any non-null result resets the counter.
 *
 * @param domain - Instance domain, forwarded to the statuses iterator and `processStatus`.
 * @param hashtag - Hashtag, forwarded to the statuses iterator and used in the log line.
 * @param {number} [maxConsecutiveMisses=69] - Consecutive `null` results tolerated before giving up.
 */
export const hashtagTimelineIterator = (domain, hashtag, maxConsecutiveMisses = 69) => execPipe(
  hashtagTimelineStatusesIterator(domain, hashtag),
  asyncMap(status => processStatus(domain, status)),
  async function* (tracks) {
    let consecutiveMisses = 0

    for await (const track of tracks) {
      if (track !== null) {
        consecutiveMisses = 0
        yield track
        continue
      }

      consecutiveMisses += 1
      if (consecutiveMisses > maxConsecutiveMisses) {
        console.log(`Not found any viable media on #${hashtag}.`)
        break
      }
    }
  }
)
export async function* hashtagsTimelineIterator (domain, hashtags) {
const iterators = hashtags.map(hashtag => hashtagTimelineIterator(domain, hashtag))
const promises = iterators.map(iterator => iterator.next())
@ -91,7 +116,7 @@ export async function* hashtagsTimelineIterator (domain, hashtags) {
.filter(({ result }) => !result.done)
if (results.length > 0) {
const sorted = results.sort((a, b) => b.result.value.date - a.result.value.date)
// Newest first: each value is `{ referer, partialMedia }` and the date lives on `referer`.
const sorted = results.sort((a, b) => b.result.value.referer.date - a.result.value.referer.date)
const { index, result: { value } } = sorted[0]
promises[index] = iterators[index].next()
@ -134,11 +159,30 @@ export async function* hashtagsIterator(domain, hashtags) {
}
}
/**
 * Builds the flat record for a raw status: author username, content,
 * creation date, URL and the Mastodon credentials (`domain` + status id).
 *
 * @param domain - Instance domain stored in the credentials.
 * @param status - Raw status; `account.username`, `content`, `created_at`, `url` and `id` are read.
 */
const processStatus = (domain, status) => {
  const { account, content, created_at: createdAt, url, id } = status

  return {
    username: account.username,
    content,
    date: new Date(createdAt),
    url,
    credentials: { type: 'mastodon', domain, id }
  }
}
/**
 * Turns a raw status into `{ referer, partialMedia }`, or `null` when
 * `findMedia` returns `null` for it.
 *
 * `referer` carries the author username, content, creation date, URL and the
 * Mastodon credentials (`domain` + status id); `partialMedia` is whatever
 * `findMedia` returned.
 *
 * @param domain - Instance domain stored in the referer credentials.
 * @param status - Raw status; `account.username`, `content`, `created_at`, `url` and `id` are read.
 */
const processStatus = (domain, status) => {
  const toPartialTrack = partialMedia => {
    const { account, content, created_at: createdAt, url, id } = status

    return {
      referer: {
        username: account.username,
        content,
        date: new Date(createdAt),
        url,
        credentials: { type: 'mastodon', domain, id }
      },
      partialMedia
    }
  }

  return mapNullable(findMedia(status), toPartialTrack)
}
/**
 * Returns the first YouTube media referenced in `status.content`, or `null`.
 *
 * The URLs produced by `getUrls(status.content)` are inspected in order. A URL
 * counts as YouTube media when it is on `youtu.be` (id taken from the path) or
 * on one of the listed youtube.com hosts with a `v` query parameter (id taken
 * from `v`).
 *
 * @param status - Object whose `content` is scanned for URLs.
 * @returns `{ url, credentials: { type: 'youtube', id } }` or `null`.
 */
const findMedia = status => {
  const youtubeHosts = ['youtube.com', 'm.youtube.com', 'music.youtube.com']

  const toYoutubeMedia = url => {
    const { hostname, pathname, searchParams } = new URL(url)
    const youtube = id => ({ url, credentials: { type: 'youtube', id } })

    // Short links keep the id in the path: https://youtu.be/<id>
    if (hostname === 'youtu.be') {
      return youtube(pathname.substring(1))
    }
    if (youtubeHosts.includes(hostname) && searchParams.has('v')) {
      return youtube(searchParams.get('v'))
    }
    return null
  }

  return execPipe(
    status.content,
    getUrls,
    map(toYoutubeMedia),
    findOr(null, media => media !== null)
  )
}

View File

@ -1,11 +1,13 @@
import getUrls from 'get-urls'
import { execPipe, asyncFilter, asyncMap, map, take, filter, asyncFlatMap, toArray } from 'iter-tools'
import { execPipe, asyncFilter, asyncMap } from 'iter-tools'
import { share } from '/routes.js'
/** Elements of `xs` that also occur in `ys`, in `xs` order (duplicates kept). */
const intersection = (xs, ys) => {
  const isShared = candidate => ys.includes(candidate)
  return xs.filter(isShared)
}
/** Elements of `xs` that do not occur in `ys`, in `xs` order (duplicates kept). */
const difference = (xs, ys) => {
  const isMissingFromYs = candidate => !ys.includes(candidate)
  return xs.filter(isMissingFromYs)
}
/** Elements found in exactly one of `xs` / `ys`: those only in `xs` first, then those only in `ys`. */
const symmetricDifference = (xs, ys) => {
  const onlyInXs = difference(xs, ys)
  const onlyInYs = difference(ys, xs)
  return [...onlyInXs, ...onlyInYs]
}
/**
 * Applies `f` to `nullable` unless it is strictly `null`, in which case `null`
 * is passed through and `f` is not called. `undefined` is NOT treated as null.
 *
 * @param nullable - Value or `null`.
 * @param {Function} f - Mapper applied to non-null values.
 * @returns `null`, or the result of `f(nullable)`.
 */
export const mapNullable = (nullable, f) => {
  if (nullable === null) {
    return nullable
  }
  return f(nullable)
}
export const tap = f => x => {
f(x)
return x
@ -62,40 +64,19 @@ export async function* tracksIterator(refererGenerator, cache) {
try {
yield* execPipe(
refererGenerator,
asyncFilter(({ credentials: { domain, id } }) => notKnow(['referer', 'mastodon', domain, id])),
asyncFlatMap(referer => {
return execPipe(
referer.content,
getUrls,
map(url => {
const { hostname, pathname, searchParams } = new URL(url)
if (['youtube.com', 'm.youtube.com', 'music.youtube.com'].includes(hostname) && searchParams.has('v')) {
return { url, credentials: { type: 'youtube', id: searchParams.get('v') } }
} else if (hostname === 'youtu.be') {
return { url, credentials: { type: 'youtube', id: pathname.substring(1) } }
} else {
return null
}
}),
filter(media => media !== null),
map(({ url, credentials }) => ({ referer, mediaUrl: url, mediaCredentials: credentials })),
take(1),
toArray
)
}),
asyncFilter(({ mediaCredentials: { id }}) => notKnow(['media', 'youtube', id])),
asyncMap(async ({ referer, mediaUrl, mediaCredentials }) => {
const metadata = await fetchMetadata(mediaCredentials)
asyncFilter(({ referer: { credentials: { domain, id } } }) => notKnow(['referer', 'mastodon', domain, id])),
asyncFilter(({ partialMedia: { credentials: { id } } }) => notKnow(['media', 'youtube', id])),
asyncMap(async ({ referer, partialMedia }) => {
const metadata = await fetchMetadata(partialMedia.credentials)
return {
shareUrl: `${location.origin}${share.reverse({ domain: referer.credentials.domain, id: referer.credentials.id })}`,
referer,
media: {
title: metadata.title,
url: mediaUrl,
cover: `https://img.youtube.com/vi/${mediaCredentials.id}/mqdefault.jpg`,
credentials: mediaCredentials
url: partialMedia.url,
cover: `https://img.youtube.com/vi/${partialMedia.credentials.id}/mqdefault.jpg`,
credentials: partialMedia.credentials
}
}
})