First release
Addition of the CLI for the generation of podcast feeds of Ad Alta Voce. This first version includes the commands: - `single` generate a podcast feed of the given audiobook - `all` generate a podcast feed for all audiobooks
This commit is contained in:
parent
021a99cfcc
commit
df30fbed23
48
README.md
48
README.md
|
@ -1,3 +1,51 @@
|
||||||
# Ad Alta Voce
|
# Ad Alta Voce
|
||||||
|
|
||||||
Script per la generazione dei Feed Podcast di [Ad Alta Voce](https://www.raiplayradio.it/programmi/adaltavoce/).
|
Script per la generazione dei Feed Podcast di [Ad Alta Voce](https://www.raiplayradio.it/programmi/adaltavoce/).
|
||||||
|
|
||||||
|
## Utilizzo
|
||||||
|
|
||||||
|
### Installazione
|
||||||
|
|
||||||
|
Una volta scaricato il repository è possibile installare il programma tramite `stack` attraverso il seguente comando:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
stack install
|
||||||
|
```
|
||||||
|
|
||||||
|
L'eseguibile può essere chiamato attraverso il comando `loud`.
|
||||||
|
|
||||||
|
### Generazione di un singolo podcast
|
||||||
|
|
||||||
|
È possibile generare il feed podcast di un audiobook tramite il seguente comando:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
loud single <audiobook-url>
|
||||||
|
```
|
||||||
|
|
||||||
|
Una lista di tutti gli audiolibri di Ad Alta Voce può essere recuperata al seguente [link](https://www.raiplayradio.it/programmi/adaltavoce/archivio/audiolibri/tutte/).
|
||||||
|
|
||||||
|
### Generazione di tutti i podcast
|
||||||
|
|
||||||
|
È possibile generare in automatico tutti i feed podcast degli audiobook attraverso il seguente comando:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
loud all
|
||||||
|
```
|
||||||
|
|
||||||
|
### Help page
|
||||||
|
|
||||||
|
È possibile visualizzare le opzioni aggiuntive attraverso la *help page*, richiamabile con il seguente comando:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
loud --help
|
||||||
|
```
|
||||||
|
|
||||||
|
## Utilizzo tramite stack
|
||||||
|
|
||||||
|
In alternativa è possibile utilizzare `loud` attraverso `stack` senza doverlo installare.
|
||||||
|
In questo caso i comandi sono:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
stack run -- single <audiobook-url>
|
||||||
|
stack run -- all
|
||||||
|
```
|
||||||
|
|
|
@ -5,36 +5,51 @@ cabal-version: 1.12
|
||||||
-- see: https://github.com/sol/hpack
|
-- see: https://github.com/sol/hpack
|
||||||
|
|
||||||
name: ad-alta-voce
|
name: ad-alta-voce
|
||||||
version: 0.1.0.0
|
version: 0.0.0.1
|
||||||
description: Please see the README on GitHub at <https://github.com/githubuser/ad-alta-voce#readme>
|
description: Script per la generazione dei Feed Podcast di Ad Alta Voce
|
||||||
homepage: https://github.com/githubuser/ad-alta-voce#readme
|
homepage: https://github.com/norangebit/ad-alta-voce#readme
|
||||||
bug-reports: https://github.com/githubuser/ad-alta-voce/issues
|
bug-reports: https://github.com/norangebit/ad-alta-voce/issues
|
||||||
author: Author name here
|
author: Raffaele Mignone
|
||||||
maintainer: example@example.com
|
maintainer: git@norangeb.it
|
||||||
copyright: 2021 Author name here
|
copyright: 2021 norangebit
|
||||||
license: BSD3
|
license: GPL3
|
||||||
license-file: LICENSE
|
license-file: LICENSE
|
||||||
build-type: Simple
|
build-type: Simple
|
||||||
extra-source-files:
|
extra-source-files:
|
||||||
README.md
|
README.md
|
||||||
ChangeLog.md
|
ChangeLog.md
|
||||||
|
data-files:
|
||||||
|
templates/podcast.mustache
|
||||||
|
data-dir: data
|
||||||
|
|
||||||
source-repository head
|
source-repository head
|
||||||
type: git
|
type: git
|
||||||
location: https://github.com/githubuser/ad-alta-voce
|
location: https://github.com/norangebit/ad-alta-voce
|
||||||
|
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
Lib
|
Command.All
|
||||||
|
Command.CLI
|
||||||
|
Command.Single
|
||||||
|
Scraper.Audiobook
|
||||||
|
Scraper.Playlist
|
||||||
|
Types
|
||||||
other-modules:
|
other-modules:
|
||||||
Paths_ad_alta_voce
|
Paths_ad_alta_voce
|
||||||
hs-source-dirs:
|
hs-source-dirs:
|
||||||
src
|
src
|
||||||
build-depends:
|
build-depends:
|
||||||
base >=4.7 && <5
|
base >=4.7 && <5
|
||||||
|
, directory
|
||||||
|
, mustache
|
||||||
|
, optparse-applicative
|
||||||
|
, parsec
|
||||||
|
, scalpel
|
||||||
|
, text
|
||||||
|
, time
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable ad-alta-voce-exe
|
executable loud
|
||||||
main-is: Main.hs
|
main-is: Main.hs
|
||||||
other-modules:
|
other-modules:
|
||||||
Paths_ad_alta_voce
|
Paths_ad_alta_voce
|
||||||
|
@ -44,6 +59,13 @@ executable ad-alta-voce-exe
|
||||||
build-depends:
|
build-depends:
|
||||||
ad-alta-voce
|
ad-alta-voce
|
||||||
, base >=4.7 && <5
|
, base >=4.7 && <5
|
||||||
|
, directory
|
||||||
|
, mustache
|
||||||
|
, optparse-applicative
|
||||||
|
, parsec
|
||||||
|
, scalpel
|
||||||
|
, text
|
||||||
|
, time
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
test-suite ad-alta-voce-test
|
test-suite ad-alta-voce-test
|
||||||
|
@ -57,4 +79,11 @@ test-suite ad-alta-voce-test
|
||||||
build-depends:
|
build-depends:
|
||||||
ad-alta-voce
|
ad-alta-voce
|
||||||
, base >=4.7 && <5
|
, base >=4.7 && <5
|
||||||
|
, directory
|
||||||
|
, mustache
|
||||||
|
, optparse-applicative
|
||||||
|
, parsec
|
||||||
|
, scalpel
|
||||||
|
, text
|
||||||
|
, time
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
15
app/Main.hs
15
app/Main.hs
|
@ -1,6 +1,17 @@
|
||||||
|
{-|
|
||||||
|
Module : Main
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
-}
|
||||||
|
|
||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
import Lib
|
import Options.Applicative ( execParser )
|
||||||
|
import Command.CLI ( commandParserInfo, execute )
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = someFunc
|
-- Parse the command line into a command and hand it straight to 'execute'.
main = execParser commandParserInfo >>= execute
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Podcast feed template. The enclosure length is 0 because the size of the audio file is not scraped; RSS 2.0 still requires the attribute to be present. -->
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  <channel>
    <title>{{audiobook-title}}</title>
    <link>{{base-url}}</link>
    <image>
      <url>https://www.raiplayradio.it/{{audiobook-cover-url}}</url>
      <title>{{audiobook-cover-title}}</title>
      <link>{{base-url}}</link>
    </image>
    <description>{{audiobook-description}}</description>
    <language>it-it</language>
    <lastBuildDate>{{pub-day}}</lastBuildDate>
    <itunes:author>Ad Alta Voce - Rai Radio 3</itunes:author>
    <itunes:summary>{{audiobook-description}}</itunes:summary>
    <itunes:subtitle>Ad Alta Voce</itunes:subtitle>
    <itunes:explicit>No</itunes:explicit>
    <itunes:image href="https://www.raiplayradio.it/{{audiobook-cover-url}}"/>
    <pubDate>{{pub-day}}</pubDate>
    {{#episodes}}
    <item>
      <title>{{episode-title}}</title>
      <link>https://www.raiplayradio.it/{{episode-url}}</link>
      <enclosure url="{{episode-track-url}}" length="0" type="audio/mpeg"/>
      <guid>{{episode-track-url}}</guid>
      <itunes:duration>{{episode-duration}}</itunes:duration>
    </item>
    {{/episodes}}
  </channel>
</rss>
|
15
package.yaml
15
package.yaml
|
@ -1,5 +1,5 @@
|
||||||
name: ad-alta-voce
|
name: ad-alta-voce
|
||||||
version: 0.0.0.0
|
version: 0.0.0.1
|
||||||
github: "norangebit/ad-alta-voce"
|
github: "norangebit/ad-alta-voce"
|
||||||
license: GPL3
|
license: GPL3
|
||||||
author: "Raffaele Mignone"
|
author: "Raffaele Mignone"
|
||||||
|
@ -10,6 +10,10 @@ extra-source-files:
|
||||||
- README.md
|
- README.md
|
||||||
- ChangeLog.md
|
- ChangeLog.md
|
||||||
|
|
||||||
|
data-dir: data
|
||||||
|
data-files:
|
||||||
|
- templates/podcast.mustache
|
||||||
|
|
||||||
# Metadata used when publishing your package
|
# Metadata used when publishing your package
|
||||||
# synopsis: Short description of your package
|
# synopsis: Short description of your package
|
||||||
# category: Web
|
# category: Web
|
||||||
|
@ -21,12 +25,19 @@ description: Script per la generazione dei Feed Podcast di Ad Alta Voce
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- base >= 4.7 && < 5
|
- base >= 4.7 && < 5
|
||||||
|
- directory
|
||||||
|
- text
|
||||||
|
- time
|
||||||
|
- scalpel
|
||||||
|
- mustache
|
||||||
|
- parsec
|
||||||
|
- optparse-applicative
|
||||||
|
|
||||||
library:
|
library:
|
||||||
source-dirs: src
|
source-dirs: src
|
||||||
|
|
||||||
executables:
|
executables:
|
||||||
ad-alta-voce-exe:
|
loud:
|
||||||
main: Main.hs
|
main: Main.hs
|
||||||
source-dirs: app
|
source-dirs: app
|
||||||
ghc-options:
|
ghc-options:
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
{-|
|
||||||
|
Module : Command.All
|
||||||
|
Description : Generate podcast for all audiobooks
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
|
||||||
|
This module exposes the command that generates podcast feeds for all the
|
||||||
|
audiobooks in Ad Alta Voce library.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Command.All(generateAll) where
|
||||||
|
|
||||||
|
import Control.Monad ( join )
|
||||||
|
import Data.Maybe ( catMaybes )
|
||||||
|
import Text.HTML.Scalpel ( scrapeURL, URL )
|
||||||
|
import Command.Single ( single )
|
||||||
|
import Scraper.Playlist
|
||||||
|
( playlistPageNumbersScraper, playlistsUrlScraper )
|
||||||
|
|
||||||
|
-- | Root of the Rai Play Radio web site; audiobook links scraped from the
-- archive are prefixed with it by 'scrapePlaylistPages'.
baseUrl :: String
baseUrl = "https://www.raiplayradio.it"

-- | Archive page that lists every Ad Alta Voce audiobook. A page identifier
-- is appended to it to address one page of the paginated archive.
playlistBaseUrl :: String
playlistBaseUrl = "https://www.raiplayradio.it/programmi/adaltavoce/archivio/audiolibri/tutte/"
|
||||||
|
|
||||||
|
-- | Collect the URLs of the audiobooks listed in the Ad Alta Voce archive.
--
-- The pagination of the archive page is scraped first and the resulting page
-- identifiers are handed to 'scrapePlaylistPages'. The result is 'Nothing'
-- when the pagination scrape itself yields 'Nothing'.
scrapeAudiobooksUrl :: IO (Maybe [URL])
scrapeAudiobooksUrl =
  scrapeURL playlistBaseUrl playlistPageNumbersScraper
    >>= sequenceA . scrapePlaylistPages
|
||||||
|
|
||||||
|
-- | Turn the scraped page identifiers into an action that visits every
-- archive page and gathers the audiobook links found there, each prefixed
-- with 'baseUrl'.
--
-- A 'Nothing' input is passed through unchanged. Pages whose scrape yields
-- 'Nothing' are silently dropped by 'catMaybes'.
scrapePlaylistPages :: Maybe [String] -> Maybe (IO [URL])
scrapePlaylistPages = fmap collectUrls
  where
    -- Scrape each page in order, then flatten the per-page results.
    collectUrls pageIds =
      map (baseUrl ++) . join . catMaybes <$> mapM scrapePage pageIds
    scrapePage pageId =
      scrapeURL (playlistBaseUrl ++ pageId) playlistsUrlScraper
|
||||||
|
|
||||||
|
-- | Generate the podcast feed of every audiobook found in the archive.
--
-- The argument is the output directory, forwarded unchanged to 'single' for
-- each audiobook URL. When the list of audiobooks cannot be obtained, only
-- an error line is printed.
generateAll :: String -> IO ()
generateAll outdir = scrapeAudiobooksUrl >>= maybe (putStrLn "Error") generate
  where
    generate audiobookUrls = do
      mapM_ (\url -> single url outdir) audiobookUrls
      putStrLn "All done.\nEnjoy your books!"
|
|
@ -0,0 +1,67 @@
|
||||||
|
{-|
|
||||||
|
Module : Command.CLI
|
||||||
|
Description : Define CLI interface
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
|
||||||
|
This module exposes the CLI of the program.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Command.CLI where
|
||||||
|
|
||||||
|
import Options.Applicative
|
||||||
|
import Options.Applicative.Builder
|
||||||
|
import Options.Applicative.Types ( ParserInfo, Parser )
|
||||||
|
import Command.All ( generateAll )
|
||||||
|
import Command.Single ( single )
|
||||||
|
|
||||||
|
-- | Options accepted by the @all@ sub-command.
newtype AllOption = AllOption
  { outputDirectoryAll :: String -- ^ Output directory passed to 'generateAll'.
  }

-- | Options accepted by the @single@ sub-command.
data SingleOption = SingleOption
  { audiobookUrl          :: String -- ^ URL of the audiobook, passed to 'single'.
  , outputDirectorySingle :: String -- ^ Output directory passed to 'single'.
  }

-- | A parsed invocation of the program: one constructor per sub-command.
data Command
  = All AllOption
  | Single SingleOption
|
||||||
|
|
||||||
|
-- | Parser for the options of the @single@ sub-command: a positional
-- audiobook URL and an optional @-o/--output@ directory that defaults to
-- @out@.
singleParser :: Parser SingleOption
singleParser = SingleOption <$> urlArgument <*> outputOption
  where
    urlArgument =
      argument str (metavar "URL" <> help "Audiobook url")
    outputOption =
      strOption
        ( long "output"
            <> short 'o'
            <> metavar "DIRECTORY"
            -- Help text fixed: the original read "Directory where save the podcast".
            <> help "Directory where to save the podcast"
            <> value "out"
            <> showDefault
        )
|
||||||
|
|
||||||
|
-- | Parser for the options of the @all@ sub-command: an optional
-- @-o/--output@ directory that defaults to @out@.
allParser :: Parser AllOption
allParser = AllOption <$> outputOption
  where
    outputOption =
      strOption
        ( long "output"
            <> short 'o'
            <> metavar "DIRECTORY"
            -- Help text fixed: the original read "Directory where save the podcasts".
            <> help "Directory where to save the podcasts"
            <> value "out"
            <> showDefault
        )
|
||||||
|
|
||||||
|
-- | The @single@ sub-command: 'singleParser' plus @--help@ and a
-- description, with the parsed options wrapped in 'Single'.
singleParserInfo :: ParserInfo Command
singleParserInfo = fmap Single optionsInfo
  where
    optionsInfo = info (singleParser <**> helper) description
    description = progDesc "Generate podcast of the given Ad Alta Voce url"
|
||||||
|
|
||||||
|
-- | The @all@ sub-command: 'allParser' plus @--help@ and a description,
-- with the parsed options wrapped in 'All'.
allParserInfo :: ParserInfo Command
allParserInfo = fmap All optionsInfo
  where
    optionsInfo = info (allParser <**> helper) description
    -- Typo fixed in the user-facing text ("audioboks" -> "audiobooks").
    description = progDesc "Generate podcast for all Ad Alta Voce audiobooks"
|
||||||
|
|
||||||
|
-- | Dispatch on the sub-command name: @single@ or @all@.
commandParser :: Parser Command
commandParser =
  subparser $
    mconcat
      [ command "single" singleParserInfo
      , command "all" allParserInfo
      ]
|
||||||
|
|
||||||
|
-- | Top-level parser description handed to 'execParser': the sub-command
-- parser plus @--help@ and the program description.
commandParserInfo :: ParserInfo Command
commandParserInfo = info parserWithHelp description
  where
    parserWithHelp = commandParser <**> helper
    description = progDesc "Generate podcast of Ad Alta Voce audiobook"
|
||||||
|
|
||||||
|
-- | Run the action associated with a parsed 'Command'.
execute :: Command -> IO ()
execute cmd = case cmd of
  Single opt -> single (audiobookUrl opt) (outputDirectorySingle opt)
  All opt    -> generateAll (outputDirectoryAll opt)
|
|
@ -0,0 +1,58 @@
|
||||||
|
{-|
|
||||||
|
Module : Command.Single
|
||||||
|
Description : Generate podcast for an audiobook
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
|
||||||
|
This module exposes the command that generates the podcast feed for an audiobook
|
||||||
|
in Ad Alta Voce library.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Command.Single(single) where
|
||||||
|
|
||||||
|
import Data.Text (unpack)
|
||||||
|
import Data.Time.Clock ( UTCTime(utctDay), getCurrentTime )
|
||||||
|
import Text.HTML.Scalpel ( scrapeURL, URL )
|
||||||
|
import Text.Mustache
|
||||||
|
import Text.Parsec.Error ( ParseError )
|
||||||
|
import System.Directory ( createDirectoryIfMissing )
|
||||||
|
import System.IO
|
||||||
|
import Paths_ad_alta_voce ( getDataFileName )
|
||||||
|
import Scraper.Audiobook ( audiobookScraper )
|
||||||
|
import Types
|
||||||
|
|
||||||
|
-- | Compile the @podcast.mustache@ template, looking it up in the
-- @templates@ directory resolved through 'getDataFileName'.
compilePodcastTemplate :: IO (Either ParseError Template)
compilePodcastTemplate =
  getDataFileName "templates" >>= \templateDir ->
    automaticCompile [templateDir] "podcast.mustache"
|
||||||
|
|
||||||
|
-- | Download the page at the given URL and run 'audiobookScraper' on it.
scrapeAudiobook :: URL -> IO (Maybe Audiobook)
scrapeAudiobook = flip scrapeURL audiobookScraper
|
||||||
|
|
||||||
|
-- | Render the podcast through the compiled template and write the feed to
-- @outdir/<file name>@, where the file name comes from
-- 'generatePodcastFileName'.
--
-- * A template compilation error is reported on stderr and nothing is written.
-- * A missing podcast (the scrape failed) is reported on stderr and nothing
--   is written.
-- * Otherwise the output directory is created if needed, the feed is written
--   as UTF-8, and a confirmation line is printed on stdout.
writePodcastTemplate :: Either ParseError Template -> Maybe Podcast -> String -> IO ()
writePodcastTemplate (Left err) _ _ = hPrint stderr err
writePodcastTemplate _ Nothing _ = hPutStrLn stderr "Error during audiobook parsing"
writePodcastTemplate (Right template) (Just podcast) outdir = do
  createDirectoryIfMissing True outdir
  withFile fileName WriteMode $ \handle -> do
    -- Force UTF-8 instead of the locale encoding: titles and descriptions
    -- contain accented characters, which fail to encode under e.g. a C locale.
    hSetEncoding handle utf8
    hPutStr handle $ unpack xmlPodcast
  -- Report success only after 'withFile' has closed the handle.
  putStrLn output
  where
    xmlPodcast = substitute template podcast
    title = audiobookTitle $ audiobook podcast
    fileName = outdir ++ "/" ++ generatePodcastFileName podcast
    output = title ++ " done!"
|
||||||
|
|
||||||
|
-- | Generate the podcast feed of one audiobook.
--
-- Takes the audiobook URL and the output directory. The current day, the
-- scraped audiobook and the compiled template are obtained in that order and
-- handed to 'writePodcastTemplate', which also reports any failure.
single :: String -> String -> IO ()
single url outdir = do
  today <- fmap utctDay getCurrentTime
  scraped <- scrapeAudiobook url
  template <- compilePodcastTemplate
  writePodcastTemplate template (fmap (generatePodcast today url) scraped) outdir
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
module Lib
|
|
||||||
( someFunc
|
|
||||||
) where
|
|
||||||
|
|
||||||
someFunc :: IO ()
|
|
||||||
someFunc = putStrLn "someFunc"
|
|
|
@ -0,0 +1,78 @@
|
||||||
|
{-|
|
||||||
|
Module : Scraper.Audiobook
|
||||||
|
Description : Scrape audiobook information from Rai Play Radio
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
|
||||||
|
This module contains all selectors and scrapers needed to retrieve the
|
||||||
|
information from the audiobook.
|
||||||
|
An example of a web page that can be scraped is available at the following
|
||||||
|
<https://www.raiplayradio.it/playlist/2017/12/Arancia-Meccanica-9d191ecb-23df-43fe-8e97-e423707f7de3.html link>
|
||||||
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Scraper.Audiobook(audiobookScraper) where
|
||||||
|
|
||||||
|
import Text.HTML.Scalpel
|
||||||
|
import Types ( Audiobook(Audiobook), Episode(Episode) )
|
||||||
|
|
||||||
|
-- | Selects the @div.descriptionProgramma@ element.
audiobookHeaderSelector :: Selector
audiobookHeaderSelector = "div" @: [hasClass "descriptionProgramma"]

-- | Text of the @h2@ nested in the header element.
audiobookTitleScraper :: Scraper String String
audiobookTitleScraper = text (audiobookHeaderSelector // "h2")

-- | Text of the @span.textDescriptionProgramma@ nested in the header element.
audiobookDescriptionScraper :: Scraper String String
audiobookDescriptionScraper = text (audiobookHeaderSelector // descriptionSpan)
  where
    descriptionSpan = "span" @: [hasClass "textDescriptionProgramma"]

-- | Value of the @src@ attribute of the @img.imgHomeProgramma@ element.
audiobookCoverUrlScraper :: Scraper String String
audiobookCoverUrlScraper = attr "src" coverImage
  where
    coverImage = "img" @: [hasClass "imgHomeProgramma"]
|
||||||
|
|
||||||
|
-- | Selects every @li@ nested in the @ol.elencoPlaylist@ element.
episodesSelector :: Selector
episodesSelector = playlist // "li"
  where
    playlist = "ol" @: [hasClass "elencoPlaylist"]

-- | Reads the named attribute of an @li@ element.
episodeItemAttribute :: String -> Scraper String String
episodeItemAttribute name = attr name "li"

-- | Text of the @h2@ element.
episodeTitleScraper :: Scraper String String
episodeTitleScraper = text "h2"

-- | Value of the @data-mediapolis@ attribute of the @li@ element.
episodeTrackUrlScraper :: Scraper String String
episodeTrackUrlScraper = episodeItemAttribute "data-mediapolis"

-- | Value of the @data-image@ attribute of the @li@ element.
episodeCoverUrlScraper :: Scraper String String
episodeCoverUrlScraper = episodeItemAttribute "data-image"

-- | Value of the @data-href@ attribute of the @li@ element.
episodeUrlScraper :: Scraper String String
episodeUrlScraper = episodeItemAttribute "data-href"

-- | Text of the @span.timePlaylist@ element.
episodeDurationScraper :: Scraper String String
episodeDurationScraper = text ("span" @: [hasClass "timePlaylist"])
|
||||||
|
|
||||||
|
-- | Builds an 'Episode' from the URL, title, track URL, cover URL and
-- duration scrapers, run in that order.
episodeScraper :: Scraper String Episode
episodeScraper =
  Episode
    <$> episodeUrlScraper
    <*> episodeTitleScraper
    <*> episodeTrackUrlScraper
    <*> episodeCoverUrlScraper
    <*> episodeDurationScraper

-- | Runs 'episodeScraper' on every element matched by 'episodesSelector'.
episodesListScraper :: Scraper String [Episode]
episodesListScraper = episodesSelector `chroots` episodeScraper
|
||||||
|
|
||||||
|
-- | The 'audiobookScraper' function defines the scraper that retrieves the
-- 'Audiobook' from the given web page: title, description, cover URL and
-- the list of episodes, scraped in that order.
audiobookScraper :: Scraper String Audiobook
audiobookScraper =
  Audiobook
    <$> audiobookTitleScraper
    <*> audiobookDescriptionScraper
    <*> audiobookCoverUrlScraper
    <*> episodesListScraper
|
|
@ -0,0 +1,45 @@
|
||||||
|
{-|
|
||||||
|
Module : Scraper.Playlist
|
||||||
|
Description : Scrape audiobook link from Rai Play Radio
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
|
||||||
|
This module contains all selectors and scrapers needed to retrieve the
|
||||||
|
links to the audiobooks.
|
||||||
|
An example of a web page that can be scraped is available at the following
|
||||||
|
<https://www.raiplayradio.it/programmi/adaltavoce/archivio/audiolibri/tutte link>
|
||||||
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Scraper.Playlist
|
||||||
|
( playlistsUrlScraper
|
||||||
|
, playlistPageNumbersScraper
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Text.HTML.Scalpel
|
||||||
|
|
||||||
|
-- | Selects every @div.bloccoPlaylist@ element.
playlistSelector :: Selector
playlistSelector = "div" @: [hasClass "bloccoPlaylist"]

-- | Value of the @href@ attribute of an @a@ element.
playlistUrlScraper :: Scraper String String
playlistUrlScraper = attr "href" "a"

-- | The 'playlistsUrlScraper' function defines the scraper that retrieves
-- all the audiobook URLs contained in a playlist page, one per element
-- matched by 'playlistSelector'.
playlistsUrlScraper :: Scraper String [String]
playlistsUrlScraper = playlistSelector `chroots` playlistUrlScraper
|
||||||
|
|
||||||
|
-- | Selects the @li@ elements nested in @ul.pagination@ that do not carry
-- the @archivePaginatation@ class.
playlistPageNumberSelector :: Selector
playlistPageNumberSelector = pagination // pageItem
  where
    pagination = "ul" @: [hasClass "pagination"]
    pageItem = "li" @: [notP (hasClass "archivePaginatation")]

-- | Text of an @a@ element.
playlistPageNumberScraper :: Scraper String String
playlistPageNumberScraper = text "a"

-- | Audiobooks are listed on multiple pages; 'playlistPageNumbersScraper'
-- defines the scraper that retrieves the identifier of every page, one per
-- element matched by 'playlistPageNumberSelector'.
playlistPageNumbersScraper :: Scraper String [String]
playlistPageNumbersScraper =
  playlistPageNumberSelector `chroots` playlistPageNumberScraper
|
|
@ -0,0 +1,103 @@
|
||||||
|
{-|
|
||||||
|
Module : Types
|
||||||
|
Description : Defines data types
|
||||||
|
Copyright : (c) Raffaele Mignone 2021
|
||||||
|
License : GPL-3
|
||||||
|
Maintainer : git@norangeb.it
|
||||||
|
|
||||||
|
This module defines the types used by the program and the functions to access
|
||||||
|
their fields.
|
||||||
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Types
|
||||||
|
( Audiobook(Audiobook)
|
||||||
|
, Episode(Episode)
|
||||||
|
, Podcast(Podcast)
|
||||||
|
, generatePodcast
|
||||||
|
, episodeUrl
|
||||||
|
, episodeTitle
|
||||||
|
, episodeTrackUrl
|
||||||
|
, episodeCoverUrl
|
||||||
|
, episodeDuration
|
||||||
|
, audiobookTitle
|
||||||
|
, audiobookDescription
|
||||||
|
, audiobookCoverUrl
|
||||||
|
, audiobookEpisodes
|
||||||
|
, audiobook
|
||||||
|
, baseUrl
|
||||||
|
, pubDay
|
||||||
|
, generatePodcastFileName
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Char ( toLower )
import Data.Time.Calendar ( Day )
import Data.Time.Format ( defaultTimeLocale, formatTime )
import Text.Mustache
import Text.Mustache.Types ( Pair )
|
||||||
|
|
||||||
|
-- | The 'Episode' data type represents an episode of the podcast.
-- 'Episode' is an instance of the 'ToMustache' typeclass.
data Episode = Episode
  { episodeUrl      :: String -- ^ Rendered as @episode-url@.
  , episodeTitle    :: String -- ^ Rendered as @episode-title@.
  , episodeTrackUrl :: String -- ^ Rendered as @episode-track-url@.
  , episodeCoverUrl :: String -- ^ Cover URL; not rendered by the 'ToMustache' instance.
  , episodeDuration :: String -- ^ Rendered as @episode-duration@.
  }
  deriving (Show)
|
||||||
|
|
||||||
|
-- | The 'Audiobook' data type represents the audiobook of the podcast.
-- 'Audiobook' is an instance of the 'ToMustache' typeclass.
data Audiobook = Audiobook
  { audiobookTitle       :: String    -- ^ Rendered as @audiobook-title@ and @audiobook-cover-title@.
  , audiobookDescription :: String    -- ^ Rendered as @audiobook-description@.
  , audiobookCoverUrl    :: String    -- ^ Rendered as @audiobook-cover-url@.
  , audiobookEpisodes    :: [Episode] -- ^ Rendered as the @episodes@ section.
  }
  deriving (Show)
|
||||||
|
|
||||||
|
|
||||||
|
-- | The 'Podcast' data type represents the podcast.
-- 'Podcast' is an instance of the 'ToMustache' typeclass.
data Podcast = Podcast
  { audiobook :: Audiobook -- ^ The audiobook the feed is built from.
  , baseUrl   :: String    -- ^ Rendered as @base-url@.
  , pubDay    :: Day       -- ^ Rendered as @pub-day@.
  }
  deriving (Show)
|
||||||
|
|
||||||
|
-- | Mustache key/value pairs describing an 'Audiobook'. Shared by the
-- 'ToMustache' instances of 'Audiobook' and 'Podcast'.
toPairList :: Audiobook -> [Pair]
toPairList book =
  [ "audiobook-title" ~> bookTitle
  , "audiobook-cover-url" ~> audiobookCoverUrl book
  , "audiobook-cover-title" ~> bookTitle -- the cover title reuses the book title
  , "audiobook-description" ~> audiobookDescription book
  , "episodes" ~> audiobookEpisodes book
  ]
  where
    bookTitle = audiobookTitle book
|
||||||
|
|
||||||
|
-- | Exposes the title, URL, track URL and duration of an 'Episode' to the
-- template; the cover URL is not exported.
instance ToMustache Episode where
  toMustache episode = object (map render fields)
    where
      render (key, field) = key ~> field episode
      fields =
        [ ("episode-title", episodeTitle)
        , ("episode-url", episodeUrl)
        , ("episode-track-url", episodeTrackUrl)
        , ("episode-duration", episodeDuration)
        ]
|
||||||
|
|
||||||
|
-- | An 'Audiobook' is rendered as the object built from 'toPairList'.
instance ToMustache Audiobook where
  toMustache book = object (toPairList book)
|
||||||
|
|
||||||
|
-- | A 'Podcast' is rendered as the audiobook pairs from 'toPairList' plus
-- @base-url@ and @pub-day@.
--
-- @pub-day@ is formatted as an RFC 822 date at midnight GMT, because the
-- template places it in @<pubDate>@ and @<lastBuildDate>@. The previous
-- 'show' rendering produced an ISO @YYYY-MM-DD@ date, which RSS does not
-- accept there.
instance ToMustache Podcast where
  toMustache podcast =
    object $
      [ "base-url" ~> baseUrl podcast
      , "pub-day" ~> rfc822Day
      ]
        ++ toPairList (audiobook podcast)
    where
      -- Only the day is known, so the time of day is fixed to 00:00:00 GMT.
      rfc822Day =
        formatTime defaultTimeLocale "%a, %d %b %Y 00:00:00 GMT" (pubDay podcast)
|
||||||
|
|
||||||
|
-- | Assemble a 'Podcast' from the publication day, the URL used as
-- 'baseUrl', and the scraped audiobook.
generatePodcast :: Day -> String -> Audiobook -> Podcast
generatePodcast day url book =
  Podcast
    { audiobook = book
    , baseUrl = url
    , pubDay = day
    }
|
||||||
|
|
||||||
|
-- | File name of the feed: the audiobook title lower-cased, with spaces and
-- path separators replaced by @-@, followed by @.xml@.
--
-- Path separators are replaced as well as spaces, because a title that
-- contains @/@ or @\\@ would otherwise point into a subdirectory that does
-- not exist.
generatePodcastFileName :: Podcast -> String
generatePodcastFileName podcast = map sanitize title ++ ".xml"
  where
    title = audiobookTitle (audiobook podcast)
    sanitize c
      | c `elem` unsafeChars = '-'
      | otherwise = toLower c
    -- Spelled as a list of characters so the literal stays unambiguous
    -- under OverloadedStrings.
    unsafeChars = [' ', '/', '\\']
|
Loading…
Reference in New Issue