First release

Addition of the CLI for the generation of podcast feeds of Ad Alta Voce.

This first version includes the commands:

- `single` generate a podcast feed of the given audiobook
- `all` generate a podcast feed for all audiobooks
This commit is contained in:
Raffaele Mignone 2021-01-24 14:26:07 +00:00 committed by norangebit
parent 021a99cfcc
commit df30fbed23
Signed by: norangebit
GPG Key ID: F5255658CB220573
12 changed files with 540 additions and 21 deletions

View File

@ -1,3 +1,51 @@
# Ad Alta Voce
Script per la generazione dei Feed Podcast di [Ad Alta Voce](https://www.raiplayradio.it/programmi/adaltavoce/).
## Utilizzo
### Installazione
Una volta scaricato il repository è possibile installare il programma tramite `stack` attraverso il seguente comando:
```bash
stack install
```
L'eseguibile può essere chiamato attraverso il comando `loud`.
### Generazione di un singolo podcast
È possibile generare il feed podcast di un audiobook tramite il seguente comando:
```bash
loud single <audiobook-url>
```
Una lista di tutti gli audiolibri di Ad Alta Voce può essere recuperata al seguente [link](https://www.raiplayradio.it/programmi/adaltavoce/archivio/audiolibri/tutte/).
### Generazione di tutti i podcast
È possibile generare in automatico tutti i feed podcast degli audiobook attraverso il seguente comando:
```bash
loud all
```
### Help page
È possibile visualizzare le opzioni aggiuntive attraverso la *help page*, richiamabile con il seguente comando:
```bash
loud --help
```
## Utilizzo tramite stack
Alternativamente è possibile utilizzare `loud` attraverso `stack` senza doverlo installare.
In questo caso i comandi sono:
```bash
stack run -- single <audiobook-url>
stack run -- all
```

View File

@ -5,36 +5,51 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: ad-alta-voce
version: 0.1.0.0
description: Please see the README on GitHub at <https://github.com/githubuser/ad-alta-voce#readme>
homepage: https://github.com/githubuser/ad-alta-voce#readme
bug-reports: https://github.com/githubuser/ad-alta-voce/issues
author: Author name here
maintainer: example@example.com
copyright: 2021 Author name here
license: BSD3
version: 0.0.0.1
description: Script per la generazione dei Feed Podcast di Ad Alta Voce
homepage: https://github.com/norangebit/ad-alta-voce#readme
bug-reports: https://github.com/norangebit/ad-alta-voce/issues
author: Raffaele Mignone
maintainer: git@norangeb.it
copyright: 2021 norangebit
license: GPL3
license-file: LICENSE
build-type: Simple
extra-source-files:
README.md
ChangeLog.md
data-files:
templates/podcast.mustache
data-dir: data
source-repository head
type: git
location: https://github.com/githubuser/ad-alta-voce
location: https://github.com/norangebit/ad-alta-voce
library
exposed-modules:
Lib
Command.All
Command.CLI
Command.Single
Scraper.Audiobook
Scraper.Playlist
Types
other-modules:
Paths_ad_alta_voce
hs-source-dirs:
src
build-depends:
base >=4.7 && <5
, directory
, mustache
, optparse-applicative
, parsec
, scalpel
, text
, time
default-language: Haskell2010
executable ad-alta-voce-exe
executable loud
main-is: Main.hs
other-modules:
Paths_ad_alta_voce
@ -44,6 +59,13 @@ executable ad-alta-voce-exe
build-depends:
ad-alta-voce
, base >=4.7 && <5
, directory
, mustache
, optparse-applicative
, parsec
, scalpel
, text
, time
default-language: Haskell2010
test-suite ad-alta-voce-test
@ -57,4 +79,11 @@ test-suite ad-alta-voce-test
build-depends:
ad-alta-voce
, base >=4.7 && <5
, directory
, mustache
, optparse-applicative
, parsec
, scalpel
, text
, time
default-language: Haskell2010

View File

@ -1,6 +1,17 @@
{-|
Module : Main
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
-}
module Main where
import Lib
import Options.Applicative ( execParser )
import Command.CLI ( commandParserInfo, execute )
-- | Program entry point: parses the command line with 'commandParserInfo'
-- and hands the resulting command to 'execute'.
main :: IO ()
-- NOTE(review): the next line looks like the pre-change definition kept by
-- the diff rendering alongside the new one — confirm which one is current.
main = someFunc
main = do
cliCommand <- execParser commandParserInfo
execute cliCommand

View File

@ -0,0 +1,29 @@
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  <channel>
    <title>{{audiobook-title}}</title>
    <link>{{base-url}}</link>
    <image>
      <url>https://www.raiplayradio.it/{{audiobook-cover-url}}</url>
      <title>{{audiobook-cover-title}}</title>
      <link>{{base-url}}</link>
    </image>
    <description>{{audiobook-description}}</description>
    <language>it-it</language>
    <lastBuildDate>{{pub-day}}</lastBuildDate>
    <itunes:author>Ad Alta Voce - Rai Radio 3</itunes:author>
    <itunes:summary>{{audiobook-description}}</itunes:summary>
    <itunes:subtitle>Ad Alta Voce</itunes:subtitle>
    <itunes:explicit>No</itunes:explicit>
    <itunes:image href="https://www.raiplayradio.it/{{audiobook-cover-url}}"/>
    <pubDate>{{pub-day}}</pubDate>
    {{#episodes}}
    <item>
      <title>{{episode-title}}</title>
      <link>https://www.raiplayradio.it/{{episode-url}}</link>
      <enclosure url="{{episode-track-url}}" length="0" type="audio/mpeg"/>
      <guid>{{episode-track-url}}</guid>
      <itunes:duration>{{episode-duration}}</itunes:duration>
    </item>
    {{/episodes}}
  </channel>
</rss>

View File

@ -1,5 +1,5 @@
name: ad-alta-voce
version: 0.0.0.0
version: 0.0.0.1
github: "norangebit/ad-alta-voce"
license: GPL3
author: "Raffaele Mignone"
@ -10,6 +10,10 @@ extra-source-files:
- README.md
- ChangeLog.md
data-dir: data
data-files:
- templates/podcast.mustache
# Metadata used when publishing your package
# synopsis: Short description of your package
# category: Web
@ -21,12 +25,19 @@ description: Script per la generazione dei Feed Podcast di Ad Alta Voce
dependencies:
- base >= 4.7 && < 5
- directory
- text
- time
- scalpel
- mustache
- parsec
- optparse-applicative
library:
source-dirs: src
executables:
ad-alta-voce-exe:
loud:
main: Main.hs
source-dirs: app
ghc-options:

46
src/Command/All.hs Normal file
View File

@ -0,0 +1,46 @@
{-|
Module : Command.All
Description : Generate podcast for all audiobooks
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
This module exposes the command that generates podcast feeds for all the
audiobooks in Ad Alta Voce library.
-}
module Command.All(generateAll) where
import Control.Monad ( join )
import Data.Maybe ( catMaybes )
import Text.HTML.Scalpel ( scrapeURL, URL )
import Command.Single ( single )
import Scraper.Playlist
( playlistPageNumbersScraper, playlistsUrlScraper )
-- | Root of the Rai Play Radio site; prepended to the audiobook links
-- scraped from the archive pages.
baseUrl :: String
baseUrl = "https://www.raiplayradio.it"

-- | Archive page listing the Ad Alta Voce audiobooks. Scraped page
-- identifiers are appended to it to build the URL of each archive page.
playlistBaseUrl :: String
playlistBaseUrl = baseUrl ++ "/programmi/adaltavoce/archivio/audiolibri/tutte/"
-- | Collects the URLs of all audiobooks: scrapes the page identifiers from
-- 'playlistBaseUrl', then scrapes every archive page for audiobook links.
-- Yields 'Nothing' when the page identifiers cannot be scraped.
scrapeAudiobooksUrl :: IO (Maybe [URL])
scrapeAudiobooksUrl =
  scrapeURL playlistBaseUrl playlistPageNumbersScraper
    >>= maybe (return Nothing) (fmap Just) . scrapePlaylistPages
-- | Given the scraped page identifiers, builds the action that scrapes each
-- archive page and returns the absolute URLs of the audiobooks found.
-- Pages whose scraping yields 'Nothing' contribute no URLs.
-- Returns 'Nothing' when no page identifiers are available.
scrapePlaylistPages :: Maybe [String] -> Maybe (IO [URL])
scrapePlaylistPages = fmap scrapePages
  where
    -- Scrape every page, drop the failed ones and flatten the per-page lists.
    scrapePages pageIds =
      map (baseUrl ++) . join . catMaybes <$> mapM scrapePage pageIds
    scrapePage pageId =
      scrapeURL (playlistBaseUrl ++ pageId) playlistsUrlScraper
-- | Generates a podcast feed for every audiobook found in the archive,
-- writing each feed into @outdir@ through 'single'.
-- Prints @Error@ when the list of audiobooks cannot be scraped.
generateAll :: String -> IO ()
generateAll outdir =
  scrapeAudiobooksUrl >>= maybe (putStrLn "Error") generateEach
  where
    generateEach audiobookUrls = do
      mapM_ (\audiobookUrl -> single audiobookUrl outdir) audiobookUrls
      putStrLn "All done.\nEnjoy your books!"

67
src/Command/CLI.hs Normal file
View File

@ -0,0 +1,67 @@
{-|
Module : Command.CLI
Description : Define CLI interface
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
This module exposes the CLI of the program.
-}
module Command.CLI where
import Options.Applicative
import Options.Applicative.Builder
import Options.Applicative.Types ( ParserInfo, Parser )
import Command.All ( generateAll )
import Command.Single ( single )
-- | Options accepted by the @all@ command.
newtype AllOption = AllOption
  { outputDirectoryAll :: String -- ^ Directory the feeds are written to.
  }

-- | Options accepted by the @single@ command.
data SingleOption = SingleOption
  { audiobookUrl          :: String -- ^ URL of the audiobook page.
  , outputDirectorySingle :: String -- ^ Directory the feed is written to.
  }

-- | A command selected on the command line, together with its options.
data Command
  = All AllOption
  | Single SingleOption
-- | Parser for the options of the @single@ command: a positional audiobook
-- URL and an optional @-o@/@--output@ directory that defaults to @out@.
singleParser :: Parser SingleOption
singleParser = SingleOption <$> urlArgument <*> outputOption
  where
    urlArgument :: Parser String
    urlArgument = argument str $ mconcat
      [ metavar "URL"
      , help "Audiobook url"
      ]

    outputOption :: Parser String
    outputOption = strOption $ mconcat
      [ long "output"
      , short 'o'
      , metavar "DIRECTORY"
      , help "Directory where save the podcast"
      , value "out"
      , showDefault
      ]
-- | Parser for the options of the @all@ command: an optional
-- @-o@/@--output@ directory that defaults to @out@.
allParser :: Parser AllOption
allParser = AllOption <$> outputOption
  where
    outputOption :: Parser String
    outputOption = strOption $ mconcat
      [ long "output"
      , short 'o'
      , metavar "DIRECTORY"
      , help "Directory where save the podcasts"
      , value "out"
      , showDefault
      ]
-- | Parser info for the @single@ sub-command: wraps 'singleParser' (plus
-- @--help@) and tags the parsed options with the 'Single' constructor.
singleParserInfo :: ParserInfo Command
singleParserInfo = fmap Single optionsInfo
  where
    optionsInfo = info (singleParser <**> helper) description
    description = progDesc "Generate podcast of the given Ad Alta Voce url"
-- | Parser info for the @all@ sub-command: wraps 'allParser' (plus @--help@)
-- and tags the parsed options with the 'All' constructor.
allParserInfo :: ParserInfo Command
allParserInfo = All
  <$> info (allParser <**> helper)
           (progDesc "Generate podcast for all Ad Alta Voce audiobooks")
-- | Top-level parser dispatching on the @single@ and @all@ sub-commands.
commandParser :: Parser Command
commandParser = subparser $ mconcat
  [ command "single" singleParserInfo
  , command "all" allParserInfo
  ]
-- | Parser info for the whole program: 'commandParser' with @--help@ support
-- and the program description.
commandParserInfo :: ParserInfo Command
commandParserInfo = info parserWithHelp description
  where
    parserWithHelp = commandParser <**> helper
    description = progDesc "Generate podcast of Ad Alta Voce audiobook"
-- | Runs the action associated with a parsed 'Command': 'single' for
-- 'Single', 'generateAll' for 'All'.
execute :: Command -> IO ()
execute cmd = case cmd of
  Single (SingleOption url outdir) -> single url outdir
  All (AllOption outdir)           -> generateAll outdir

58
src/Command/Single.hs Normal file
View File

@ -0,0 +1,58 @@
{-|
Module : Command.Single
Description : Generate podcast for an audiobook
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
This module exposes the command that generates the podcast feed for an audiobook
in the Ad Alta Voce library.
-}
module Command.Single(single) where
import Data.Text (unpack)
import Data.Time.Clock ( UTCTime(utctDay), getCurrentTime )
import Text.HTML.Scalpel ( scrapeURL, URL )
import Text.Mustache
import Text.Parsec.Error ( ParseError )
import System.Directory ( createDirectoryIfMissing )
import System.IO
import Paths_ad_alta_voce ( getDataFileName )
import Scraper.Audiobook ( audiobookScraper )
import Types
-- | Compiles the @podcast.mustache@ template, looking it up in the
-- @templates@ directory of the package data files.
compilePodcastTemplate :: IO (Either ParseError Template)
compilePodcastTemplate =
  getDataFileName "templates" >>= \searchDir ->
    automaticCompile [searchDir] "podcast.mustache"
-- | Downloads the page at the given URL and runs 'audiobookScraper' on it.
scrapeAudiobook :: URL -> IO (Maybe Audiobook)
scrapeAudiobook = (`scrapeURL` audiobookScraper)
-- | Renders the podcast with the compiled template and writes the result to
-- @\<outdir\>/\<file name\>@, creating the directory when it is missing.
--
-- A template compilation error is printed; a missing podcast prints
-- @Error during audiobook parsing@. In both cases nothing is written.
writePodcastTemplate :: Either ParseError Template -> Maybe Podcast -> String -> IO ()
writePodcastTemplate compiled maybePodcast outdir =
  case (compiled, maybePodcast) of
    (Left err, _) -> print err
    (_, Nothing) -> putStrLn "Error during audiobook parsing"
    (Right template, Just podcast) -> do
      let feedPath = outdir ++ "/" ++ generatePodcastFileName podcast
          feedXml = unpack (substitute template podcast)
          doneMessage = audiobookTitle (audiobook podcast) ++ " done!"
      createDirectoryIfMissing True outdir
      withFile feedPath WriteMode $ \feedHandle -> do
        hPutStr feedHandle feedXml
        putStrLn doneMessage
-- | Generates the podcast feed of the audiobook at @url@ and writes it into
-- @outdir@. The current UTC day is used as the publication day.
single :: String -> String -> IO ()
single url outdir = do
  today <- fmap utctDay getCurrentTime
  scraped <- scrapeAudiobook url
  template <- compilePodcastTemplate
  writePodcastTemplate template (fmap (generatePodcast today url) scraped) outdir

View File

@ -1,6 +0,0 @@
module Lib
( someFunc
) where
-- | Placeholder action: prints the literal text @someFunc@ to standard output.
someFunc :: IO ()
someFunc = putStrLn placeholder
  where
    placeholder = "someFunc"

78
src/Scraper/Audiobook.hs Normal file
View File

@ -0,0 +1,78 @@
{-|
Module : Scraper.Audiobook
Description : Scrape audiobook information from Rai Play Radio
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
This module contains all selectors and scrapers needed to retrieve the
information from the audiobook.
An example of a web page that can be scraped is available at the following
<https://www.raiplayradio.it/playlist/2017/12/Arancia-Meccanica-9d191ecb-23df-43fe-8e97-e423707f7de3.html link>
-}
{-# LANGUAGE OverloadedStrings #-}
module Scraper.Audiobook(audiobookScraper) where
import Text.HTML.Scalpel
import Types ( Audiobook(Audiobook), Episode(Episode) )
-- | Selects the @div@ carrying the @descriptionProgramma@ class; the title
-- and description scrapers look inside it.
audiobookHeaderSelector :: Selector
audiobookHeaderSelector = "div" @: headerPredicates
  where
    headerPredicates = [hasClass "descriptionProgramma"]
-- | Scrapes the text of the @h2@ found inside the audiobook header.
audiobookTitleScraper :: Scraper String String
audiobookTitleScraper = text titleSelector
  where
    titleSelector :: Selector
    titleSelector = audiobookHeaderSelector // "h2"
-- | Scrapes the text of the @span@ with class @textDescriptionProgramma@
-- found inside the audiobook header.
audiobookDescriptionScraper :: Scraper String String
audiobookDescriptionScraper = text descriptionSelector
  where
    descriptionSelector :: Selector
    descriptionSelector =
      audiobookHeaderSelector // ("span" @: [hasClass "textDescriptionProgramma"])
-- | Scrapes the @src@ attribute of the @img@ with class @imgHomeProgramma@.
audiobookCoverUrlScraper :: Scraper String String
audiobookCoverUrlScraper = attr "src" coverImage
  where
    coverImage :: Selector
    coverImage = "img" @: [hasClass "imgHomeProgramma"]
-- | Selects every @li@ nested in the @ol@ with class @elencoPlaylist@.
episodesSelector :: Selector
episodesSelector = playlist // item
  where
    playlist, item :: Selector
    playlist = "ol" @: [hasClass "elencoPlaylist"]
    item = "li"
-- | Scrapes the episode title from the text of an @h2@ element.
episodeTitleScraper :: Scraper String String
episodeTitleScraper = text heading
  where
    heading :: Selector
    heading = "h2"
-- | Scrapes the audio track URL from the @data-mediapolis@ attribute of an
-- @li@ element.
episodeTrackUrlScraper :: Scraper String String
episodeTrackUrlScraper = attr "data-mediapolis" episodeItem
  where
    episodeItem :: Selector
    episodeItem = "li"
-- | Scrapes the episode cover URL from the @data-image@ attribute of an
-- @li@ element.
episodeCoverUrlScraper :: Scraper String String
episodeCoverUrlScraper = attr "data-image" episodeItem
  where
    episodeItem :: Selector
    episodeItem = "li"
-- | Scrapes the episode page URL from the @data-href@ attribute of an
-- @li@ element.
episodeUrlScraper :: Scraper String String
episodeUrlScraper = attr "data-href" episodeItem
  where
    episodeItem :: Selector
    episodeItem = "li"
-- | Scrapes the episode duration from the text of the @span@ with class
-- @timePlaylist@.
episodeDurationScraper :: Scraper String String
episodeDurationScraper = text durationLabel
  where
    durationLabel :: Selector
    durationLabel = "span" @: [hasClass "timePlaylist"]
-- | Scrapes a single 'Episode' by running the URL, title, track URL, cover
-- and duration scrapers in that order; fails if any of them fails.
episodeScraper :: Scraper String Episode
episodeScraper =
  Episode
    <$> episodeUrlScraper
    <*> episodeTitleScraper
    <*> episodeTrackUrlScraper
    <*> episodeCoverUrlScraper
    <*> episodeDurationScraper
-- | Runs 'episodeScraper' on every element matched by 'episodesSelector'
-- and collects the episodes that were scraped successfully.
episodesListScraper :: Scraper String [Episode]
episodesListScraper = episodesSelector `chroots` episodeScraper
-- | The 'audiobookScraper' function defines the scraper that retrieves the
-- 'Audiobook' (title, description, cover URL and episodes) from the given
-- web page.
audiobookScraper :: Scraper String Audiobook
audiobookScraper =
  Audiobook
    <$> audiobookTitleScraper
    <*> audiobookDescriptionScraper
    <*> audiobookCoverUrlScraper
    <*> episodesListScraper

45
src/Scraper/Playlist.hs Normal file
View File

@ -0,0 +1,45 @@
{-|
Module : Scraper.Playlist
Description : Scrape audiobook link from Rai Play Radio
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
This module contains all selectors and scrapers needed to retrieve the
links to the audiobooks.
An example of a web page that can be scraped is available at the following
<https://www.raiplayradio.it/programmi/adaltavoce/archivio/audiolibri/tutte link>
-}
{-# LANGUAGE OverloadedStrings #-}
module Scraper.Playlist
( playlistsUrlScraper
, playlistPageNumbersScraper
) where
import Text.HTML.Scalpel
-- | Selects every @div@ carrying the @bloccoPlaylist@ class.
playlistSelector :: Selector
playlistSelector = "div" @: playlistPredicates
  where
    playlistPredicates = [hasClass "bloccoPlaylist"]
-- | Scrapes the @href@ attribute of an @a@ element.
playlistUrlScraper :: Scraper String String
playlistUrlScraper = attr "href" anchor
  where
    anchor :: Selector
    anchor = "a"
-- | The 'playlistsUrlScraper' function defines the scraper that retrieves
-- the URLs of all the audiobooks contained in a playlist page, one per
-- element matched by 'playlistSelector'.
playlistsUrlScraper :: Scraper String [String]
playlistsUrlScraper = playlistSelector `chroots` playlistUrlScraper
-- | Selects the @li@ entries of the @ul@ with class @pagination@, skipping
-- the entries that carry the @archivePaginatation@ class.
playlistPageNumberSelector :: Selector
playlistPageNumberSelector = pagination // pageEntry
  where
    pagination, pageEntry :: Selector
    pagination = "ul" @: [hasClass "pagination"]
    pageEntry = "li" @: [notP (hasClass "archivePaginatation")]
-- | Scrapes the page identifier from the text of an @a@ element.
playlistPageNumberScraper :: Scraper String String
playlistPageNumberScraper = text anchor
  where
    anchor :: Selector
    anchor = "a"
-- | Audiobooks are listed on multiple pages: 'playlistPageNumbersScraper'
-- defines the scraper that retrieves the identifiers of all the pages that
-- contain audiobooks.
playlistPageNumbersScraper :: Scraper String [String]
playlistPageNumbersScraper =
  playlistPageNumberSelector `chroots` playlistPageNumberScraper

103
src/Types.hs Normal file
View File

@ -0,0 +1,103 @@
{-|
Module : Types
Description : Defines data types
Copyright : (c) Raffaele Mignone 2021
License : GPL-3
Maintainer : git@norangeb.it
This module defines the types used by the program and the functions to access
their fields.
-}
{-# LANGUAGE OverloadedStrings #-}
module Types
( Audiobook(Audiobook)
, Episode(Episode)
, Podcast(Podcast)
, generatePodcast
, episodeUrl
, episodeTitle
, episodeTrackUrl
, episodeCoverUrl
, episodeDuration
, audiobookTitle
, audiobookDescription
, audiobookCoverUrl
, audiobookEpisodes
, audiobook
, baseUrl
, pubDay
, generatePodcastFileName
) where
import Data.Char ( toLower )
import Data.Time.Calendar ( Day )
import Data.Time.Format ( defaultTimeLocale, formatTime )
import Text.Mustache
import Text.Mustache.Types ( Pair )
-- | The 'Episode' data type represents an episode of the podcast.
-- 'Episode' is an instance of the 'ToMustache' typeclass.
data Episode = Episode
  { episodeUrl      :: String -- ^ URL of the episode page.
  , episodeTitle    :: String -- ^ Title of the episode.
  , episodeTrackUrl :: String -- ^ URL of the audio track.
  , episodeCoverUrl :: String -- ^ URL of the episode cover image.
  , episodeDuration :: String -- ^ Duration, as text scraped from the page.
  }
  deriving Show
-- | The 'Audiobook' data type represents the audiobook of the podcast.
-- 'Audiobook' is an instance of the 'ToMustache' typeclass.
data Audiobook = Audiobook
  { audiobookTitle       :: String    -- ^ Title of the audiobook.
  , audiobookDescription :: String    -- ^ Description of the audiobook.
  , audiobookCoverUrl    :: String    -- ^ URL of the cover image.
  , audiobookEpisodes    :: [Episode] -- ^ Episodes of the audiobook.
  }
  deriving Show
-- | The 'Podcast' data type represents the podcast.
-- 'Podcast' is an instance of the 'ToMustache' typeclass.
data Podcast = Podcast
  { audiobook :: Audiobook -- ^ Audiobook the feed is generated from.
  , baseUrl   :: String    -- ^ URL rendered as @base-url@ in the feed.
  , pubDay    :: Day       -- ^ Day rendered as @pub-day@ in the feed.
  }
  deriving Show
-- | Template bindings of an 'Audiobook'. The title is exposed twice: as
-- @audiobook-title@ and as @audiobook-cover-title@.
toPairList :: Audiobook -> [Pair]
toPairList (Audiobook title description coverUrl episodes) =
  [ "audiobook-title" ~> title
  , "audiobook-cover-url" ~> coverUrl
  , "audiobook-cover-title" ~> title
  , "audiobook-description" ~> description
  , "episodes" ~> episodes
  ]
-- | Exposes title, page URL, track URL and duration of an 'Episode' to the
-- template; the cover URL is not exposed.
instance ToMustache Episode where
  toMustache (Episode url title trackUrl _ duration) =
    object
      [ "episode-title" ~> title
      , "episode-url" ~> url
      , "episode-track-url" ~> trackUrl
      , "episode-duration" ~> duration
      ]
-- | Renders an 'Audiobook' as the object built from 'toPairList'.
instance ToMustache Audiobook where
  toMustache book = object (toPairList book)
-- | Renders a 'Podcast' as an object holding @base-url@, @pub-day@ and all
-- the bindings of its audiobook (see 'toPairList').
instance ToMustache Podcast where
  toMustache podcast = object $
    [ "base-url" ~> baseUrl podcast
    , "pub-day" ~> rfc822Date (pubDay podcast)
    ] ++ toPairList (audiobook podcast)
    where
      -- RSS 2.0 requires RFC 822 date-times; only the day is known, so the
      -- time of day is fixed to midnight GMT.
      rfc822Date :: Day -> String
      rfc822Date = formatTime defaultTimeLocale "%a, %d %b %Y 00:00:00 GMT"
-- | Builds a 'Podcast' from the publication day, the base URL and the
-- scraped audiobook.
generatePodcast :: Day -> String -> Audiobook -> Podcast
generatePodcast day url book =
  Podcast
    { audiobook = book
    , baseUrl = url
    , pubDay = day
    }
-- | Derives the feed file name from the audiobook title: the title is
-- lower-cased, spaces become dashes and @.xml@ is appended.
--
-- Path separators (@/@ and @\\@) are replaced by dashes as well, so that a
-- title containing one cannot point into a sub-directory.
generatePodcastFileName :: Podcast -> String
generatePodcastFileName podcast = map sanitize title ++ ".xml"
  where
    title = audiobookTitle (audiobook podcast)

    sanitize :: Char -> Char
    sanitize c
      | c `elem` [' ', '/', '\\'] = '-'
      | otherwise = toLower c