diff --git a/data/templates/podcast.mustache b/data/templates/podcast.mustache index e5ff11f..9b79108 100644 --- a/data/templates/podcast.mustache +++ b/data/templates/podcast.mustache @@ -10,8 +10,8 @@ {{audiobook-description}} it-it {{pub-day}} - Ad Alta Voce - Rai Radio 3 - {{audiobook-description}} + {{audiobook-author}} + {{audiobook-summary}} Ad Alta Voce No diff --git a/src/Command/All.hs b/src/Command/All.hs index bf6d816..052549b 100644 --- a/src/Command/All.hs +++ b/src/Command/All.hs @@ -14,33 +14,36 @@ module Command.All(generateAll) where import Control.Monad ( join ) import Data.Maybe ( catMaybes ) import Text.HTML.Scalpel ( scrapeURL, URL ) -import Command.Single ( single ) +import Command.Single ( singleWithAuthor ) import Scraper.Playlist - ( playlistPageNumbersScraper, playlistsUrlScraper ) + ( playlistPageNumbersScraper, playlistInfosScraper ) baseUrl = "https://www.raiplayradio.it" playlistBaseUrl = "https://www.raiplayradio.it/programmi/adaltavoce/archivio/audiolibri/tutte/" -scrapeAudiobooksUrl :: IO (Maybe [URL]) +scrapeAudiobooksUrl :: IO (Maybe [(URL, String)]) scrapeAudiobooksUrl = do pageNumbers <- scrapeURL playlistBaseUrl playlistPageNumbersScraper case scrapePlaylistPages pageNumbers of Nothing -> return Nothing Just urls -> Just <$> urls -scrapePlaylistPages :: Maybe [String] -> Maybe (IO [URL]) +scrapePlaylistPages :: Maybe [String] -> Maybe (IO [(URL, String)]) scrapePlaylistPages pageNumbers = do pageNumbers' <- pageNumbers let playlistUrls = map (playlistBaseUrl ++) pageNumbers' - audiobookUrls = mapM (`scrapeURL` playlistsUrlScraper) playlistUrls - flatAudiobookUrls = join . catMaybes <$> audiobookUrls - return $ map (baseUrl ++) <$> flatAudiobookUrls + audiobookInfos = mapM (`scrapeURL` playlistInfosScraper) playlistUrls + flatAudiobookInfos = join . 
catMaybes <$> audiobookInfos + return $ map (\(u, a) -> (concatBaseUrl u, a)) <$> flatAudiobookInfos + where + concatBaseUrl :: URL -> URL + concatBaseUrl = (++) baseUrl generateAll :: String -> IO () generateAll outdir = do - urls <- scrapeAudiobooksUrl - case urls of + infos <- scrapeAudiobooksUrl + case infos of Nothing -> putStrLn "Error" - Just urls' -> do - mapM_ (`single` outdir) urls' + Just infos' -> do + mapM_ (\(url, author) -> singleWithAuthor url outdir author) infos' putStrLn "All done.\nEnjoy your books!" diff --git a/src/Command/Single.hs b/src/Command/Single.hs index 2ee10a6..c1975d0 100644 --- a/src/Command/Single.hs +++ b/src/Command/Single.hs @@ -9,7 +9,7 @@ This module exposes the command that generates podcast feed for an audiobooks in Ad Alta Voce library. -} -module Command.Single(single) where +module Command.Single(single, singleWithAuthor) where import Data.Text (unpack) import Data.Time.Clock ( UTCTime(utctDay), getCurrentTime ) @@ -46,13 +46,21 @@ writePodcastTemplate (Right template) (Just podcast) outdir = do fileName = outdir ++ "/" ++ generatePodcastFileName podcast output = title ++ " done!" 
-single :: String -> String -> IO () -single url outdir = do +single' :: Maybe Audiobook -> String -> String -> IO () +single' audiobook url outdir = do day <- utctDay <$> getCurrentTime - audiobook <- scrapeAudiobook url compiled <- compilePodcastTemplate let podcast = generatePodcast day url <$> audiobook - writePodcastTemplate compiled podcast outdir - + +single :: String -> String -> IO () +single url outdir = do + audiobook <- scrapeAudiobook url + single' audiobook url outdir + +singleWithAuthor :: String -> String -> String -> IO () +singleWithAuthor url outdir author = do + audiobook <- scrapeAudiobook url + let abookDescription = (`toAudiobookWithAuthor` author) <$> audiobook + single' abookDescription url outdir diff --git a/src/Scraper/Audiobook.hs b/src/Scraper/Audiobook.hs index a00ccbe..efd866d 100644 --- a/src/Scraper/Audiobook.hs +++ b/src/Scraper/Audiobook.hs @@ -16,7 +16,7 @@ An example of a web page that can be scraped is available at the following module Scraper.Audiobook(audiobookScraper) where import Text.HTML.Scalpel -import Types ( Audiobook(Audiobook), Episode(Episode) ) +import Types audiobookHeaderSelector :: Selector audiobookHeaderSelector = "div" @: [hasClass "descriptionProgramma"] @@ -75,4 +75,4 @@ audiobookScraper = do description <- audiobookDescriptionScraper coverUrl <- audiobookCoverUrlScraper episodes <- episodesListScraper - return $ Audiobook title description coverUrl episodes + return $ makeAudiobook title description coverUrl episodes diff --git a/src/Scraper/Playlist.hs b/src/Scraper/Playlist.hs index 8035a34..749d655 100644 --- a/src/Scraper/Playlist.hs +++ b/src/Scraper/Playlist.hs @@ -14,8 +14,8 @@ An example of a web page that can be scraped is available at the following {-# LANGUAGE OverloadedStrings #-} module Scraper.Playlist - ( playlistsUrlScraper - , playlistPageNumbersScraper + ( playlistPageNumbersScraper + , playlistInfosScraper ) where import Text.HTML.Scalpel @@ -26,10 +26,22 @@ playlistSelector = "div" @: 
[hasClass "bloccoPlaylist"] playlistUrlScraper :: Scraper String String playlistUrlScraper = attr "href" "a" +playlistAuthorSelector :: Selector +playlistAuthorSelector = "span" @: [hasClass "canale"] + +playlistAuthorScraper :: Scraper String String +playlistAuthorScraper = text playlistAuthorSelector + +playlistInfoScraper :: Scraper String (String, String) +playlistInfoScraper = do + url <- playlistUrlScraper + author <- playlistAuthorScraper + return (url, author) + -- |The 'playlistUrlScraper' function defines the scraper that retrieves all --- audiobooks url cointains in the playlist page. -playlistsUrlScraper :: Scraper String [String] -playlistsUrlScraper = chroots playlistSelector playlistUrlScraper +-- audiobooks url and author contained in the playlist page. +playlistInfosScraper :: Scraper String [(String, String)] +playlistInfosScraper = chroots playlistSelector playlistInfoScraper playlistPageNumberSelector :: Selector playlistPageNumberSelector = "ul" @: [hasClass "pagination"] diff --git a/src/Types.hs b/src/Types.hs index 1490386..559a5ae 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -12,9 +12,12 @@ their fields. {-# LANGUAGE OverloadedStrings #-} module Types - ( Audiobook(Audiobook) + ( Audiobook , Episode(Episode) , Podcast(Podcast) + , makeAudiobook + , makeAudiobookWithAuthor + , toAudiobookWithAuthor , generatePodcast , episodeUrl , episodeTitle @@ -25,6 +28,7 @@ module Types , audiobookDescription , audiobookCoverUrl , audiobookEpisodes + , audiobookAuthor , audiobook , baseUrl , pubDay @@ -49,12 +53,23 @@ data Episode = Episode { episodeUrl :: String -- | The 'Audiobook' data type represents the audiobook of the podcast. -- 'Audiobook' is an istance of 'ToMustache' typeclass. 
data Audiobook = Audiobook { audiobookTitle :: String + , audiobookAuthor :: String , audiobookDescription :: String , audiobookCoverUrl :: String , audiobookEpisodes :: [Episode] + } deriving (Show) +makeAudiobook :: String -> String -> String -> [Episode] -> Audiobook +makeAudiobook title = Audiobook title "Ad Alta Voce - Rai Radio 3" + +makeAudiobookWithAuthor :: String -> String -> String -> String -> [Episode] -> Audiobook +makeAudiobookWithAuthor title author = Audiobook title (author ++ " - Ad Alta Voce") + +toAudiobookWithAuthor :: Audiobook -> String -> Audiobook +toAudiobookWithAuthor (Audiobook title _ description coverUrl episodes) author = + makeAudiobookWithAuthor title author description coverUrl episodes -- | The 'Podcast' data type represents the podcast. -- 'Podcast' is an istance of 'ToMustache' typeclass. @@ -70,6 +85,8 @@ toPairList audiobook = , "audiobook-cover-url" ~> audiobookCoverUrl audiobook , "audiobook-cover-title" ~> audiobookTitle audiobook , "audiobook-description" ~> audiobookDescription audiobook + , "audiobook-summary" ~> audiobookDescription audiobook + , "audiobook-author" ~> audiobookAuthor audiobook , "episodes" ~> audiobookEpisodes audiobook ]