From 5f48b22a28e63d60ae76044d49759bc20b83ef47 Mon Sep 17 00:00:00 2001 From: freearhey <7253922+freearhey@users.noreply.github.com> Date: Sat, 1 Mar 2025 22:00:47 +0300 Subject: [PATCH] Create tvkaista.org.config.js --- sites/tvkaista.org/tvkaista.org.config.js | 169 ++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 sites/tvkaista.org/tvkaista.org.config.js diff --git a/sites/tvkaista.org/tvkaista.org.config.js b/sites/tvkaista.org/tvkaista.org.config.js new file mode 100644 index 00000000..6647f0bc --- /dev/null +++ b/sites/tvkaista.org/tvkaista.org.config.js @@ -0,0 +1,169 @@ +const doFetch = require('@ntlab/sfetch') +const cheerio = require('cheerio') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const timezone = require('dayjs/plugin/timezone') +const customParseFormat = require('dayjs/plugin/customParseFormat') + +dayjs.extend(utc) +dayjs.extend(timezone) +dayjs.extend(customParseFormat) + +const tz = 'Europe/Helsinki' + +module.exports = { + site: 'tvkaista.org', + days: 2, + url({ channel, date }) { + return `https://www.tvkaista.org/${channel.site_id}/${date.format('YYYY-MM-DD')}` + }, + parser({ content, date }) { + let programs = [] + const items = parseItems(content) + + items.forEach(item => { + const prev = programs[programs.length - 1] + const $item = cheerio.load(item) + + let start = parseStart($item, date) + let stop = parseStop($item, start) + + if (prev) { + if (start.isBefore(prev.start)) { + start = start.add(1, 'd') + date = date.add(1, 'd') + } else if (stop.isBefore(start)) { + stop = stop.add(1, 'd') + date = date.add(1, 'd') + } + } else { + if (start.hour() > 18) { + start = start.subtract(1, 'd') + date = date.subtract(1, 'd') + } + } + + programs.push({ + title: parseTitle($item), + description: parseDescription($item), + season: parseSeason($item), + episode: parseEpisode($item), + categories: parseCategories($item), + rating: parseRating($item), + start, + stop + }) + }) + + return programs + }, + async channels() { + let channels = [] + + const queue = ['https://www.tvkaista.org/', 'https://www.tvkaista.org/maksukanavat/'] + await doFetch(queue, (url, res) => { + const $ = cheerio.load(res) + $('body > main > div > div.row > div').each((i, el) => { + const link = $(el).find('div > div > div > div.col-auto > a') + const img = link.find('img.channel-logo') + const name = link.text().trim() || img.attr('alt') + const [, site_id] = link.attr('href').split('/') + + channels.push({ + lang: 'fi', + name, + site_id + }) + }) + }) + + return channels + } +} + +function parseRating($item) { + let rating = $item( + 'div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(3) > img' + ).attr('alt') + + return rating + ? { + system: 'VET', + value: rating.replace(/\(|\)/g, '') + } + : null +} + +function parseCategories($item) { + return $item('div.collapse > .badge') + .map((i, el) => $item(el).text().trim()) + .get() +} + +function parseSeason($item) { + const string = $item( + 'div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(2)' + ) + .text() + .trim() + if (!string) return null + + let [, season] = string.match(/S(\d{2})/) || [null, null] + + return season ? parseInt(season) : null +} + +function parseEpisode($item) { + const string = $item( + 'div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(2)' + ) + .text() + .trim() + if (!string) return null + + let [, episode] = string.match(/E(\d{2})/) || [null, null] + + return episode ? parseInt(episode) : null +} + +function parseStart($item, date) { + const [time] = $item('div.d-flex.flex-row.bd-highlight > div.bd-highlight.me-2') + .text() + .trim() + .split('-') + + return dayjs.tz(`${date.format('YYYY-MM-DD')} ${time}`, 'YYYY-MM-DD HH:mm', tz) +} + +function parseStop($item, date) { + const [, time] = $item('div.d-flex.flex-row.bd-highlight > div.bd-highlight.me-2') + .text() + .trim() + .split('-') + + return dayjs.tz(`${date.format('YYYY-MM-DD')} ${time}`, 'YYYY-MM-DD HH:mm', tz) +} + +function parseTitle($item) { + return $item('div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(1)') + .text() + .trim() +} + +function parseDescription($item) { + return ( + $item('div.collapse > p') + .text() + .replace(/\n/g, '') + .replace(/\s\s+/g, ' ') + // eslint-disable-next-line no-irregular-whitespace + .replace(/ /g, ' ') + .trim() + ) +} + +function parseItems(content) { + const $ = cheerio.load(content) + + return $('ul.list-group > li').toArray() +}