From f9534bd23aead6bd6b7590bb9221f3d7df5cdb63 Mon Sep 17 00:00:00 2001 From: freearhey <7253922+freearhey@users.noreply.github.com> Date: Sat, 1 Mar 2025 21:58:49 +0300 Subject: [PATCH 1/5] Create __data__ --- sites/tvkaista.org/__data__/content_1.html | 1557 +++++++++++++++++++ sites/tvkaista.org/__data__/content_2.html | 1396 +++++++++++++++++ sites/tvkaista.org/__data__/no_content.html | 249 +++ 3 files changed, 3202 insertions(+) create mode 100644 sites/tvkaista.org/__data__/content_1.html create mode 100644 sites/tvkaista.org/__data__/content_2.html create mode 100644 sites/tvkaista.org/__data__/no_content.html diff --git a/sites/tvkaista.org/__data__/content_1.html b/sites/tvkaista.org/__data__/content_1.html new file mode 100644 index 00000000..6eeda761 --- /dev/null +++ b/sites/tvkaista.org/__data__/content_1.html @@ -0,0 +1,1557 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TVkaista - Yle TV1 (2025-03-01) + + +
+
+ +
+
+ +
+
+
+
+

Yle TV1

+
+ +
+ +
+
+ +
+
+
+ +
+ +
+
+ +
+ + + +
+
+ +
+ +
+ +
+ + + +
+
+
+ + +
+
+
+ +
+
+ + + + +
+
+ + + + + + + + + + + + + + + + + diff --git a/sites/tvkaista.org/__data__/content_2.html b/sites/tvkaista.org/__data__/content_2.html new file mode 100644 index 00000000..6da3c6a9 --- /dev/null +++ b/sites/tvkaista.org/__data__/content_2.html @@ -0,0 +1,1396 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TVkaista - Yle TV1 (2025-03-03) + + +
+
+ +
+
+ +
+
+
+
+

Yle TV1

+
+ +
+ +
+
+ +
+
+
+ +
+ +
+
+ +
+ + + +
+
+ +
+ +
+ +
+ + +
+
+ + + + +
+
+ + + + + + + + + + + + + + + diff --git a/sites/tvkaista.org/__data__/no_content.html b/sites/tvkaista.org/__data__/no_content.html new file mode 100644 index 00000000..7d1808b7 --- /dev/null +++ b/sites/tvkaista.org/__data__/no_content.html @@ -0,0 +1,249 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TVkaista - 404 Not Found + + +
+
+ +
+
+ + + +
+
+

404 Not Found

+
+
+ + + + + + + + + + + + + + + From 2235f0c2cd6332166a8b07f58c15c4b48660139f Mon Sep 17 00:00:00 2001 From: freearhey <7253922+freearhey@users.noreply.github.com> Date: Sat, 1 Mar 2025 21:59:14 +0300 Subject: [PATCH 2/5] Create tvkaista.org.test.js --- sites/tvkaista.org/tvkaista.org.test.js | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 sites/tvkaista.org/tvkaista.org.test.js diff --git a/sites/tvkaista.org/tvkaista.org.test.js b/sites/tvkaista.org/tvkaista.org.test.js new file mode 100644 index 00000000..2e829f17 --- /dev/null +++ b/sites/tvkaista.org/tvkaista.org.test.js @@ -0,0 +1,93 @@ +const { parser, url } = require('./tvkaista.org.config.js') +const fs = require('fs') +const path = require('path') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +dayjs.extend(customParseFormat) +dayjs.extend(utc) + +let date = dayjs.utc('2025-03-01', 'YYYY-MM-DD').startOf('d') +const channel = { site_id: 'yle-tv1' } + +it('can generate valid url', () => { + expect(url({ channel, date })).toBe('https://www.tvkaista.org/yle-tv1/2025-03-01') +}) + +it('can parse response for today', () => { + const content = fs.readFileSync(path.resolve(__dirname, '__data__/content_1.html')) + + let results = parser({ content, date }) + results = results.map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + + return p + }) + + expect(results.length).toBe(45) + expect(results[0]).toMatchObject({ + title: 'Alice & Jack', + description: + 'Kausi 1, 2/6. Säröjä. Jack on onnellisesti naimisissa, ja on pienen tyttären isä. Yllättävä puhelu Alicelta suistaa Jackin elämän kuitenkin pois raiteiltaan. Tunteiden myllerryksessä Jack suostuu tapaamaan Alicen salassa vaimoltaa', + season: 1, + episode: 2, + rating: { + system: 'VET', + value: '12' + }, + categories: ['Sarja'], + start: '2025-02-28T21:20:00.000Z', + stop: '2025-02-28T22:04:00.000Z' + }) +}) + +it('can parse response for next day', () => { + date = dayjs.utc('2025-03-03', 'YYYY-MM-DD').startOf('d') + const content = fs.readFileSync(path.resolve(__dirname, '__data__/content_2.html')) + + let results = parser({ content, date }) + results = results.map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + + return p + }) + + expect(results.length).toBe(39) + expect(results[0]).toMatchObject({ + title: 'Sodan silpoma elämä', + description: + 'Oleh Stahanov haavoittui vakavasti Itä-Ukrainan rintamalla. Miten elämä rakennetaan uudelleen, kun toipuminen vaatii selviytymistä niin fyysisistä vammoista kuin henkisestä taakastakin? Ohjaus: Viivi Berghem (Suomi 2024)', + start: '2025-03-02T21:05:00.000Z', + stop: '2025-03-02T22:02:00.000Z' + }) + expect(results[5]).toMatchObject({ + title: 'La Promesa - Salaisuuksien kartano', + description: + 'Kausi 1, 3/122. Päätöksen vaikeus. Jimena pääsee lennolle Manuelin kanssa tämän tunnustettua ensin lentokilpailuun osallistumisensa. Johtaako lento näiden kahden lähentymiseen? Onko mysteerikokin henkilöllisy', + season: 1, + episode: 3, + categories: ['Sarja'], + rating: { + system: 'VET', + value: '12' + }, + start: '2025-03-03T08:00:00.000Z', + stop: '2025-03-03T08:52:00.000Z' + }) + expect(results[38]).toMatchObject({ + title: 'Unelma työstä', + description: + 'Noin miljoona suomalaista on joko työttömänä tai työskentelee osa- tai määräaikaisessa työsuhteessa. Dokumentissa tarinansa kertoo entinen työministeri, loppuun palanut oikeustieteen tohtori, akateeminen pätkätyöläinen ja nuori teatte', + start: '2025-03-03T21:15:00.000Z', + stop: '2025-03-03T22:11:00.000Z' + }) +}) + +it('can handle empty guide', () => { + const content = fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html')) + const results = parser({ content, date }) + + expect(results).toMatchObject([]) +}) From 5f48b22a28e63d60ae76044d49759bc20b83ef47 Mon Sep 17 00:00:00 2001 From: freearhey <7253922+freearhey@users.noreply.github.com> Date: Sat, 1 Mar 2025 22:00:47 +0300 Subject: [PATCH 3/5] Create tvkaista.org.config.js --- sites/tvkaista.org/tvkaista.org.config.js | 169 ++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 sites/tvkaista.org/tvkaista.org.config.js diff --git a/sites/tvkaista.org/tvkaista.org.config.js b/sites/tvkaista.org/tvkaista.org.config.js new file mode 100644 index 00000000..6647f0bc --- /dev/null +++ b/sites/tvkaista.org/tvkaista.org.config.js @@ -0,0 +1,169 @@ +const doFetch = require('@ntlab/sfetch') +const cheerio = require('cheerio') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const timezone = require('dayjs/plugin/timezone') +const customParseFormat = require('dayjs/plugin/customParseFormat') + +dayjs.extend(utc) +dayjs.extend(timezone) +dayjs.extend(customParseFormat) + +const tz = 'Europe/Helsinki' + +module.exports = { + site: 'tvkaista.org', + days: 2, + url({ channel, date }) { + return `https://www.tvkaista.org/${channel.site_id}/${date.format('YYYY-MM-DD')}` + }, + parser({ content, date }) { + let programs = [] + const items = parseItems(content) + + items.forEach(item => { + const prev = programs[programs.length - 1] + const $item = cheerio.load(item) + + let start = parseStart($item, date) + let stop = parseStop($item, start) + + if (prev) { + if (start.isBefore(prev.start)) { + start = start.add(1, 'd') + date = date.add(1, 'd') + } else if (stop.isBefore(start)) { + stop = stop.add(1, 'd') + date = date.add(1, 'd') + } + } else { + if (start.hour() > 18) { + start = start.subtract(1, 'd') + date = date.subtract(1, 'd') + } + } + + programs.push({ + title: parseTitle($item), + description: parseDescription($item), + season: parseSeason($item), + episode: parseEpisode($item), + categories: parseCategories($item), + rating: parseRating($item), + start, + stop + }) + }) + + return programs + }, + async channels() { + let channels = [] + + const queue = ['https://www.tvkaista.org/', 'https://www.tvkaista.org/maksukanavat/'] + await doFetch(queue, (url, res) => { + const $ = cheerio.load(res) + $('body > main > div > div.row > div').each((i, el) => { + const link = $(el).find('div > div > div > div.col-auto > a') + const img = link.find('img.channel-logo') + const name = link.text().trim() || img.attr('alt') + const [, site_id] = link.attr('href').split('/') + + channels.push({ + lang: 'fi', + name, + site_id + }) + }) + }) + + return channels + } +} + +function parseRating($item) { + let rating = $item( + 'div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(3) > img' + ).attr('alt') + + return rating + ? { + system: 'VET', + value: rating.replace(/\(|\)/g, '') + } + : null +} + +function parseCategories($item) { + return $item('div.collapse > .badge') + .map((i, el) => $item(el).text().trim()) + .get() +} + +function parseSeason($item) { + const string = $item( + 'div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(2)' + ) + .text() + .trim() + if (!string) return null + + let [, season] = string.match(/S(\d{2})/) || [null, null] + + return season ? parseInt(season) : null +} + +function parseEpisode($item) { + const string = $item( + 'div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(2)' + ) + .text() + .trim() + if (!string) return null + + let [, episode] = string.match(/E(\d{2})/) || [null, null] + + return episode ? parseInt(episode) : null +} + +function parseStart($item, date) { + const [time] = $item('div.d-flex.flex-row.bd-highlight > div.bd-highlight.me-2') + .text() + .trim() + .split('-') + + return dayjs.tz(`${date.format('YYYY-MM-DD')} ${time}`, 'YYYY-MM-DD HH:mm', tz) +} + +function parseStop($item, date) { + const [, time] = $item('div.d-flex.flex-row.bd-highlight > div.bd-highlight.me-2') + .text() + .trim() + .split('-') + + return dayjs.tz(`${date.format('YYYY-MM-DD')} ${time}`, 'YYYY-MM-DD HH:mm', tz) +} + +function parseTitle($item) { + return $item('div.d-flex.flex-row.bd-highlight > div.bd-highlight.flex-fill > span:nth-child(1)') + .text() + .trim() +} + +function parseDescription($item) { + return ( + $item('div.collapse > p') + .text() + .replace(/\n/g, '') + .replace(/\s\s+/g, ' ') + // eslint-disable-next-line no-irregular-whitespace + .replace(/ /g, ' ') + .trim() + ) +} + +function parseItems(content) { + const $ = cheerio.load(content) + + return $('ul.list-group > li').toArray() +} From 0bb74b4c1bfd6c68fbaecae51138691f738b4a7b Mon Sep 17 00:00:00 2001 From: freearhey <7253922+freearhey@users.noreply.github.com> Date: Sat, 1 Mar 2025 22:01:08 +0300 Subject: [PATCH 4/5] Create tvkaista.org.channels.xml --- sites/tvkaista.org/tvkaista.org.channels.xml | 152 +++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 sites/tvkaista.org/tvkaista.org.channels.xml diff --git a/sites/tvkaista.org/tvkaista.org.channels.xml b/sites/tvkaista.org/tvkaista.org.channels.xml new file mode 100644 index 00000000..8a17dc3c --- /dev/null +++ b/sites/tvkaista.org/tvkaista.org.channels.xml @@ -0,0 +1,152 @@ + + + 3sat + 24 Horas + Alfa + Al Jazeera + Al Jazeera English + Animal Planet + ARTE + AVA + BBC Brit + BBC World News + Bloomberg + Boomerang + C More First + C More Hits + C More Juniori + C More Max + C More Max 2 + C More Series + C More Sport 1 + C More Sport 2 + C More Stars + Cartoon Network + Club MTV + CNBC + CNN + Das Erste + Deutsche Welle (English) + Discovery Channel + Discovery Science + Disney Channel + DR1 + Elisa Viihde Sport 1 + Elisa Viihde Sport 2 + Elisa Viihde Sport 3 + Elisa Viihde Sport 4 + English Club TV + ETV + Eurochannel + Euronews + Eurosport 1 + Eurosport 2 + Extreme Sports Channel + FashionTV + Fight Sports + Filmbox Arthouse + Food Network + France24 + France 2 + France 5 + France 24 Français + Frii + FashionTV HD + FashionTV UHD + Fuel TV + Ginx eSports TV + Godare + Hero + Himlen TV7 + History + History 2 + Horse TV + iConcerts + Investigation Discovery + Jim + Kanal 10 Sverige + Kunskapskanalen + Kutonen + Liiga 1 + Liiga 2 + Liiga 3 + Liiga 4 + Liiga 5 + Liiga 6 + Liiga 7 + Liiga TV + Liiga UHD + Liv + Love Nature 4K + Mezzo + Mezzo Live + Motorvision TV + MTV + MTV3 + MTV 00s + MTV 80s + MTV 90s + MTV Hits + MTV Live + Nat Geo Wild + National Geographic + Nautical Channel + Nelonen + NHK World Japan + Nick Jr. + NRK1 + NRK2 + One Way TV + OnniTV + Rai 1 + RTL + SF-kanalen + Sky News + Star Channel + Stingray Classica + Sub + SVT1 + SVT2 + SVT Barn/SVT24 + TapahtumaTV Eveo + TLC + TotoTV + Travel Channel + TRT World + TV3 Sverige + TV4 Sverige + TV5 + TV5Monde + TV6 Sverige + Taivas TV7 + TV8 Sverige + TV10 Sverige + TVE Internacional + V film action + V film family + V film hits + V film premiere + V sport 1 + V sport 1 Suomi + V sport 2 Suomi + V sport Football + V sport Golf + V sport live 1 + V sport live 2 + V sport live 3 + V sport live 4 + V sport live 5 + V sport Motor + V sport+ Suomi + V sport Premium + V Sport Ultra HD + V sport Vinter + Viasat Explore + Viasat History + Viasat Nature + Wild TV + Yle Teema Fem + Yle TV1 + Yle TV2 + ZDF + From b33260b20f49b1b06191b13333845bc3e2c3823a Mon Sep 17 00:00:00 2001 From: freearhey <7253922+freearhey@users.noreply.github.com> Date: Sat, 1 Mar 2025 22:01:20 +0300 Subject: [PATCH 5/5] Create readme.md --- sites/tvkaista.org/readme.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 sites/tvkaista.org/readme.md diff --git a/sites/tvkaista.org/readme.md b/sites/tvkaista.org/readme.md new file mode 100644 index 00000000..fef9d5ff --- /dev/null +++ b/sites/tvkaista.org/readme.md @@ -0,0 +1,21 @@ +# tvkaista.org + +https://www.tvkaista.org/ + +### Download the guide + +```sh +npm run grab --- --site=tvkaista.org +``` + +### Update channel list + +```sh +npm run channels:parse --- --config=./sites/tvkaista.org/tvkaista.org.config.js --output=./sites/tvkaista.org/tvkaista.org.channels.xml +``` + +### Test + +```sh +npm test --- tvkaista.org +```