From d97b0cd62a977e78f169d990fcd1bd3e2d34cf44 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Thu, 25 Nov 2021 00:02:42 +0300 Subject: [PATCH 1/3] Install mockdate package --- package-lock.json | 11 +++++++++++ package.json | 1 + 2 files changed, 12 insertions(+) diff --git a/package-lock.json b/package-lock.json index fe4a772f..db302306 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,6 +21,7 @@ "jsdom": "^16.5.0", "lodash": "^4.17.21", "markdown-include": "^0.4.3", + "mockdate": "^3.0.5", "parse-duration": "^1.0.0", "pdf-parse": "^1.1.1", "srcset": "^4.0.0", @@ -3554,6 +3555,11 @@ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" }, + "node_modules/mockdate": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/mockdate/-/mockdate-3.0.5.tgz", + "integrity": "sha512-iniQP4rj1FhBdBYS/+eQv7j1tadJ9lJtdzgOpvsOHng/GbcDh2Fhdeq+ZRldrPYdXvCyfFUmFeEwEGXZB5I/AQ==" + }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -7432,6 +7438,11 @@ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" }, + "mockdate": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/mockdate/-/mockdate-3.0.5.tgz", + "integrity": "sha512-iniQP4rj1FhBdBYS/+eQv7j1tadJ9lJtdzgOpvsOHng/GbcDh2Fhdeq+ZRldrPYdXvCyfFUmFeEwEGXZB5I/AQ==" + }, "ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", diff --git a/package.json b/package.json index 9fc58cd4..11baf53e 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "jsdom": "^16.5.0", "lodash": "^4.17.21", "markdown-include": "^0.4.3", + "mockdate": "^3.0.5", "parse-duration": "^1.0.0", "pdf-parse": "^1.1.1", "srcset": "^4.0.0", From 27533d399beea38dd93383d77f404a31757a2ff8 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Thu, 25 Nov 2021 00:02:48 +0300 Subject: [PATCH 2/3] Create programtv.onet.pl.test.js --- .../programtv.onet.pl.test.js | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 sites/programtv.onet.pl/programtv.onet.pl.test.js diff --git a/sites/programtv.onet.pl/programtv.onet.pl.test.js b/sites/programtv.onet.pl/programtv.onet.pl.test.js new file mode 100644 index 00000000..6cf4803b --- /dev/null +++ b/sites/programtv.onet.pl/programtv.onet.pl.test.js @@ -0,0 +1,81 @@ +// npx epg-grabber --config=sites/programtv.onet.pl/programtv.onet.pl.config.js --channels=sites/programtv.onet.pl/programtv.onet.pl_pl.channels.xml --output=.gh-pages/guides/pl/programtv.onet.pl.epg.xml --days=2 + +const MockDate = require('mockdate') +const { parser, url, logo } = require('./programtv.onet.pl.config.js') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +dayjs.extend(customParseFormat) +dayjs.extend(utc) + +const date = dayjs.utc('2021-11-24', 'YYYY-MM-DD').startOf('d') +const channel = { + site_id: '13th-street-250', + xmltv_id: '13thStreetDeutschland.us' +} +const content = `
13th Street
  • 03:20
    Law & Order, odc. 15: Letzte Worte Krimiserie

    Bei einer Reality-TV-Show stirbt einer der Teilnehmer. Zunächst tappen Briscoe (Jerry Orbach) und Green (Jesse L....

  • 23:30
    Navy CIS, odc. 1: New Orleans Krimiserie

    Der Abgeordnete Dan McLane, ein ehemaliger Vorgesetzter von Gibbs, wird in New Orleans ermordet. In den 90er Jahren...

  • 01:00
    Navy CIS: L.A, odc. 13: High Society Krimiserie

    Die Zahl der Drogentoten ist gestiegen. Das Team des NCIS glaubt, dass sich Terroristen durch den zunehmenden...

` + +it('can generate valid url', () => { + MockDate.set(new Date('2021-11-24')) + expect(url({ channel, date })).toBe( + 'https://programtv.onet.pl/program-tv/13th-street-250?dzien=0' + ) + MockDate.reset() +}) + +it('can generate valid url for next day', () => { + MockDate.set(new Date('2021-11-25')) + expect(url({ channel, date })).toBe( + 'https://programtv.onet.pl/program-tv/13th-street-250?dzien=1' + ) + MockDate.reset() +}) + +it('can generate valid logo url', () => { + expect(logo({ content })).toBe( + 'https://ocdn.eu/ptv2-images-transforms/1/zB4kr1sb2dvLW1pZ3JhdGVkLzEzdGgtc3RyZWV0LnBuZ5KVAmQAwsOVAgAowsM' + ) +}) + +it('can parse response', () => { + const result = parser({ content, date }).map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + return p + }) + + expect(result).toMatchObject([ + { + start: '2021-11-24T02:20:00.000Z', + stop: '2021-11-24T22:30:00.000Z', + title: `Law & Order, odc. 15: Letzte Worte`, + category: 'Krimiserie', + description: `Bei einer Reality-TV-Show stirbt einer der Teilnehmer. Zunächst tappen Briscoe (Jerry Orbach) und Green (Jesse L....` + }, + { + start: '2021-11-24T22:30:00.000Z', + stop: '2021-11-25T00:00:00.000Z', + title: `Navy CIS, odc. 1: New Orleans`, + category: 'Krimiserie', + description: + 'Der Abgeordnete Dan McLane, ein ehemaliger Vorgesetzter von Gibbs, wird in New Orleans ermordet. In den 90er Jahren...' + }, + { + start: '2021-11-25T00:00:00.000Z', + stop: '2021-11-25T01:00:00.000Z', + title: `Navy CIS: L.A, odc. 13: High Society`, + category: 'Krimiserie', + description: + 'Die Zahl der Drogentoten ist gestiegen. Das Team des NCIS glaubt, dass sich Terroristen durch den zunehmenden...' + } + ]) +}) + +it('can handle empty guide', () => { + const result = parser({ + date, + channel, + content: `` + }) + expect(result).toMatchObject([]) +}) From 13be2f2aab01860bb4b2665e43b5c5c5c700ca3a Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Thu, 25 Nov 2021 00:03:02 +0300 Subject: [PATCH 3/3] Update programtv.onet.pl.config.js --- .../programtv.onet.pl.config.js | 65 +++++++++---------- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/sites/programtv.onet.pl/programtv.onet.pl.config.js b/sites/programtv.onet.pl/programtv.onet.pl.config.js index 38d8559a..c851ef8c 100644 --- a/sites/programtv.onet.pl/programtv.onet.pl.config.js +++ b/sites/programtv.onet.pl/programtv.onet.pl.config.js @@ -1,5 +1,4 @@ -const jsdom = require('jsdom') -const { JSDOM } = jsdom +const cheerio = require('cheerio') const dayjs = require('dayjs') const utc = require('dayjs/plugin/utc') const timezone = require('dayjs/plugin/timezone') @@ -13,36 +12,36 @@ module.exports = { delay: 5000, site: 'programtv.onet.pl', url: function ({ date, channel }) { - const today = dayjs().utc().startOf('d') - const day = date.diff(today, 'd') + const currDate = dayjs.utc().startOf('d') + const day = currDate.diff(date, 'd') + return `https://programtv.onet.pl/program-tv/${channel.site_id}?dzien=${day}` }, logo: function ({ content }) { - const dom = new JSDOM(content) - const img = dom.window.document.querySelector('#channelTV > section > header > span > img') + const $ = cheerio.load(content) + const imgSrc = $('#channelTV > section > header > span > img').attr('src') - return img ? 'https:' + img.src : null + return imgSrc ? `https:${imgSrc}` : null }, parser: function ({ content, date }) { - let PM = false const programs = [] const items = parseItems(content) items.forEach(item => { - const title = parseTitle(item) - const description = parseDescription(item) - const category = parseCategory(item) - let start = parseStart(item, date) - if (start.hour() > 11) PM = true - if (start.hour() < 12 && PM) start = start.add(1, 'd') - const stop = start.add(1, 'h') - if (programs.length) { - programs[programs.length - 1].stop = start + const prev = programs[programs.length - 1] + const $item = cheerio.load(item) + let start = parseStart($item, date) + if (prev) { + if (start.isBefore(prev.start)) { + start = start.add(1, 'd') + date = date.add(1, 'd') + } + prev.stop = start } - + const stop = start.add(1, 'h') programs.push({ - title, - description, - category, + title: parseTitle($item), + description: parseDescription($item), + category: parseCategory($item), start, stop }) @@ -52,27 +51,27 @@ module.exports = { } } -function parseStart(item, date) { - let time = (item.querySelector('.hours > .hour') || { textContent: '' }).textContent - time = `${date.format('MM/DD/YYYY')} ${time}` +function parseStart($item, date) { + const timeString = $item('.hours > .hour').text() + const dateString = `${date.format('MM/DD/YYYY')} ${timeString}` - return dayjs.tz(time, 'MM/DD/YYYY HH:mm', 'Europe/Warsaw') + return dayjs.tz(dateString, 'MM/DD/YYYY HH:mm', 'Europe/Warsaw') } -function parseCategory(item) { - return (item.querySelector('.titles > .type') || { textContent: '' }).textContent +function parseCategory($item) { + return $item('.titles > .type').text() } -function parseDescription(item) { - return (item.querySelector('.titles > p') || { textContent: '' }).textContent +function parseDescription($item) { + return $item('.titles > p').text().trim() } -function parseTitle(item) { - return (item.querySelector('.titles > a') || { textContent: '' }).textContent +function parseTitle($item) { + return $item('.titles > a').text().trim() } function parseItems(content) { - const dom = new JSDOM(content) + const $ = cheerio.load(content) - return dom.window.document.querySelectorAll('#channelTV > section > div.emissions > ul > li') + return $('#channelTV > section > div.emissions > ul > li').toArray() }