diff --git a/package-lock.json b/package-lock.json index fe4a772f..db302306 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,6 +21,7 @@ "jsdom": "^16.5.0", "lodash": "^4.17.21", "markdown-include": "^0.4.3", + "mockdate": "^3.0.5", "parse-duration": "^1.0.0", "pdf-parse": "^1.1.1", "srcset": "^4.0.0", @@ -3554,6 +3555,11 @@ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" }, + "node_modules/mockdate": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/mockdate/-/mockdate-3.0.5.tgz", + "integrity": "sha512-iniQP4rj1FhBdBYS/+eQv7j1tadJ9lJtdzgOpvsOHng/GbcDh2Fhdeq+ZRldrPYdXvCyfFUmFeEwEGXZB5I/AQ==" + }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -7432,6 +7438,11 @@ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" }, + "mockdate": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/mockdate/-/mockdate-3.0.5.tgz", + "integrity": "sha512-iniQP4rj1FhBdBYS/+eQv7j1tadJ9lJtdzgOpvsOHng/GbcDh2Fhdeq+ZRldrPYdXvCyfFUmFeEwEGXZB5I/AQ==" + }, "ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", diff --git a/package.json b/package.json index 9fc58cd4..11baf53e 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "jsdom": "^16.5.0", "lodash": "^4.17.21", "markdown-include": "^0.4.3", + "mockdate": "^3.0.5", "parse-duration": "^1.0.0", "pdf-parse": "^1.1.1", "srcset": "^4.0.0", diff --git a/sites/programtv.onet.pl/programtv.onet.pl.config.js b/sites/programtv.onet.pl/programtv.onet.pl.config.js index 38d8559a..c851ef8c 100644 --- a/sites/programtv.onet.pl/programtv.onet.pl.config.js +++ b/sites/programtv.onet.pl/programtv.onet.pl.config.js @@ -1,5 +1,4 @@ -const jsdom = require('jsdom') -const { JSDOM } = jsdom +const cheerio = require('cheerio') const dayjs = require('dayjs') const utc = require('dayjs/plugin/utc') const timezone = require('dayjs/plugin/timezone') @@ -13,36 +12,36 @@ module.exports = { delay: 5000, site: 'programtv.onet.pl', url: function ({ date, channel }) { - const today = dayjs().utc().startOf('d') - const day = date.diff(today, 'd') + const currDate = dayjs.utc().startOf('d') + const day = currDate.diff(date, 'd') + return `https://programtv.onet.pl/program-tv/${channel.site_id}?dzien=${day}` }, logo: function ({ content }) { - const dom = new JSDOM(content) - const img = dom.window.document.querySelector('#channelTV > section > header > span > img') + const $ = cheerio.load(content) + const imgSrc = $('#channelTV > section > header > span > img').attr('src') - return img ? 'https:' + img.src : null + return imgSrc ? `https:${imgSrc}` : null }, parser: function ({ content, date }) { - let PM = false const programs = [] const items = parseItems(content) items.forEach(item => { - const title = parseTitle(item) - const description = parseDescription(item) - const category = parseCategory(item) - let start = parseStart(item, date) - if (start.hour() > 11) PM = true - if (start.hour() < 12 && PM) start = start.add(1, 'd') - const stop = start.add(1, 'h') - if (programs.length) { - programs[programs.length - 1].stop = start + const prev = programs[programs.length - 1] + const $item = cheerio.load(item) + let start = parseStart($item, date) + if (prev) { + if (start.isBefore(prev.start)) { + start = start.add(1, 'd') + date = date.add(1, 'd') + } + prev.stop = start } - + const stop = start.add(1, 'h') programs.push({ - title, - description, - category, + title: parseTitle($item), + description: parseDescription($item), + category: parseCategory($item), start, stop }) @@ -52,27 +51,27 @@ module.exports = { } } -function parseStart(item, date) { - let time = (item.querySelector('.hours > .hour') || { textContent: '' }).textContent - time = `${date.format('MM/DD/YYYY')} ${time}` +function parseStart($item, date) { + const timeString = $item('.hours > .hour').text() + const dateString = `${date.format('MM/DD/YYYY')} ${timeString}` - return dayjs.tz(time, 'MM/DD/YYYY HH:mm', 'Europe/Warsaw') + return dayjs.tz(dateString, 'MM/DD/YYYY HH:mm', 'Europe/Warsaw') } -function parseCategory(item) { - return (item.querySelector('.titles > .type') || { textContent: '' }).textContent +function parseCategory($item) { + return $item('.titles > .type').text() } -function parseDescription(item) { - return (item.querySelector('.titles > p') || { textContent: '' }).textContent +function parseDescription($item) { + return $item('.titles > p').text().trim() } -function parseTitle(item) { - return (item.querySelector('.titles > a') || { textContent: '' }).textContent +function parseTitle($item) { + return $item('.titles > a').text().trim() } function parseItems(content) { - const dom = new JSDOM(content) + const $ = cheerio.load(content) - return dom.window.document.querySelectorAll('#channelTV > section > div.emissions > ul > li') + return $('#channelTV > section > div.emissions > ul > li').toArray() } diff --git a/sites/programtv.onet.pl/programtv.onet.pl.test.js b/sites/programtv.onet.pl/programtv.onet.pl.test.js new file mode 100644 index 00000000..6cf4803b --- /dev/null +++ b/sites/programtv.onet.pl/programtv.onet.pl.test.js @@ -0,0 +1,81 @@ +// npx epg-grabber --config=sites/programtv.onet.pl/programtv.onet.pl.config.js --channels=sites/programtv.onet.pl/programtv.onet.pl_pl.channels.xml --output=.gh-pages/guides/pl/programtv.onet.pl.epg.xml --days=2 + +const MockDate = require('mockdate') +const { parser, url, logo } = require('./programtv.onet.pl.config.js') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +dayjs.extend(customParseFormat) +dayjs.extend(utc) + +const date = dayjs.utc('2021-11-24', 'YYYY-MM-DD').startOf('d') +const channel = { + site_id: '13th-street-250', + xmltv_id: '13thStreetDeutschland.us' +} +const content = `
Bei einer Reality-TV-Show stirbt einer der Teilnehmer. Zunächst tappen Briscoe (Jerry Orbach) und Green (Jesse L....
Der Abgeordnete Dan McLane, ein ehemaliger Vorgesetzter von Gibbs, wird in New Orleans ermordet. In den 90er Jahren...
Die Zahl der Drogentoten ist gestiegen. Das Team des NCIS glaubt, dass sich Terroristen durch den zunehmenden...