From e8ad3d66bd2e78c152a407b122781e550115141f Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Thu, 25 Nov 2021 05:43:20 +0300 Subject: [PATCH 1/3] Delete ontvtonight.com_us.channels.xml --- sites/ontvtonight.com/ontvtonight.com_us.channels.xml | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 sites/ontvtonight.com/ontvtonight.com_us.channels.xml diff --git a/sites/ontvtonight.com/ontvtonight.com_us.channels.xml b/sites/ontvtonight.com/ontvtonight.com_us.channels.xml deleted file mode 100644 index daecced8..00000000 --- a/sites/ontvtonight.com/ontvtonight.com_us.channels.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - BBC Arabic - DW English - God TV UK - MavTV - Outdoor Channel - Sky News Arabia - - \ No newline at end of file From 1bfa040b4edb800d08b378a7768dc027226fcd3c Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Thu, 25 Nov 2021 05:43:25 +0300 Subject: [PATCH 2/3] Create ontvtonight.com.test.js --- sites/ontvtonight.com/ontvtonight.com.test.js | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 sites/ontvtonight.com/ontvtonight.com.test.js diff --git a/sites/ontvtonight.com/ontvtonight.com.test.js b/sites/ontvtonight.com/ontvtonight.com.test.js new file mode 100644 index 00000000..1807ffb7 --- /dev/null +++ b/sites/ontvtonight.com/ontvtonight.com.test.js @@ -0,0 +1,63 @@ +// npx epg-grabber --config=sites/ontvtonight.com/ontvtonight.com.config.js --channels=sites/ontvtonight.com/ontvtonight.com_au.channels.xml --output=.gh-pages/guides/au/ontvtonight.com.epg.xml --days=2 + +const { parser, url, logo } = require('./ontvtonight.com.config.js') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +dayjs.extend(customParseFormat) +dayjs.extend(utc) + +const date = dayjs.utc('2021-11-25', 'YYYY-MM-DD').startOf('d') +const channel = { + site_id: 'au#1692/7two', + xmltv_id: '7Two.au' +} +const content = `
7TWO
12:10 am
What A Carry On
12:50 am
Bones
The Devil In The Details
10:50 pm
Inspector Morse: The Remorseful Day
` + +it('can generate valid url', () => { + expect(url({ channel, date })).toBe( + 'https://www.ontvtonight.com/au/guide/listings/channel/1692/7two.html?dt=2021-11-25' + ) +}) + +it('can generate valid logo url', () => { + expect(logo({ content })).toBe( + 'https://otv-us-web.s3-us-west-2.amazonaws.com/logos/guide/media/ed49cf4f-1123-4bee-9c90-a6af375af310.png' + ) +}) + +it('can parse response', () => { + const result = parser({ content, channel, date }).map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + return p + }) + + expect(result).toMatchObject([ + { + start: '2021-11-24T13:10:00.000Z', + stop: '2021-11-24T13:50:00.000Z', + title: `What A Carry On` + }, + { + start: '2021-11-24T13:50:00.000Z', + stop: '2021-11-25T11:50:00.000Z', + title: `Bones`, + description: 'The Devil In The Details' + }, + { + start: '2021-11-25T11:50:00.000Z', + stop: '2021-11-25T12:50:00.000Z', + title: `Inspector Morse: The Remorseful Day` + } + ]) +}) + +it('can handle empty guide', () => { + const result = parser({ + date, + channel, + content: `` + }) + expect(result).toMatchObject([]) +}) From e689363f542790af4bc3b2d0724bd19b5ac5de4e Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Thu, 25 Nov 2021 05:43:31 +0300 Subject: [PATCH 3/3] Update ontvtonight.com.config.js --- .../ontvtonight.com/ontvtonight.com.config.js | 64 +++++++++---------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/sites/ontvtonight.com/ontvtonight.com.config.js b/sites/ontvtonight.com/ontvtonight.com.config.js index ac5673ce..a155bee8 100644 --- a/sites/ontvtonight.com/ontvtonight.com.config.js +++ b/sites/ontvtonight.com/ontvtonight.com.config.js @@ -1,5 +1,4 @@ -const jsdom = require('jsdom') -const { JSDOM } = jsdom +const cheerio = require('cheerio') const dayjs = require('dayjs') const utc = require('dayjs/plugin/utc') const timezone = require('dayjs/plugin/timezone') @@ -26,58 +25,55 @@ module.exports = { return url }, logo: function ({ content }) { - const dom = new JSDOM(content) - const img = - dom.window.document.querySelector('#content > div > div > div.span6 > img') || - dom.window.document.querySelector('#inner-headline > div > div > div > img') + const $ = cheerio.load(content) + const imgSrc = $( + '#content > div > div > div.span6 > img,#inner-headline > div > div > div > img' + ).attr('src') - return img ? img.src : null + return imgSrc || null }, parser: function ({ content, date, channel }) { const programs = [] const items = parseItems(content) items.forEach(item => { - const title = parseTitle(item) - const start = parseStart(item, date, channel) - const stop = start.add(1, 'h') - - if (title && start) { - if (programs.length) { - programs[programs.length - 1].stop = start - } - - programs.push({ - title, - start, - stop - }) + const prev = programs[programs.length - 1] + const $item = cheerio.load(item) + const start = parseStart($item, date, channel) + if (prev) { + prev.stop = start } + const stop = start.add(1, 'h') + programs.push({ + title: parseTitle($item), + description: parseDescription($item), + start, + stop + }) }) return programs } } -function parseStart(item, date, channel) { +function parseStart($item, date, channel) { const [region, id] = channel.site_id.split('#') const timezone = region ? tz[region] : tz['uk'] + const timeString = $item('td:nth-child(1) > h5').text().trim() + const dateString = `${date.format('YYYY-MM-DD')} ${timeString}` - let time = (item.querySelector('td:nth-child(1) > h5') || { textContent: '' }).textContent.trim() - time = `${date.format('DD/MM/YYYY')} ${time.toUpperCase()}` - - return dayjs.tz(time, 'DD/MM/YYYY H:mm A', timezone) + return dayjs.tz(dateString, 'YYYY-MM-DD H:mm a', timezone) } -function parseTitle(item) { - return (item.querySelector('td:nth-child(2) > h5 > a') || { textContent: '' }).textContent - .toString() - .trim() +function parseTitle($item) { + return $item('td:nth-child(2) > h5').text().trim() +} + +function parseDescription($item) { + return $item('td:nth-child(2) > h6').text().trim() } function parseItems(content) { - const dom = new JSDOM(content) + const $ = cheerio.load(content) - return dom.window.document.querySelectorAll( - '#content > div > div > div.span6 > table > tbody > tr' - ) + return $('#content > div > div > div.span6 > table > tbody > tr').toArray() }