Merge pull request #292 from iptv-org/update-programtv-onet-pl

Update programtv.onet.pl
This commit is contained in:
Aleksandr Statciuk 2021-11-25 00:03:42 +03:00 committed by GitHub
commit c85be178f7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 125 additions and 33 deletions

11
package-lock.json generated
View file

@ -21,6 +21,7 @@
"jsdom": "^16.5.0",
"lodash": "^4.17.21",
"markdown-include": "^0.4.3",
"mockdate": "^3.0.5",
"parse-duration": "^1.0.0",
"pdf-parse": "^1.1.1",
"srcset": "^4.0.0",
@ -3554,6 +3555,11 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw=="
},
"node_modules/mockdate": {
"version": "3.0.5",
"resolved": "https://registry.npmjs.org/mockdate/-/mockdate-3.0.5.tgz",
"integrity": "sha512-iniQP4rj1FhBdBYS/+eQv7j1tadJ9lJtdzgOpvsOHng/GbcDh2Fhdeq+ZRldrPYdXvCyfFUmFeEwEGXZB5I/AQ=="
},
"node_modules/ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
@ -7432,6 +7438,11 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw=="
},
"mockdate": {
"version": "3.0.5",
"resolved": "https://registry.npmjs.org/mockdate/-/mockdate-3.0.5.tgz",
"integrity": "sha512-iniQP4rj1FhBdBYS/+eQv7j1tadJ9lJtdzgOpvsOHng/GbcDh2Fhdeq+ZRldrPYdXvCyfFUmFeEwEGXZB5I/AQ=="
},
"ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",

View file

@ -25,6 +25,7 @@
"jsdom": "^16.5.0",
"lodash": "^4.17.21",
"markdown-include": "^0.4.3",
"mockdate": "^3.0.5",
"parse-duration": "^1.0.0",
"pdf-parse": "^1.1.1",
"srcset": "^4.0.0",

View file

@ -1,5 +1,4 @@
const jsdom = require('jsdom')
const { JSDOM } = jsdom
const cheerio = require('cheerio')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const timezone = require('dayjs/plugin/timezone')
@ -13,36 +12,36 @@ module.exports = {
delay: 5000,
site: 'programtv.onet.pl',
url: function ({ date, channel }) {
const today = dayjs().utc().startOf('d')
const day = date.diff(today, 'd')
const currDate = dayjs.utc().startOf('d')
const day = currDate.diff(date, 'd')
return `https://programtv.onet.pl/program-tv/${channel.site_id}?dzien=${day}`
},
logo: function ({ content }) {
const dom = new JSDOM(content)
const img = dom.window.document.querySelector('#channelTV > section > header > span > img')
const $ = cheerio.load(content)
const imgSrc = $('#channelTV > section > header > span > img').attr('src')
return img ? 'https:' + img.src : null
return imgSrc ? `https:${imgSrc}` : null
},
parser: function ({ content, date }) {
let PM = false
const programs = []
const items = parseItems(content)
items.forEach(item => {
const title = parseTitle(item)
const description = parseDescription(item)
const category = parseCategory(item)
let start = parseStart(item, date)
if (start.hour() > 11) PM = true
if (start.hour() < 12 && PM) start = start.add(1, 'd')
const stop = start.add(1, 'h')
if (programs.length) {
programs[programs.length - 1].stop = start
const prev = programs[programs.length - 1]
const $item = cheerio.load(item)
let start = parseStart($item, date)
if (prev) {
if (start.isBefore(prev.start)) {
start = start.add(1, 'd')
date = date.add(1, 'd')
}
prev.stop = start
}
const stop = start.add(1, 'h')
programs.push({
title,
description,
category,
title: parseTitle($item),
description: parseDescription($item),
category: parseCategory($item),
start,
stop
})
@ -52,27 +51,27 @@ module.exports = {
}
}
function parseStart(item, date) {
let time = (item.querySelector('.hours > .hour') || { textContent: '' }).textContent
time = `${date.format('MM/DD/YYYY')} ${time}`
function parseStart($item, date) {
const timeString = $item('.hours > .hour').text()
const dateString = `${date.format('MM/DD/YYYY')} ${timeString}`
return dayjs.tz(time, 'MM/DD/YYYY HH:mm', 'Europe/Warsaw')
return dayjs.tz(dateString, 'MM/DD/YYYY HH:mm', 'Europe/Warsaw')
}
function parseCategory(item) {
return (item.querySelector('.titles > .type') || { textContent: '' }).textContent
function parseCategory($item) {
return $item('.titles > .type').text()
}
function parseDescription(item) {
return (item.querySelector('.titles > p') || { textContent: '' }).textContent
function parseDescription($item) {
return $item('.titles > p').text().trim()
}
function parseTitle(item) {
return (item.querySelector('.titles > a') || { textContent: '' }).textContent
function parseTitle($item) {
return $item('.titles > a').text().trim()
}
function parseItems(content) {
const dom = new JSDOM(content)
const $ = cheerio.load(content)
return dom.window.document.querySelectorAll('#channelTV > section > div.emissions > ul > li')
return $('#channelTV > section > div.emissions > ul > li').toArray()
}

View file

@ -0,0 +1,81 @@
// npx epg-grabber --config=sites/programtv.onet.pl/programtv.onet.pl.config.js --channels=sites/programtv.onet.pl/programtv.onet.pl_pl.channels.xml --output=.gh-pages/guides/pl/programtv.onet.pl.epg.xml --days=2
const MockDate = require('mockdate')
const { parser, url, logo } = require('./programtv.onet.pl.config.js')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
// Guide date handed to url() and parser() in the tests below: start of day, UTC, 2021-11-24.
const date = dayjs.utc('2021-11-24', 'YYYY-MM-DD').startOf('d')
// Channel fixture; the URL tests expect site_id to appear in the request path.
const channel = {
site_id: '13th-street-250',
xmltv_id: '13thStreetDeutschland.us'
}
// HTML fixture: one channel page with a protocol-relative logo <img> and three
// programme <li> entries whose displayed times are 03:20, 23:30 and 01:00 (in that order).
const content = `<!DOCTYPE html><html lang="pl"> <head></head> <body class="withFilters pageChannel"> <div id="channelPage"> <div id="channelTV" class="nextToMenu"> <section class="channelEmissions"> <header> <span class="logoTV"> <img src="//ocdn.eu/ptv2-images-transforms/1/zB4kr1sb2dvLW1pZ3JhdGVkLzEzdGgtc3RyZWV0LnBuZ5KVAmQAwsOVAgAowsM" alt="13th Street"/> </span> </header> <div class="emissions"> <ul> <li class="hh03 hh04 fltrSerie"> <div class="hours"> <span class="hour">03:20</span> </div><div class="titles"> <a href="/tv/law-and-order-odcinek-15/rlmzu?entry=21970867" >Law &amp; Order, odc. 15: Letzte Worte</a > <span class="type">Krimiserie</span> <p> Bei einer Reality-TV-Show stirbt einer der Teilnehmer. Zunächst tappen Briscoe (Jerry Orbach) und Green (Jesse L.... </p></div></li><li class="hh23 hh00 fltrSerie"> <div class="hours"> <span class="hour">23:30</span> </div><div class="titles"> <a href="/tv/navy-cis-odcinek-1/73vbw?entry=22035734" >Navy CIS, odc. 1: New Orleans</a > <span class="type">Krimiserie</span> <p> Der Abgeordnete Dan McLane, ein ehemaliger Vorgesetzter von Gibbs, wird in New Orleans ermordet. In den 90er Jahren... </p></div></li><li class="hh01 fltrSerie"> <div class="hours"> <span class="hour">01:00</span> </div><div class="titles"> <a href="/tv/navy-cis-la-odcinek-13/tuc34?entry=22035821" >Navy CIS: L.A, odc. 13: High Society</a > <span class="type">Krimiserie</span> <p> Die Zahl der Drogentoten ist gestiegen. Das Team des NCIS glaubt, dass sich Terroristen durch den zunehmenden... </p></div></li></ul> </div></section> </div></div></body></html>`
// url() derives the `dzien` offset from the current date, so the clock is pinned
// to the same day as the requested guide date and an offset of 0 is expected.
it('can generate valid url', () => {
  MockDate.set(new Date('2021-11-24'))
  try {
    expect(url({ channel, date })).toBe(
      'https://programtv.onet.pl/program-tv/13th-street-250?dzien=0'
    )
  } finally {
    // Restore the real clock even when the expectation throws, so a failure here
    // cannot leak the mocked date into the tests that run afterwards.
    MockDate.reset()
  }
})
// Pins the clock one day after the requested guide date and expects `dzien=1`.
// NOTE(review): the requested date (2021-11-24) is the day *before* the mocked
// current date (2021-11-25), yet the title says "next day" — confirm the intended
// sign of the `dzien` offset against the site before relying on this expectation.
it('can generate valid url for next day', () => {
  MockDate.set(new Date('2021-11-25'))
  try {
    expect(url({ channel, date })).toBe(
      'https://programtv.onet.pl/program-tv/13th-street-250?dzien=1'
    )
  } finally {
    // Restore the real clock even when the expectation throws, so a failure here
    // cannot leak the mocked date into the tests that run afterwards.
    MockDate.reset()
  }
})
// The fixture's <img> src is protocol-relative ("//ocdn.eu/..."); logo() is
// expected to return the same address with an explicit https: scheme.
it('can generate valid logo url', () => {
  const expectedLogo =
    'https://ocdn.eu/ptv2-images-transforms/1/zB4kr1sb2dvLW1pZ3JhdGVkLzEzdGgtc3RyZWV0LnBuZ5KVAmQAwsOVAgAowsM'
  const actualLogo = logo({ content })

  expect(actualLogo).toBe(expectedLogo)
})
// Parses the three-programme fixture and checks titles, categories, descriptions
// and start/stop instants. The expected instants show that each programme stops
// where the next one starts, that the 01:00 entry following 23:30 lands on the
// next calendar day, and that the last programme is given a one-hour duration.
it('can parse response', () => {
  const programs = parser({ content, date })

  // Serialise the date objects to ISO strings so they can be compared with literals.
  for (const program of programs) {
    program.start = program.start.toJSON()
    program.stop = program.stop.toJSON()
  }

  const expectedPrograms = [
    {
      start: '2021-11-24T02:20:00.000Z',
      stop: '2021-11-24T22:30:00.000Z',
      title: `Law & Order, odc. 15: Letzte Worte`,
      category: 'Krimiserie',
      description: `Bei einer Reality-TV-Show stirbt einer der Teilnehmer. Zunächst tappen Briscoe (Jerry Orbach) und Green (Jesse L....`
    },
    {
      start: '2021-11-24T22:30:00.000Z',
      stop: '2021-11-25T00:00:00.000Z',
      title: `Navy CIS, odc. 1: New Orleans`,
      category: 'Krimiserie',
      description:
        'Der Abgeordnete Dan McLane, ein ehemaliger Vorgesetzter von Gibbs, wird in New Orleans ermordet. In den 90er Jahren...'
    },
    {
      start: '2021-11-25T00:00:00.000Z',
      stop: '2021-11-25T01:00:00.000Z',
      title: `Navy CIS: L.A, odc. 13: High Society`,
      category: 'Krimiserie',
      description:
        'Die Zahl der Drogentoten ist gestiegen. Das Team des NCIS glaubt, dass sich Terroristen durch den zunehmenden...'
    }
  ]

  expect(programs).toMatchObject(expectedPrograms)
})
// A page without any programme list items must yield an empty programme list.
it('can handle empty guide', () => {
  const emptyPage = `<!DOCTYPE html><html><head></head><body></body></html>`
  const programs = parser({ date, channel, content: emptyPage })

  expect(programs).toMatchObject([])
})