Merge pull request #1449 from iptv-org/fix-ruv.is

Fix ruv.is
This commit is contained in:
Aleksandr Statciuk 2022-12-02 18:47:58 +03:00 committed by GitHub
commit 5016c6d889
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 59 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -11,27 +11,21 @@ dayjs.extend(customParseFormat)
module.exports = {
site: 'ruv.is',
url({ channel, date }) {
return `https://www.ruv.is/dagskra/${channel.site_id}/${date.format('YYYYMMDD')}`
return `https://www.ruv.is/sjonvarp/dagskra/${channel.site_id}/${date.format('YYYY-MM-DD')}`
},
parser({ content, date }) {
parser({ content, channel, date }) {
let programs = []
const items = parseItems(content)
const items = parseItems(content, channel, date)
items.forEach(item => {
const $item = cheerio.load(item)
const prev = programs[programs.length - 1]
let start = parseStart($item, date)
if (prev) {
if (start.isBefore(prev.start)) {
start = start.add(1, 'd')
date = date.add(1, 'd')
}
prev.stop = start
let start = parseStart(item, date)
let stop = parseStop(item, date)
if (stop.isBefore(start)) {
stop = stop.add(1, 'd')
}
let stop = start.add(1, 'h')
programs.push({
title: parseTitle($item),
description: parseDescription($item),
icon: parseIcon($item),
title: item.title,
description: item.description,
icon: parseIcon(item),
start,
stop
})
@@ -41,27 +35,35 @@ module.exports = {
}
}
function parseTitle($item) {
return $item('span.field-content.ruv-color').text()
/**
 * Builds the icon URL for a schedule event.
 *
 * The event's `image` string carries a literal `$$IMAGESIZE$$` placeholder,
 * which is replaced with `480`.
 *
 * @param {object} item - Schedule event; only `item.image` is read.
 * @returns {string|undefined} The icon URL, or `undefined` when the event
 *   has no `image` value.
 */
function parseIcon(item) {
  // Optional chaining: an event without artwork must not abort the whole
  // parse with "Cannot read properties of undefined (reading 'replace')".
  return item.image?.replace('$$IMAGESIZE$$', '480')
}
function parseDescription($item) {
return $item('div.views-field > span > div > span > p').text().trim()
/**
 * Computes the start time of a schedule event.
 *
 * Joins the requested day (formatted as `YYYY-MM-DD`) with the event's
 * `start_time_friendly` value and parses the result with `dayjs.tz` in the
 * `Atlantic/Reykjavik` zone.
 *
 * @param {object} item - Schedule event; only `item.start_time_friendly` is read.
 * @param {object} date - Day being parsed; must expose `format()`.
 * @returns {*} Whatever `dayjs.tz` returns for the combined string.
 */
function parseStart(item, date) {
  const day = date.format('YYYY-MM-DD')
  const clock = item.start_time_friendly

  return dayjs.tz(`${day} ${clock}`, 'YYYY-MM-DD HH:mm', 'Atlantic/Reykjavik')
}
function parseIcon($item) {
return $item('div.views-field > span > div > div img').attr('src')
/**
 * Computes the end time of a schedule event.
 *
 * Joins the requested day (formatted as `YYYY-MM-DD`) with the event's
 * `end_time_friendly` value and parses the result with `dayjs.tz` in the
 * `Atlantic/Reykjavik` zone. No day rollover is applied here; the result is
 * always placed on the day passed in.
 *
 * @param {object} item - Schedule event; only `item.end_time_friendly` is read.
 * @param {object} date - Day being parsed; must expose `format()`.
 * @returns {*} Whatever `dayjs.tz` returns for the combined string.
 */
function parseStop(item, date) {
  const day = date.format('YYYY-MM-DD')
  const clock = item.end_time_friendly

  return dayjs.tz(`${day} ${clock}`, 'YYYY-MM-DD HH:mm', 'Atlantic/Reykjavik')
}
/**
 * Computes a start time from a cheerio-wrapped schedule row.
 *
 * Reads the text of the row's `strong` element, prefixes it with the
 * requested day (`YYYY-MM-DD`) and parses the result with `dayjs.tz` in the
 * `Atlantic/Reykjavik` zone. The time pattern is `HH : mm`, with spaces
 * around the colon.
 *
 * NOTE(review): SOURCE also contains a `parseStart(item, date)` that reads
 * `item.start_time_friendly`; presumably this selector-based variant is the
 * superseded one. Confirm before keeping both definitions.
 *
 * @param {Function} $item - Selector function for a single row.
 * @param {object} date - Day being parsed; must expose `format()`.
 * @returns {*} Whatever `dayjs.tz` returns for the combined string.
 */
function parseStart($item, date) {
  const clock = $item('strong').text()

  return dayjs.tz(
    `${date.format('YYYY-MM-DD')} ${clock}`,
    'YYYY-MM-DD HH : mm',
    'Atlantic/Reykjavik'
  )
}
function parseItems(content) {
/**
 * Extracts the list of schedule events for one channel and day from a page.
 *
 * The page is expected to hold a `#apollo` element whose contents assign a
 * JSON document to `window.__APOLLO_STATE__`. Events are read from
 * `ROOT_QUERY['Schedule({"channel":"<site_id>","date":"<YYYY-MM-DD>"})'].events`.
 *
 * @param {string|Buffer} content - Raw page markup handed to `cheerio.load`.
 * @param {object} channel - Channel descriptor; only `channel.site_id` is read.
 * @param {object} date - Day being parsed; must expose `format()`.
 * @returns {Array} The events for that channel and day, or an empty array
 *   when the page carries no Apollo state or no matching schedule entry.
 * @throws {SyntaxError} If a state blob is present but is not valid JSON.
 */
function parseItems(content, channel, date) {
  const $ = cheerio.load(content)

  // `.html()` yields null when `#apollo` is absent; normalise to a string so
  // the match below cannot throw.
  const apollo = $('#apollo').html() || ''
  const [, state] = apollo.match(/window\.__APOLLO_STATE__ = ([^;<]+)/) || [null, '']

  // An empty capture means "no guide data". JSON.parse('') would throw, so
  // bail out before parsing.
  if (!state) return []

  const data = JSON.parse(state)
  const key = `Schedule({"channel":"${channel.site_id}","date":"${date.format('YYYY-MM-DD')}"})`

  return data?.ROOT_QUERY?.[key]?.events || []
}

View file

@@ -1,60 +1,47 @@
// npx epg-grabber --config=sites/ruv.is/ruv.is.config.js --channels=sites/ruv.is/ruv.is_is.channels.xml --output=guide.xml --days=2
const { parser, url } = require('./ruv.is.config.js')
const fs = require('fs')
const path = require('path')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
const date = dayjs.utc('2021-11-25', 'YYYY-MM-DD').startOf('d')
const date = dayjs.utc('2022-12-03', 'YYYY-MM-DD').startOf('d')
const channel = {
site_id: 'ruv',
xmltv_id: 'RUV.is'
}
// Checks the address that url() builds from the channel's site_id and the date.
// NOTE(review): the two expectations below use different URL shapes for the
// same call; presumably they are the before/after lines of a change and are
// not meant to hold together. Confirm which one applies.
it('can generate valid url', () => {
expect(url({ channel, date })).toBe('https://www.ruv.is/dagskra/ruv/20211125')
expect(url({ channel, date })).toBe('https://www.ruv.is/sjonvarp/dagskra/ruv/2022-12-03')
})
it('can parse response', () => {
const content = `<!DOCTYPE html><html lang="is" dir="ltr"> <head></head> <body> <div id="main-container"> <div id="page-wrapper"> <div id="page" class="container page"> <div id="columns" class="columns clearfix"> <main id="content-column" class="content-column" role="main"> <div class="content-inner"> <section id="main-content"> <div id="content" class="region"> <div id="block-system-main"> <div class="two-75-25 at-panel panel-display clearfix"> <div class="region region-two-75-25-first"> <div class="region-inner clearfix"> <div class="panel-pane pane-custom pane-2 no-title block"> <div class="block-inner clearfix"> <div class="block-content"> <div class="fill-white pad2 border"> <div id="ruv_api_calendar"> <ul class="unlist"> <li class=" views-row views-row-0 views-row-odd views-row-first border-bottom pad0y space1 clearfix " > <div class="fr inline"> <i tabindex="0" role="button" aria-expanded="false" aria-label="Sjá nánar Heimaleikfimi" title="Sjá nánar Heimaleikfimi" rel="5215669" class="inline fa fa-plus-circle description pointer" ></i> </div><strong class="field-content">13 : 00</strong> <div class="inline grey-color" title="Upptaka verður aðgengileg í Spilara og Appi" > <i class="icon icon-sarpurinn color-sarpurinn"></i> </div><span class="field-content ruv-color" ><a href="http://www.ruv.is/sjonvarp/spila/heimaleikfimi/30389/91pviq" target="_blank" ><b>Heimaleikfimi</b></a ></span ><em class="field-content color-gray"></em ><span class="field-content color-gray" ><i class="icon icon-vod color-gray" title="Upptaka aðgengileg í VOD-þjónustum" ></i></span ><span class="field-content color-gray" ><i class="icon icon-888 color-gray" title="Dagskrárliður er textaður á síðu 888 í Textavarpinu" ></i></span ><span class="field-content" ><i class="icon icon-endursynt color-gray" title="Endurtekið efni" ></i ></span> <div class="views-field views-field-nothing"> <span class="field-content" ><div class="content hidden mar2t" id="dagskra_item_5215669" > <div 
class="mar2r col12-mobile fl"> <a href="http://www.ruv.is/sjonvarp/spila/heimaleikfimi/30389/91pviq" target="_blank" ><img class=" image-style-medium col12-mobile fl mar1b " src="https://d38kdhuogyllre.cloudfront.net/fit-in/480x/filters:quality(65)/hd_posters/91pvig-3p3hig.jpg" width="250" height="141" alt="Mynd með færslu" title="Mynd með færslu"/></a> </div><span ><p> Góð ráð og æfingar sem tilvalið er að gera heima. Íris Rut Garðarsdóttir sjúkraþjálfari hefur umsjón með leikfiminni. e. </p></span > </div></span > </div></li><li class="views-row views-row-1 views-row-odd views-row border-bottom pad0y space1 clearfix"><div class="fr inline"><i tabindex="0" role="button" aria-expanded="false" aria-label="Sjá nánar Kastljós" title="Sjá nánar Kastljós" rel="5215993" class="inline fa pointer description_selected fa-minus-circle"></i></div><strong class="field-content">13 : 10</strong><div class="inline grey-color" title="Upptaka verður aðgengileg í Spilara og Appi"><i class="icon icon-sarpurinn "></i></div><span class="field-content ruv-color">Kastljós</span><em class="field-content color-gray"></em><span class="field-content color-gray"><i class="icon icon-vod color-gray" title="Upptaka aðgengileg í VOD-þjónustum"></i></span><span class="field-content color-gray"><i class="icon icon-888 color-gray" title="Dagskrárliður er textaður á síðu 888 í Textavarpinu"></i></span><span class="field-content"><i class="icon icon-endursynt color-gray" title="Endurtekið efni"></i></span><div class="views-field views-field-nothing"><span class="field-content"><div class="content mar2t" id="dagskra_item_5215993"><div class="mar2r col12-mobile fl"><img class="image-style-medium col12-mobile fl mar1b" src="https://d38kdhuogyllre.cloudfront.net/fit-in/480x/filters:quality(65)/hd_posters/95erq0-tmenfg.jpg" width="250" height="141" alt="Mynd með færslu" title="Mynd með færslu"></div><span><p>Ítarleg umfjöllun um það sem er efst á baugi í fréttum og mannlífi. 
Farið er ofan í kjölinn á stærstu fréttamálum dagsins með viðmælendum um land allt. Umsjónarmenn eru Einar Þorsteinsson og Jóhanna Vigdís Hjaltadóttir. e.</p></span></div></span></div></li><li class="views-row views-row-28 views-row-odd views-row border-bottom pad0y space1 clearfix"><strong class="field-content">00 : 10</strong><span class="field-content ruv-color">Dagskrárlok</span><em class="field-content color-gray"></em><span class="field-content"><i class="icon icon-geoblock color-gray" title="Eingöngu aðgengilegt á Íslandi"></i></span><div class="views-field views-field-nothing"><span class="field-content"><div class="content hidden mar2t" id="dagskra_item_5160443"><div class="mar2r col12-mobile fl"></div></div></span></div></li></ul> </div></div></div></div></div></div></div></div></div></div></section> </div></main> </div></div></div></div></body></html>`
const result = parser({ content, date }).map(p => {
const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html'))
let results = parser({ content, channel, date }).map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
})
expect(result).toMatchObject([
{
start: '2021-11-25T13:00:00.000Z',
stop: '2021-11-25T13:10:00.000Z',
title: `Heimaleikfimi`,
description:
'Góð ráð og æfingar sem tilvalið er að gera heima. Íris Rut Garðarsdóttir sjúkraþjálfari hefur umsjón með leikfiminni. e.',
icon: 'https://d38kdhuogyllre.cloudfront.net/fit-in/480x/filters:quality(65)/hd_posters/91pvig-3p3hig.jpg'
},
{
start: '2021-11-25T13:10:00.000Z',
stop: '2021-11-26T00:10:00.000Z',
title: `Kastljós`,
description:
'Ítarleg umfjöllun um það sem er efst á baugi í fréttum og mannlífi. Farið er ofan í kjölinn á stærstu fréttamálum dagsins með viðmælendum um land allt. Umsjónarmenn eru Einar Þorsteinsson og Jóhanna Vigdís Hjaltadóttir. e.',
icon: 'https://d38kdhuogyllre.cloudfront.net/fit-in/480x/filters:quality(65)/hd_posters/95erq0-tmenfg.jpg'
},
{
start: '2021-11-26T00:10:00.000Z',
stop: '2021-11-26T01:10:00.000Z',
title: `Dagskrárlok`
}
])
expect(results[0]).toMatchObject({
start: '2022-12-03T07:05:00.000Z',
stop: '2022-12-03T07:15:00.000Z',
title: `Smástund`,
description:
'Smástund hentar vel fyrir þau allra yngstu, í hverjum þætti lærum við orð, liti, tölur og tónlist. e.',
icon: 'https://d38kdhuogyllre.cloudfront.net/fit-in/480x/filters:quality(65)/hd_posters/a2kmk0-mcpf0o.jpg'
})
})
// Checks that parser() returns an empty list when the page holds no programs.
// NOTE(review): two `content:` entries appear back to back (an inline HTML
// string and a fixture read from __data__/no_content.html) with no separator;
// presumably they are the before/after lines of a change. Confirm which one
// applies.
it('can handle empty guide', () => {
const result = parser({
date,
channel,
content: `<!DOCTYPE html><html> <head></head> <body></body></html>`
content: fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html'))
})
expect(result).toMatchObject([])
})