diff --git a/.github/workflows/nos.pt.yml b/.github/workflows/nos.pt.yml new file mode 100644 index 00000000..13448248 --- /dev/null +++ b/.github/workflows/nos.pt.yml @@ -0,0 +1,17 @@ +name: nos.pt +on: + schedule: + - cron: '0 3 * * *' + workflow_dispatch: + workflow_run: + workflows: [_trigger] + types: + - completed +jobs: + load: + uses: ./.github/workflows/_load.yml + with: + site: ${{github.workflow}} + secrets: + APP_ID: ${{ secrets.APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.APP_PRIVATE_KEY }} diff --git a/sites/nos.pt/__data__/content.html b/sites/nos.pt/__data__/content.html new file mode 100644 index 00000000..d24bcf41 --- /dev/null +++ b/sites/nos.pt/__data__/content.html @@ -0,0 +1,4224 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Pesquisa por canal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + +
+ + + +
+ + + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + diff --git a/sites/nos.pt/__data__/no_content.html b/sites/nos.pt/__data__/no_content.html new file mode 100644 index 00000000..63607bce --- /dev/null +++ b/sites/nos.pt/__data__/no_content.html @@ -0,0 +1,2988 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Pesquisa por canal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + diff --git a/sites/nos.pt/__data__/program_0.json b/sites/nos.pt/__data__/program_0.json new file mode 100644 index 00000000..089070ee --- /dev/null +++ b/sites/nos.pt/__data__/program_0.json @@ -0,0 +1 @@ +{"d":"Anatomia de Grey T.17 Ep.3_#|$_Os médicos do Grey Sloan continuam a enfrentar a nova realidade do COVID-19 e lidam com um paciente conhecido e teimoso. Koracick fica encarregue dos internos e Link opera um terapeuta sexual._#|$_b6fd27f4bd0b404abd4c3fc4faa79024_resized_352x198.jpg_#|$_undefined_#|$_undefined_#|$_RTP 1_#|$_2023-01-27T23:50:00+00:00_#|$_2023-01-28T00:36:00+00:00_#|$_false"} \ No newline at end of file diff --git a/sites/nos.pt/__data__/program_21.json b/sites/nos.pt/__data__/program_21.json new file mode 100644 index 00000000..06584a0e --- /dev/null +++ b/sites/nos.pt/__data__/program_21.json @@ -0,0 +1 @@ +{"d":"MasterChef Portugal T.1 Ep.10_#|$_A maior competição de cozinha do mundo arranca ao comando de três dos mais conceituados chefs portugueses: Pedro Pena Bastos, Noélia Jerónimo e Ricardo Costa, que nos vão transmitir os seus conhecimentos e a sua paixão pela cozinha._#|$_8aa511d697f0401a88a0cb1ec2718cc3_resized_352x198.jpg_#|$_undefined_#|$_undefined_#|$_RTP 1_#|$_2023-01-28T21:38:00+00:00_#|$_2023-01-29T00:05:00+00:00_#|$_false"} \ No newline at end of file diff --git a/sites/nos.pt/nos.pt.channels.xml b/sites/nos.pt/nos.pt.channels.xml new file mode 100644 index 00000000..0fa0aac0 --- /dev/null +++ b/sites/nos.pt/nos.pt.channels.xml @@ -0,0 +1,209 @@ + + + + 1+1 + 24Kitchen HD + 3SAT + Afro Music Channel + AMC Crime + AMC + ARTV + AXN Movies + AXN + Baby TV + Biggs + Bloomberg + BVN + Canal 11 HD + Canal Hollywood + Canal NOS HD + Canal Panda + Canal Q + Cartoon Network HD + Casa e Cozinha HD + CCTV 4 + CGTN + CMTV + CNBC + CNN Portugal + Cubavisión Internacional + Discovery Channel + Disney Channel HD + Disney Junior + DOGTV + DW (Alemão) + DW + ELEVEN SPORTS 1 + ELEVEN SPORTS 2 + ELEVEN 
SPORTS 3 + ELEVEN SPORTS 4 + ELEVEN SPORTS 5 + ELEVEN SPORTS 6 + Euronews + Fight Network HD + Food Network HD + FOX Comedy + FOX Crime + FOX Life + FOX Movies + FOX + France 24 (I) + France 24 (F) + Fuel TV + TV Galicia + Globo Now HD + Globo + Canal História + Horse TV + HOT Man + HOT Taboo + HOT + ID Investigation Discovery + KBS World HD + KiKa + Kuriakos TV + Localvisão TV HD + M6 + MCM Pop + MCM Top + Mezzo + Mezzo Live HD + Motorvision HD + MTV 00s + MTV Live + MTV Portugal + MyZen TV + National Geographic + National Geographic WILD + Nautical Channel + NHK World TV + Nickelodeon + Nick Jr. + Nickelodeon Ukraine Pluto TV + NOS Studios + Odisseia + Panda KIDS + Phoenix CNE + Phoenix Infonews + Porto Canal + ProSieben + Record News + Russia Today + RTP 1 + RTP 2 + RTP 3 + RTP Madeira + RTP Memória + RTR Planeta + SAT 1 + Sextreme + SIC + SIC Caras + SIC K + SIC Mulher + SIC Notícias + SIC Radical + S+ HD + Sporting TV + SPORT TV1 + SPORT TV2 + SPORT TV3 + SPORT TV4 + SPORT TV5 + Sport TV 6 HD + SPORT TV+ + Stingray iConcerts HD + Super RTL + SYFY + TCV Internacional + TPA Internacional + Trace Toca + Trace Urban HD + Travel Channel + TV5 Monde + TVCine ACTION + TVCine EDITION + TVCine EMOTION + TVCine TOP + TVEi + TVI + TVI Ficção HD + TVI Reality + TVR Internacional + Venus + ZAP Viva HD + ZDF + ZDF Neo + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sites/nos.pt/nos.pt.config.js b/sites/nos.pt/nos.pt.config.js new file mode 100644 index 00000000..b5f84a9c --- /dev/null +++ b/sites/nos.pt/nos.pt.config.js @@ -0,0 +1,148 @@ +const axios = require('axios') +const cheerio = require('cheerio') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const timezone = require('dayjs/plugin/timezone') +const customParseFormat = require('dayjs/plugin/customParseFormat') + +dayjs.extend(utc) +dayjs.extend(timezone) 
+dayjs.extend(customParseFormat)
+
+// Marker that separates the fields packed into the `d` string of a details response
+const DETAILS_SEPARATOR = '_#|$_'
+
+module.exports = {
+  site: 'nos.pt',
+  days: 2,
+  /**
+   * Builds the address of the guide page of one channel.
+   *
+   * @param {object} params
+   * @param {object} params.channel - channel entry; its `site_id` goes into the query string
+   * @returns {string} guide page URL
+   */
+  url({ channel }) {
+    return `https://www.nos.pt/particulares/televisao/guia-tv/Pages/channel.aspx?channel=${channel.site_id}`
+  },
+  /**
+   * Converts the HTML of a channel guide page into the programs listed for `date`.
+   *
+   * Every program triggers one request for its details (description and icon).
+   * Items whose start or stop time cannot be read are skipped.
+   *
+   * @param {object} params
+   * @param {string} params.content - HTML of the channel guide page
+   * @param {object} params.date - dayjs instance of the requested day
+   * @returns {Promise<object[]>} programs with `title`, `description`, `icon`, `start`, `stop`
+   */
+  async parser({ content, date }) {
+    const programs = []
+    const items = parseItems(content, date)
+    if (!items.length) return programs
+
+    // The acronym is a property of the page, not of an item: read it once
+    // instead of re-parsing the whole document for every program.
+    const channelAcronym = parseChannelAcronym(content)
+
+    // Clock times are first resolved against the previous day and moved forward
+    // whenever they run backwards; this is how a list that opens with a late-evening
+    // entry (23:50 in the test fixture) ends up on the correct dates.
+    let day = date.subtract(1, 'd')
+
+    for (const item of items) {
+      const $item = cheerio.load(item)
+      const prev = programs[programs.length - 1]
+
+      let start = parseStart($item, day)
+      // Without a start time the entry cannot be placed on the timeline
+      if (!start) continue
+      if (prev) {
+        if (start.isBefore(prev.start)) {
+          start = start.add(1, 'd')
+          day = day.add(1, 'd')
+        }
+        // The previous program ends where this one begins
+        prev.stop = start
+      }
+
+      let stop = parseStop($item, day)
+      if (!stop) continue
+      if (stop.isBefore(start)) {
+        // The program runs past midnight
+        stop = stop.add(1, 'd')
+        day = day.add(1, 'd')
+      }
+
+      const details = await loadProgramDetails(channelAcronym, parseProgramId($item))
+      programs.push({
+        title: parseTitle($item),
+        description: details.description,
+        icon: details.icon,
+        start,
+        stop
+      })
+    }
+
+    return programs
+  },
+  /**
+   * Downloads the guide landing page and lists the channels of its filter dropdown.
+   *
+   * @returns {Promise<object[]>} entries with `lang`, `site_id` and `name`;
+   *   empty when the page could not be downloaded
+   */
+  async channels() {
+    const html = await axios
+      .get('https://www.nos.pt/particulares/televisao/guia-tv/Pages/default.aspx')
+      .then(r => r.data)
+      .catch(console.log)
+
+    // The request error has already been logged; there is nothing to parse
+    if (!html) return []
+
+    const $ = cheerio.load(html)
+
+    return $('#guide-filters > dl.dropdown-ord > dd > ul > li')
+      .toArray()
+      .map(item => {
+        const $item = cheerio.load(item)
+
+        return {
+          lang: 'pt',
+          site_id: $item('.value').text().trim(),
+          // Only the anchor's own text: nested elements are dropped from a clone first
+          name: $item('a').clone().children().remove().end().text().trim()
+        }
+      })
+  }
+}
+
+/**
+ * Requests the details of one program and extracts its description and icon.
+ *
+ * Best effort: a missing identifier, a failed request (logged) or a payload
+ * without a string `d` all produce an empty object.
+ *
+ * @param {string} channelAcronym - acronym read from the channel logo
+ * @param {string} programId - id of the program entry
+ * @returns {Promise<object>} `{ description, icon }` or `{}`
+ */
+async function loadProgramDetails(channelAcronym, programId) {
+  if (!channelAcronym || !programId) return {}
+  const data = await axios
+    .post(
+      'https://www.nos.pt/_layouts/15/Armstrong/ApplicationPages/EPGGetProgramsAndDetails.aspx/GetProgramDetails',
+      {
+        programId,
+        channelAcronym,
+        hour: 'undefined',
+        startHour: 'undefined',
+        endHour: 'undefined'
+      },
+      {
+        headers: {
+          'content-type': 'application/json; charset=UTF-8'
+        }
+      }
+    )
+    .then(r => r.data)
+    .catch(console.log)
+
+  if (!data || typeof data.d !== 'string') return {}
+
+  // Field order in `d`: title, description, icon file name, ...
+  const [, description, iconFilename] = data.d.split(DETAILS_SEPARATOR).map(dropPlaceholder)
+  const icon = iconFilename ? `https://images.nos.pt/${iconFilename}` : null
+
+  return {
+    description,
+    icon
+  }
+}
+
+/**
+ * Maps the literal text `undefined` to a real `undefined`.
+ * The recorded responses under __data__ contain that text in some fields,
+ * presumably where the service has no value; it must not leak into the guide
+ * as a description or as part of an image URL.
+ */
+function dropPlaceholder(value) {
+  return value === 'undefined' ? undefined : value
+}
+
+/** Reads the program id from the `id` attribute of the item's link. */
+function parseProgramId($item) {
+  return $item('a').attr('id')
+}
+
+/** Reads the channel acronym from the `alt` text of the channel logo on the page. */
+function parseChannelAcronym(content) {
+  const $ = cheerio.load(content)
+
+  return $('#channel-logo > img').attr('alt')
+}
+
+/** Reads the program title from the link's `title` attribute; empty string when absent. */
+function parseTitle($item) {
+  return ($item('a').attr('title') || '').trim()
+}
+
+/**
+ * Extracts one HH:mm value from the item's `.duration` text and resolves it
+ * on `date` in the Europe/Lisbon time zone.
+ *
+ * @param {Function} $item - cheerio instance loaded with the item
+ * @param {object} date - dayjs instance supplying the calendar day
+ * @param {RegExp} pattern - expression selecting the wanted time in the text
+ * @returns {object|null} dayjs instance, or null when no time matches
+ */
+function parseTime($item, date, pattern) {
+  const text = $item('.duration').text().replace(/\s+/g, ' ').trim()
+  const [time] = text.match(pattern) || [null]
+
+  if (!time) return null
+
+  return dayjs.tz(`${date.format('YYYY-MM-DD')} ${time}`, 'YYYY-MM-DD HH:mm', 'Europe/Lisbon')
+}
+
+/** Start of the program: the time at the beginning of the `.duration` text. */
+function parseStart($item, date) {
+  return parseTime($item, date, /^\d{2}:\d{2}/)
+}
+
+/** End of the program: the time at the end of the `.duration` text. */
+function parseStop($item, date) {
+  return parseTime($item, date, /\d{2}:\d{2}$/)
+}
+
+/** Returns the list items of the `#day<N>` section, N being the day of month of `date`. */
+function parseItems(content, date) {
+  const day = date.date()
+  const $ = cheerio.load(content)
+
+  return $(`#day${day} > ul > li`).toArray()
+}
diff --git a/sites/nos.pt/nos.pt.test.js b/sites/nos.pt/nos.pt.test.js
new file mode 100644
index 00000000..5e7083ee
--- /dev/null
+++ b/sites/nos.pt/nos.pt.test.js
@@ -0,0 +1,100 @@
+// npm run channels:parse -- --config=./sites/nos.pt/nos.pt.config.js --output=./sites/nos.pt/nos.pt.channels.xml
+// npx epg-grabber --config=sites/nos.pt/nos.pt.config.js --channels=sites/nos.pt/nos.pt.channels.xml --output=guide.xml --days=2
+
+const { parser, url } = require('./nos.pt.config.js')
+const fs = require('fs')
+const path = require('path')
+const axios = require('axios')
+const dayjs = require('dayjs')
+const utc = require('dayjs/plugin/utc')
+const customParseFormat = require('dayjs/plugin/customParseFormat')
+dayjs.extend(customParseFormat)
+dayjs.extend(utc)
+
+jest.mock('axios')
+
+// Endpoint the parser posts to for the details of a single program
+const DETAILS_URL =
+  'https://www.nos.pt/_layouts/15/Armstrong/ApplicationPages/EPGGetProgramsAndDetails.aspx/GetProgramDetails'
+
+// Program ids with a recorded details response, paired with the fixture holding it
+const DETAILS_FIXTURES = [
+  ['81361', '__data__/program_0.json'],
+  ['81382', '__data__/program_21.json']
+]
+
+const date = dayjs.utc('2023-01-28', 'YYYY-MM-DD').startOf('d')
+const channel = {
+  site_id: '5',
+  xmltv_id: 'RTP1.pt'
+}
+
+// Resolves a fixture path relative to this test file
+function fixture(relativePath) {
+  return path.resolve(__dirname, relativePath)
+}
+
+// Serialized request body the parser is expected to send for `programId`
+function expectedRequest(programId) {
+  return JSON.stringify({
+    programId,
+    channelAcronym: 'RTP1',
+    hour: 'undefined',
+    startHour: 'undefined',
+    endHour: 'undefined'
+  })
+}
+
+it('can generate valid url', () => {
+  const expected = 'https://www.nos.pt/particulares/televisao/guia-tv/Pages/channel.aspx?channel=5'
+
+  expect(url({ channel })).toBe(expected)
+})
+
+it('can parse response', async () => {
+  const content = fs.readFileSync(fixture('__data__/content.html'), 'utf8')
+
+  // Answer the two known detail requests from fixtures, anything else with an empty body
+  axios.post.mockImplementation((requestUrl, data) => {
+    const known =
+      requestUrl === DETAILS_URL
+        ? DETAILS_FIXTURES.find(([programId]) => JSON.stringify(data) === expectedRequest(programId))
+        : undefined
+
+    if (!known) return Promise.resolve({ data: '' })
+
+    const [, file] = known
+    return Promise.resolve({ data: JSON.parse(fs.readFileSync(fixture(file))) })
+  })
+
+  const programs = await parser({ content, date })
+  // Compare timestamps as ISO strings
+  const results = programs.map(program => ({
+    ...program,
+    start: program.start.toJSON(),
+    stop: program.stop.toJSON()
+  }))
+
+  const [first] = results
+  expect(first).toMatchObject({
+    start: '2023-01-27T23:50:00.000Z',
+    stop: '2023-01-28T00:36:00.000Z',
+    title: 'Anatomia de Grey T.17 Ep.3',
+    description:
+      'Os médicos do Grey Sloan continuam a enfrentar a nova realidade do COVID-19 e lidam com um paciente conhecido e teimoso. Koracick fica encarregue dos internos e Link opera um terapeuta sexual.',
+    icon: 'https://images.nos.pt/b6fd27f4bd0b404abd4c3fc4faa79024_resized_352x198.jpg'
+  })
+
+  expect(results[21]).toMatchObject({
+    start: '2023-01-28T21:38:00.000Z',
+    stop: '2023-01-29T00:05:00.000Z',
+    title: 'MasterChef Portugal T.1 Ep.10',
+    description:
+      'A maior competição de cozinha do mundo arranca ao comando de três dos mais conceituados chefs portugueses: Pedro Pena Bastos, Noélia Jerónimo e Ricardo Costa, que nos vão transmitir os seus conhecimentos e a sua paixão pela cozinha.',
+    icon: 'https://images.nos.pt/8aa511d697f0401a88a0cb1ec2718cc3_resized_352x198.jpg'
+  })
+})
+
+it('can handle empty guide', async () => {
+  const content = fs.readFileSync(fixture('__data__/no_content.html'), 'utf8')
+  const results = await parser({ date, content })
+
+  expect(results).toMatchObject([])
+})