diff --git a/sites/telebilbao.es/__data__/content.html b/sites/telebilbao.es/__data__/content.html new file mode 100644 index 00000000..7e999b51 --- /dev/null +++ b/sites/telebilbao.es/__data__/content.html @@ -0,0 +1,1137 @@ + + + + + + + + + + + + + Programación - TeleBilbao + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + +Ir a Tienda + +
+ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sites/telebilbao.es/__data__/no_content.html b/sites/telebilbao.es/__data__/no_content.html new file mode 100644 index 00000000..01c365c9 --- /dev/null +++ b/sites/telebilbao.es/__data__/no_content.html @@ -0,0 +1,1820 @@ + + + + + + + + + + + + + + Página no encontrada - TeleBilbao + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+
+

Página no encontrada

+
+
Estás aquí:
+ +
+
+
+ + + + + + + + + + Ir a Tienda +
+ + + + + + + + + + + + + + + + + + + + + diff --git a/sites/telebilbao.es/readme.md b/sites/telebilbao.es/readme.md new file mode 100644 index 00000000..5c50e38c --- /dev/null +++ b/sites/telebilbao.es/readme.md @@ -0,0 +1,15 @@ +# telebilbao.es + +https://www.telebilbao.es/programacion-2/ + +### Download the guide + +```sh +npm run grab --- --site=telebilbao.es +``` + +### Test + +```sh +npm test --- telebilbao.es +``` diff --git a/sites/telebilbao.es/telebilbao.es.channels.xml b/sites/telebilbao.es/telebilbao.es.channels.xml new file mode 100644 index 00000000..1f98e3ff --- /dev/null +++ b/sites/telebilbao.es/telebilbao.es.channels.xml @@ -0,0 +1,4 @@ + + + TeleBilbao + \ No newline at end of file diff --git a/sites/telebilbao.es/telebilbao.es.config.js b/sites/telebilbao.es/telebilbao.es.config.js new file mode 100644 index 00000000..8355ce3f --- /dev/null +++ b/sites/telebilbao.es/telebilbao.es.config.js @@ -0,0 +1,70 @@ +const dayjs = require('dayjs') +const cheerio = require('cheerio') +const table2array = require('table2array') +const utc = require('dayjs/plugin/utc') +const timezone = require('dayjs/plugin/timezone') +const customParseFormat = require('dayjs/plugin/customParseFormat') + +dayjs.extend(utc) +dayjs.extend(timezone) +dayjs.extend(customParseFormat) + +require('dayjs/locale/es') + +module.exports = { + site: 'telebilbao.es', + days: 1, + url: 'https://www.telebilbao.es/programacion-2/', + request: { + cache: { + ttl: 24 * 60 * 60 * 1000 // 1 day + } + }, + parser({ content, date }) { + let programs = [] + const items = parseItems(content, date) + items.forEach(item => { + const prev = programs[programs.length - 1] + let start = parseStart(item, date) + if (prev) { + if (start.isBefore(prev.start)) { + start = start.add(1, 'd') + date = date.add(1, 'd') + } + prev.stop = start + } + const stop = start.add(30, 'm') + + programs.push({ + title: item.title, + start, + stop + }) + }) + + return programs + } +} + +function parseStart(item, date) { + return dayjs.tz(`${date.format('YYYY-MM-DD')} ${item.time}`, 'YYYY-MM-DD HH:mm', 'Europe/Madrid') +} + +function parseItems(content, date) { + const $ = cheerio.load(content) + const tableHtml = $('table.programacion').html() + let tableArray = table2array(`${tableHtml}
`) + const day = date.locale('es').format('dddd\nD MMMM').toUpperCase() + if (!tableArray[0]) return [] + const indexOfColumn = tableArray[0].indexOf(day) + tableArray.pop() + const items = [] + tableArray.forEach(row => { + items.push({ + time: row[0], + title: row[indexOfColumn] + }) + }) + + return items.filter(i => Boolean(i.time)) +} diff --git a/sites/telebilbao.es/telebilbao.es.test.js b/sites/telebilbao.es/telebilbao.es.test.js new file mode 100644 index 00000000..6450fb7e --- /dev/null +++ b/sites/telebilbao.es/telebilbao.es.test.js @@ -0,0 +1,44 @@ +const { parser, url } = require('./telebilbao.es.config.js') +const fs = require('fs') +const path = require('path') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +dayjs.extend(customParseFormat) +dayjs.extend(utc) + +const date = dayjs.utc('2025-01-16', 'YYYY-MM-DD').startOf('d') + +it('can generate valid url', () => { + expect(url).toBe('https://www.telebilbao.es/programacion-2/') +}) + +it('can parse response', () => { + const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html')) + let results = parser({ content, date }) + results = results.map(p => { + p.start = p.start.toJSON() + p.stop = p.stop.toJSON() + return p + }) + + expect(results.length).toBe(50) + expect(results[0]).toMatchObject({ + start: '2025-01-16T06:00:00.000Z', + stop: '2025-01-16T06:30:00.000Z', + title: 'BAI HORIXE' + }) + expect(results[49]).toMatchObject({ + start: '2025-01-17T07:30:00.000Z', + stop: '2025-01-17T08:00:00.000Z', + title: 'LA KAPITAL' + }) +}) + +it('can handle empty guide', () => { + const results = parser({ + date, + content: fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html')) + }) + expect(results).toMatchObject([]) +})