From 1a8248fe922d3cf4cdf3650b5efe82d8ee426cb0 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Fri, 12 Nov 2021 18:08:51 +0300 Subject: [PATCH 1/5] Create channels.js --- scripts/channels.js | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 scripts/channels.js diff --git a/scripts/channels.js b/scripts/channels.js new file mode 100644 index 00000000..13147826 --- /dev/null +++ b/scripts/channels.js @@ -0,0 +1,34 @@ +const { Command } = require('commander') +const fs = require('fs') +const path = require('path') +const { json2xml } = require('./utils') + +const program = new Command() +program + .requiredOption('-c, --config ', 'Config file') + .option('-o, --output ', 'Output file') + .parse(process.argv) + +const options = program.opts() + +async function main() { + const config = require(path.resolve(options.config)) + let channels = config.channels() + if (isPromise(channels)) { + channels = await channels + } + const xml = json2xml(channels, config.site) + + const dir = path.parse(options.config).dir + const output = options.output || `${dir}/${config.site}.channels.xml` + + fs.writeFileSync(path.resolve(output), xml) + + console.log(`File '${output}' successfully saved`) +} + +main() + +function isPromise(promise) { + return !!promise && typeof promise.then === 'function' +} From 4a79153c5da97d6008d992a3f9e9d0aaa2d90e6d Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Fri, 12 Nov 2021 18:08:53 +0300 Subject: [PATCH 2/5] Create utils.js --- scripts/utils.js | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 scripts/utils.js diff --git a/scripts/utils.js b/scripts/utils.js new file mode 100644 index 00000000..1e934b24 --- /dev/null +++ b/scripts/utils.js @@ -0,0 +1,47 @@ +function json2xml(items, site) { + let output = `\r\n\r\n \r\n` + + items.forEach(channel => { + const logo = channel.logo ? ` logo="${channel.logo}"` : '' + const xmltv_id = channel.xmltv_id || '' + const lang = channel.lang || '' + const site_id = channel.site_id || '' + output += ` ${escapeString( + channel.name + )}\r\n` + }) + + output += ` \r\n\r\n` + + return output +} + +function escapeString(string, defaultValue = '') { + if (!string) return defaultValue + + const regex = new RegExp( + '((?:[\0-\x08\x0B\f\x0E-\x1F\uFFFD\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]))|([\\x7F-\\x84]|[\\x86-\\x9F]|[\\uFDD0-\\uFDEF]|(?:\\uD83F[\\uDFFE\\uDFFF])|(?:\\uD87F[\\uDF' + + 'FE\\uDFFF])|(?:\\uD8BF[\\uDFFE\\uDFFF])|(?:\\uD8FF[\\uDFFE\\uDFFF])|(?:\\uD93F[\\uDFFE\\uD' + + 'FFF])|(?:\\uD97F[\\uDFFE\\uDFFF])|(?:\\uD9BF[\\uDFFE\\uDFFF])|(?:\\uD9FF[\\uDFFE\\uDFFF])' + + '|(?:\\uDA3F[\\uDFFE\\uDFFF])|(?:\\uDA7F[\\uDFFE\\uDFFF])|(?:\\uDABF[\\uDFFE\\uDFFF])|(?:\\' + + 'uDAFF[\\uDFFE\\uDFFF])|(?:\\uDB3F[\\uDFFE\\uDFFF])|(?:\\uDB7F[\\uDFFE\\uDFFF])|(?:\\uDBBF' + + '[\\uDFFE\\uDFFF])|(?:\\uDBFF[\\uDFFE\\uDFFF])(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\' + + 'uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|' + + '(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]))', + 'g' + ) + + string = String(string || '').replace(regex, '') + + return string + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, ''') + .replace(/\n|\r/g, ' ') + .replace(/ +/g, ' ') + .trim() +} + +module.exports = { json2xml } From bf780b4e9a278b1a1e6eeadd2d6d6df412488666 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Fri, 12 Nov 2021 18:09:00 +0300 Subject: [PATCH 3/5] Update mncvision.id.config.js --- sites/mncvision.id/mncvision.id.config.js | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sites/mncvision.id/mncvision.id.config.js b/sites/mncvision.id/mncvision.id.config.js index 20302fdf..23af20d3 100644 --- a/sites/mncvision.id/mncvision.id.config.js +++ b/sites/mncvision.id/mncvision.id.config.js @@ -1,6 +1,8 @@ const FormData = require('form-data') const jsdom = require('jsdom') const { JSDOM } = jsdom +const axios = require('axios') +const cheerio = require('cheerio') const dayjs = require('dayjs') const utc = require('dayjs/plugin/utc') const timezone = require('dayjs/plugin/timezone') @@ -48,6 +50,26 @@ module.exports = { }) return programs + }, + async channels() { + const data = await axios + .get('https://www.mncvision.id/schedule') + .then(response => response.data) + .catch(console.log) + + const $ = cheerio.load(data) + const items = $('select[name="fchannel"] option').toArray() + const channels = items.map(item => { + const $item = cheerio.load(item) + + return { + lang: 'id', + site_id: $item('*').attr('value'), + name: $item('*').text() + } + }) + + return channels } } From b35bdf569330c69e4d88d2d366d77c673eea3355 Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Fri, 12 Nov 2021 18:59:46 +0300 Subject: [PATCH 4/5] Create mncvision.id.test.js --- sites/mncvision.id/mncvision.id.test.js | 80 +++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 sites/mncvision.id/mncvision.id.test.js diff --git a/sites/mncvision.id/mncvision.id.test.js b/sites/mncvision.id/mncvision.id.test.js new file mode 100644 index 00000000..c3478a8a --- /dev/null +++ b/sites/mncvision.id/mncvision.id.test.js @@ -0,0 +1,80 @@ +// npx epg-grabber --config=sites/mncvision.id/mncvision.id.config.js --channels=sites/mncvision.id/mncvision.id_id.channels.xml --output=.gh-pages/guides/id/mncvision.id.epg.xml --days=2 + +const { parser, url, request, logo } = require('./mncvision.id.config.js') +const axios = require('axios') +const dayjs = require('dayjs') +const utc = require('dayjs/plugin/utc') +const customParseFormat = require('dayjs/plugin/customParseFormat') +dayjs.extend(customParseFormat) +dayjs.extend(utc) + +jest.mock('axios') + +const date = dayjs.utc('2021-11-12', 'YYYY-MM-DD').startOf('d') +const channel = { + site_id: '203', + xmltv_id: 'AnimalPlanetSoutheastAsia.us' +} +const content = `
Jam Tayang Program Acara Durasi
00:00African Wild S1: Seals01:00
` + +it('can generate valid url', () => { + expect(url).toBe('https://mncvision.id/schedule/table') +}) + +it('can generate valid request headers', () => { + expect(request.headers).toMatchObject({ + 'Content-Type': 'multipart/form-data; boundary=X-EPG-BOUNDARY' + }) +}) + +it('can generate valid request data', () => { + const result = request.data({ channel, date }) + expect(result._boundary).toBe('X-EPG-BOUNDARY') +}) + +it('can get logo url', () => { + expect(logo({ content, channel })).toBe( + 'https://www.mncvision.id/userfiles/image/channel/channel_203.png' + ) +}) + +it('can parse response', done => { + axios.get.mockImplementation(() => + Promise.resolve({ + data: `
+ Nikmati suasana kehidupan koloni anjing laut di kawasan pantai barat Afrika Selatan.
` + }) + ) + + parser({ date, channel, content }) + .then(result => { + expect(result).toMatchObject([ + { + start: '2021-11-11T17:00:00.000Z', + stop: '2021-11-11T18:00:00.000Z', + title: 'African Wild S1: Seals', + description: + 'Nikmati suasana kehidupan koloni anjing laut di kawasan pantai barat Afrika Selatan.' + } + ]) + done() + }) + .catch(error => { + done(error) + }) +}) + +it('can handle empty guide', done => { + parser({ + date, + channel, + content: `` + }) + .then(result => { + expect(result).toMatchObject([]) + done() + }) + .catch(error => { + done(error) + }) +}) From c65e8f5f67c08283240d12f519e9fa3c4f2eba2b Mon Sep 17 00:00:00 2001 From: Aleksandr Statciuk Date: Fri, 12 Nov 2021 18:59:50 +0300 Subject: [PATCH 5/5] Update mncvision.id.config.js --- sites/mncvision.id/mncvision.id.config.js | 43 +++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/sites/mncvision.id/mncvision.id.config.js b/sites/mncvision.id/mncvision.id.config.js index 23af20d3..5b5cd4ee 100644 --- a/sites/mncvision.id/mncvision.id.config.js +++ b/sites/mncvision.id/mncvision.id.config.js @@ -1,6 +1,4 @@ const FormData = require('form-data') -const jsdom = require('jsdom') -const { JSDOM } = jsdom const axios = require('axios') const cheerio = require('cheerio') const dayjs = require('dayjs') @@ -14,6 +12,7 @@ dayjs.extend(customParseFormat) module.exports = { site: 'mncvision.id', + url: `https://mncvision.id/schedule/table`, request: { method: 'POST', data: function ({ channel, date }) { @@ -34,20 +33,18 @@ module.exports = { logo({ channel }) { return `https://www.mncvision.id/userfiles/image/channel/channel_${channel.site_id}.png` }, - url({ channel }) { - return `https://www.mncvision.id/schedule/table` - }, - parser({ content, date }) { + async parser({ content, date }) { const programs = [] const items = parseItems(content) - items.forEach(item => { + for (const item of items) { const title = parseTitle(item) const start = parseStart(item, date) const duration = parseDuration(item) const stop = start.add(duration, 'm') + const description = await loadDescription(item) - programs.push({ title, start, stop }) - }) + programs.push({ title, description, start: start.toJSON(), stop: stop.toJSON() }) + } return programs }, @@ -73,8 +70,23 @@ module.exports = { } } +async function loadDescription(item) { + const $item = cheerio.load(item) + const progUrl = $item('a').attr('href') + if (!progUrl) return null + const data = await axios + .get(progUrl) + .then(r => r.data) + .catch(console.log) + if (!data) return null + const $page = cheerio.load(data) + + return $page('.synopsis').text().trim() +} + function parseDuration(item) { - let duration = (item.querySelector('td:nth-child(3)') || { textContent: '' }).textContent + const $ = cheerio.load(item) + let duration = $('td:nth-child(3)').text() const match = duration.match(/(\d{2}):(\d{2})/) const hours = parseInt(match[1]) const minutes = parseInt(match[2]) @@ -83,18 +95,21 @@ function parseDuration(item) { } function parseStart(item, date) { - let time = (item.querySelector('td:nth-child(1)') || { textContent: '' }).textContent + const $ = cheerio.load(item) + let time = $('td:nth-child(1)').text() time = `${date.format('DD/MM/YYYY')} ${time}` return dayjs.tz(time, 'DD/MM/YYYY HH:mm', 'Asia/Jakarta') } function parseTitle(item) { - return (item.querySelector('td:nth-child(2) > a') || { textContent: '' }).textContent + const $ = cheerio.load(item) + + return $('td:nth-child(2) > a').text() } function parseItems(content) { - const dom = new JSDOM(content) + const $ = cheerio.load(content) - return dom.window.document.querySelectorAll('tr[valign="top"]') + return $('tr[valign="top"]').toArray() }