/**
 * CLI script that writes a channel list for one site to an XML file.
 *
 * Usage: node scripts/channels.js --config <config> [--output <output>]
 *
 * The config module is expected to expose `site` and a `channels()` function
 * that returns (or resolves to) the list handed to `json2xml`.
 */
const { Command } = require('commander')
const fs = require('fs')
const path = require('path')
const { json2xml } = require('./utils')

const program = new Command()
program
  // The `<...>` placeholders tell Commander these options take a value;
  // without a placeholder an option is parsed as a boolean flag.
  .requiredOption('-c, --config <config>', 'Config file')
  .option('-o, --output <output>', 'Output file')
  .parse(process.argv)

const options = program.opts()

/**
 * Loads the config, collects its channels and saves them as XML.
 *
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function main() {
  const config = require(path.resolve(options.config))
  // `await` accepts plain values as well as promises, so both synchronous
  // and asynchronous `channels()` implementations are supported.
  const channels = await config.channels()
  const xml = json2xml(channels, config.site)

  // path.join keeps the default output relative when the config path has no
  // directory component (plain concatenation would yield "/<site>.channels.xml").
  const dir = path.parse(options.config).dir
  const output = options.output || path.join(dir, `${config.site}.channels.xml`)

  fs.writeFileSync(path.resolve(output), xml)

  console.log(`File '${output}' successfully saved`)
}

// Report failures and exit non-zero instead of leaving the rejection unhandled.
main().catch(error => {
  console.error(error)
  process.exitCode = 1
})
// Characters removed before escaping: C0 control characters other than
// tab/LF/CR, U+FFFD-U+FFFF, unpaired surrogates, U+007F-U+0084,
// U+0086-U+009F, U+FDD0-U+FDEF and the listed supplementary-plane code
// points ending in FFFE/FFFF. Built once; `String.prototype.replace` resets
// `lastIndex` for global regexes, so sharing the instance is safe.
const INVALID_XML_CHARS = new RegExp(
  '((?:[\0-\x08\x0B\f\x0E-\x1F\uFFFD\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]))|([\\x7F-\\x84]|[\\x86-\\x9F]|[\\uFDD0-\\uFDEF]|(?:\\uD83F[\\uDFFE\\uDFFF])|(?:\\uD87F[\\uDF' +
    'FE\\uDFFF])|(?:\\uD8BF[\\uDFFE\\uDFFF])|(?:\\uD8FF[\\uDFFE\\uDFFF])|(?:\\uD93F[\\uDFFE\\uD' +
    'FFF])|(?:\\uD97F[\\uDFFE\\uDFFF])|(?:\\uD9BF[\\uDFFE\\uDFFF])|(?:\\uD9FF[\\uDFFE\\uDFFF])' +
    '|(?:\\uDA3F[\\uDFFE\\uDFFF])|(?:\\uDA7F[\\uDFFE\\uDFFF])|(?:\\uDABF[\\uDFFE\\uDFFF])|(?:\\' +
    'uDAFF[\\uDFFE\\uDFFF])|(?:\\uDB3F[\\uDFFE\\uDFFF])|(?:\\uDB7F[\\uDFFE\\uDFFF])|(?:\\uDBBF' +
    '[\\uDFFE\\uDFFF])|(?:\\uDBFF[\\uDFFE\\uDFFF])(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\' +
    'uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|' +
    '(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]))',
  'g'
)

/**
 * Makes a value safe to embed in XML text or attribute content.
 *
 * Strips the characters matched by INVALID_XML_CHARS, replaces the five
 * XML-reserved characters with their predefined entities, turns line breaks
 * into spaces, collapses runs of spaces and trims the result.
 *
 * @param {*} string - Value to escape; it is converted with `String()`.
 * @param {string} [defaultValue=''] - Returned as-is when `string` is falsy.
 * @returns {string} The escaped text, or `defaultValue` for falsy input.
 */
function escapeString(string, defaultValue = '') {
  if (!string) return defaultValue

  return String(string)
    .replace(INVALID_XML_CHARS, '')
    // `&` must be handled first so the entities produced below are not re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
    .replace(/\n|\r/g, ' ')
    .replace(/ +/g, ' ')
    .trim()
}
/**
 * Fetches the synopsis text for one schedule row.
 *
 * Follows the first link found in the row and reads the `.synopsis` element
 * of the page it points to. A failed request is logged with `console.log`
 * and treated as "no description".
 *
 * @param {*} item - Schedule row, in any form `cheerio.load` accepts.
 * @returns {Promise<string|null>} Trimmed synopsis text, or null when the row
 *   has no link or the linked page yielded no data.
 */
async function loadDescription(item) {
  const href = cheerio.load(item)('a').attr('href')
  if (!href) return null

  const html = await axios
    .get(href)
    .then(({ data }) => data)
    .catch(console.log)

  return html ? cheerio.load(html)('.synopsis').text().trim() : null
}
/**
 * Builds the start time of a programme from a schedule row.
 *
 * @param {*} item - Schedule row; the text of its first cell is used as the time.
 * @param {object} date - Day being parsed; must provide `format()`.
 * @returns {object} Result of `dayjs.tz` for "DD/MM/YYYY HH:mm" in Asia/Jakarta.
 */
function parseStart(item, date) {
  const clock = cheerio.load(item)('td:nth-child(1)').text()
  const day = date.format('DD/MM/YYYY')

  return dayjs.tz(`${day} ${clock}`, 'DD/MM/YYYY HH:mm', 'Asia/Jakarta')
}

/**
 * Reads the programme title: the text of the link inside the row's second cell.
 *
 * @param {*} item - Schedule row.
 * @returns {string} Link text, or an empty string when nothing matches.
 */
function parseTitle(item) {
  const row = cheerio.load(item)
  const link = row('td:nth-child(2) > a')

  return link.text()
}

/**
 * Extracts the schedule rows from a response body.
 *
 * @param {string} content - Markup returned by the schedule endpoint.
 * @returns {Array} Every `tr` element carrying `valign="top"`.
 */
function parseItems(content) {
  const page = cheerio.load(content)
  const rows = page('tr[valign="top"]')

  return rows.toArray()
}
Jam Tayang Program Acara Durasi
00:00African Wild S1: Seals01:00
` + +it('can generate valid url', () => { + expect(url).toBe('https://mncvision.id/schedule/table') +}) + +it('can generate valid request headers', () => { + expect(request.headers).toMatchObject({ + 'Content-Type': 'multipart/form-data; boundary=X-EPG-BOUNDARY' + }) +}) + +it('can generate valid request data', () => { + const result = request.data({ channel, date }) + expect(result._boundary).toBe('X-EPG-BOUNDARY') +}) + +it('can get logo url', () => { + expect(logo({ content, channel })).toBe( + 'https://www.mncvision.id/userfiles/image/channel/channel_203.png' + ) +}) + +it('can parse response', done => { + axios.get.mockImplementation(() => + Promise.resolve({ + data: `
+ Nikmati suasana kehidupan koloni anjing laut di kawasan pantai barat Afrika Selatan.
` + }) + ) + + parser({ date, channel, content }) + .then(result => { + expect(result).toMatchObject([ + { + start: '2021-11-11T17:00:00.000Z', + stop: '2021-11-11T18:00:00.000Z', + title: 'African Wild S1: Seals', + description: + 'Nikmati suasana kehidupan koloni anjing laut di kawasan pantai barat Afrika Selatan.' + } + ]) + done() + }) + .catch(error => { + done(error) + }) +}) + +it('can handle empty guide', done => { + parser({ + date, + channel, + content: `` + }) + .then(result => { + expect(result).toMatchObject([]) + done() + }) + .catch(error => { + done(error) + }) +})