Optimize mncvision.id grabber connection.

Currently, while fetching the guide for each channel, one additional connection
is used to set the language cookie. This optimization changes that behaviour by
setting the language cookie once and then reusing that cookie for the rest of the
channels, unless the channel language changes.

Assume there are 10 channels and each channel uses only one connection. Before the
optimization, 20 connections are made (1 for the guide fetch plus 1 to set the
language, multiplied by 10); after the optimization, 11 connections are made
(1 to set the language, plus 1 guide fetch multiplied by 10).

Signed-off-by: Toha <tohenk@yahoo.com>
This commit is contained in:
Toha 2023-11-21 22:42:02 +07:00
parent 35272ea0a4
commit f0cadf182e
No known key found for this signature in database
GPG key ID: 2D7AA6389D44DCAB
7 changed files with 459 additions and 1425 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -5,17 +5,17 @@
<div id="id-schedule-detail" class="tm-sinopsys-container">
<h2 class='page-header title text-info program-title'>Adventures With Miao Mi, Ep 1</h2>
<div class='text-warning showtime'><b>05 Oct 2022, 12:00 AM</b><br><small class="showtime-range">00:00 - 00:06 [duration:00:06]</small></div>
<h2 class='page-header title text-info program-title'>Blue Bloods S13, Ep 19</h2>
<div class='text-warning showtime'><b>19 Nov 2023, 12:15 AM</b><br><small class="showtime-range">00:15 - 01:05 [duration:00:50]</small></div>
<blockquote class="bloquet synopsis">
When children begin to disappear, a group of young kids have to face their biggest fears when they square off against a murderous, evil clown. </blockquote>
Jamie partners with the FDNY to find the arsonist responsible for a massive fire at an NYPD evidence storage facility. </blockquote>
<div class="tm-channel-info">
<img class="tm-channel-image" src="userfiles/image/channel/miawme150x150.jpg" alt="Miao Mi" title="Miao Mi #channel:38" />
<img class="tm-channel-image" src="userfiles/image/channel/axn_150x150.jpg" alt="AXN" title="AXN #channel:154" />
<div class='tm-channel-label'>
<span class='tm-channel-label'>
Channel <span class='tm-channel-no'>38</span>
Channel <span class='tm-channel-no'>154</span>
</span>
</div>

View file

@ -5,17 +5,17 @@
<div id="id-schedule-detail" class="tm-sinopsys-container">
<h2 class='page-header title text-info program-title'>Adventures With Miao Mi, Ep 1</h2>
<div class='text-warning showtime'><b>05 Oct 2022, 12:00 AM</b><br><small class="showtime-range">00:00 - 00:06 [durasi:00:06]</small></div>
<h2 class='page-header title text-info program-title'>Blue Bloods S13, Ep 19</h2>
<div class='text-warning showtime'><b>19 Nov 2023, 12:15 AM</b><br><small class="showtime-range">00:15 - 01:05 [durasi:00:50]</small></div>
<blockquote class="bloquet synopsis">
Ketika anak-anak mulai menghilang, sekelompok anak kecil harus menghadapi ketakutan terbesar mereka ketika mereka melawan sesosok badut pembunuh yang jahat. </blockquote>
Jamie bekerja sama dengan FDNY untuk menemukan pelaku pembakaran yang bertanggung jawab atas kebakaran hebat yang terjadi di fasilitas penyimpanan bukti milik NYPD. </blockquote>
<div class="tm-channel-info">
<img class="tm-channel-image" src="userfiles/image/channel/miawme150x150.jpg" alt="Miao Mi" title="Miao Mi #channel:38" />
<img class="tm-channel-image" src="userfiles/image/channel/axn_150x150.jpg" alt="AXN" title="AXN #channel:154" />
<div class='tm-channel-label'>
<span class='tm-channel-label'>
Channel <span class='tm-channel-no'>38</span>
Channel <span class='tm-channel-no'>154</span>
</span>
</div>

View file

@ -10,13 +10,17 @@ dayjs.extend(utc)
dayjs.extend(timezone)
dayjs.extend(customParseFormat)
const languages = { en: 'english', id: 'indonesia' }
const cookies = {}
const timeout = 30000
module.exports = {
site: 'mncvision.id',
days: 2,
url: 'https://www.mncvision.id/schedule/table',
request: {
method: 'POST',
data: function ({ channel, date }) {
data({ channel, date }) {
const formData = new URLSearchParams()
formData.append('search_model', 'channel')
formData.append('af0rmelement', 'aformelement')
@ -26,32 +30,33 @@ module.exports = {
return formData
},
headers: {
'Content-Type': 'application/x-www-form-urlencoded'
async headers({ channel }) {
const headers = {
'Content-Type': 'application/x-www-form-urlencoded'
}
if (channel && !cookies[channel.lang]) {
cookies[channel.lang] = await loadLangCookies(channel)
if (cookies[channel.lang]) {
headers.Cookie = cookies[channel.lang]
}
}
return headers
},
jar: null
},
async parser({ content, date, headers, channel }) {
async parser({ content, headers, date, channel}) {
const programs = []
const cookies = parseCookies(headers)
if (!cookies) return programs
let items = parseItems(content)
if (!items.length) return programs
const pages = parsePages(content)
for (let url of pages) {
items = items.concat(parseItems(await loadNextPage(url, cookies)))
if (!cookies[channel.lang]) {
cookies[channel.lang] = parseCookies(headers)
}
const langCookies = await loadLangCookies(channel)
if (!langCookies) return programs
const [$, items] = parseItems(content)
for (const item of items) {
const $item = cheerio.load(item)
const $item = $(item)
const start = parseStart($item, date)
const duration = parseDuration($item)
const stop = start.add(duration, 'm')
const description = await loadDescription($item, langCookies)
const description = await loadDescription($item, cookies[channel.lang])
programs.push({
title: parseTitle($item),
season: parseSeason($item),
@ -78,7 +83,7 @@ module.exports = {
const $item = $(item)
return {
lang: lang,
lang,
site_id: $item.attr('value'),
name: $item.text().split(' - ')[0].trim()
}
@ -103,7 +108,7 @@ function parseEpisode($item) {
}
function parseDuration($item) {
let duration = $item('td:nth-child(3)').text()
let duration = $item.find('td:nth-child(3)').text()
const match = duration.match(/(\d{2}):(\d{2})/)
const hours = parseInt(match[1])
const minutes = parseInt(match[2])
@ -112,67 +117,41 @@ function parseDuration($item) {
}
function parseStart($item, date) {
let time = $item('td:nth-child(1)').text()
let time = $item.find('td:nth-child(1)').text()
time = `${date.format('DD/MM/YYYY')} ${time}`
return dayjs.tz(time, 'DD/MM/YYYY HH:mm', 'Asia/Jakarta')
}
function parseTitle($item) {
return $item('td:nth-child(2) > a').text()
return $item.find('td:nth-child(2) > a').text()
}
function parseItems(content) {
const $ = cheerio.load(content)
return $('tr[valign="top"]').toArray()
}
function parsePages(content) {
const $ = cheerio.load(content)
const links = $('#schedule > div.schedule_search_result_container > div.box.well > a')
.map((i, el) => {
return $(el).attr('href')
})
.get()
return _.uniq(links)
}
function loadNextPage(url, cookies) {
return axios
.get(url, { headers: { Cookie: cookies }, timeout: 30000 })
.then(r => r.data)
.catch(err => {
console.log(err.message)
return null
})
return [$, $('tr[valign="top"]').toArray()]
}
function loadLangCookies(channel) {
const languages = {
en: 'english',
id: 'indonesia'
}
const url = `https://www.mncvision.id/language_switcher/setlang/${languages[channel.lang]}/`
return axios
.get(url, { timeout: 30000 })
.get(url, { timeout })
.then(r => parseCookies(r.headers))
.catch(error => console.log(error.message))
.catch(error => console.error(error.message))
}
async function loadDescription($item, cookies) {
const url = $item('a').attr('href')
const url = $item.find('a').attr('href')
if (!url) return null
const content = await axios
.get(url, {
headers: { 'X-Requested-With': 'XMLHttpRequest', Cookie: cookies },
timeout: 30000
timeout
})
.then(r => r.data)
.catch(error => console.log(error.message))
.catch(error => console.error(error.message))
if (!content) return null
const $page = cheerio.load(content)

View file

@ -8,22 +8,58 @@ const axios = require('axios')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
jest.mock('axios')
const date = dayjs.utc('2022-10-05', 'YYYY-MM-DD').startOf('d')
const date = dayjs.utc('2023-11-19').startOf('d')
const channel = {
site_id: '38',
xmltv_id: 'MiaoMi.hk',
site_id: '154',
xmltv_id: 'AXN.id',
lang: 'id'
}
const headers = {
const indonesiaHeaders = {
'set-cookie': [
's1nd0vL=05e9pr6gi112tdmutsn7big93o75r0b0; expires=Wed, 05-Oct-2022 14:18:22 GMT; Max-Age=7200; path=/; HttpOnly'
's1nd0vL=uo6gsashc1rmloqbb50m6b13qkglfvpl; expires=Sat, 18-Nov-2023 20:45:02 GMT; Max-Age=7200; path=/; HttpOnly'
]
}
const englishHeaders = {
'set-cookie': [
's1nd0vL=imtot2v1cs0pbemaohj9fee3hlbqo699; expires=Sat, 18-Nov-2023 20:38:31 GMT; Max-Age=7200; path=/; HttpOnly'
]
}
axios.get.mockImplementation((url, opts) => {
if (url === 'https://www.mncvision.id/language_switcher/setlang/indonesia/') {
return Promise.resolve({
headers: indonesiaHeaders
})
}
if (url === 'https://www.mncvision.id/language_switcher/setlang/english/') {
return Promise.resolve({
headers: englishHeaders
})
}
if (
url ===
'https://www.mncvision.id/schedule/detail/20231119001500154/Blue-Bloods-S13-Ep-19/1'
) {
if (opts.headers['Cookie'] === indonesiaHeaders['set-cookie'][0]) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_id.html'))
})
}
if (opts.headers['Cookie'] === englishHeaders['set-cookie'][0]) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_en.html'))
})
}
}
return Promise.resolve({ data: '' })
})
it('can generate valid url', () => {
expect(url).toBe('https://www.mncvision.id/schedule/table')
@ -33,8 +69,8 @@ it('can generate valid request method', () => {
expect(request.method).toBe('POST')
})
it('can generate valid request headers', () => {
expect(request.headers).toMatchObject({
it('can generate valid request headers', async () => {
expect(await request.headers({ channel })).toMatchObject({
'Content-Type': 'application/x-www-form-urlencoded'
})
})
@ -43,108 +79,51 @@ it('can generate valid request data', () => {
const data = request.data({ channel, date })
expect(data.get('search_model')).toBe('channel')
expect(data.get('af0rmelement')).toBe('aformelement')
expect(data.get('fdate')).toBe('2022-10-05')
expect(data.get('fchannel')).toBe('38')
expect(data.get('fdate')).toBe('2023-11-19')
expect(data.get('fchannel')).toBe('154')
expect(data.get('submit')).toBe('Search')
})
it('can parse response', async () => {
const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html'))
const indonesiaHeaders = {
'set-cookie': [
's1nd0vL=e3vjb0oaf9vijiqsg7cml4i7fdkq16db; expires=Wed, 05-Oct-2022 14:54:16 GMT; Max-Age=7200; path=/; HttpOnly'
]
}
const englishHeaders = {
'set-cookie': [
's1nd0vL=hfd6hpnpr6gvgart0d8rf7ef6t4gi7nr; expires=Wed, 05-Oct-2022 15:08:55 GMT; Max-Age=7200; path=/; HttpOnly'
]
}
axios.get.mockImplementation((url, opts) => {
if (
url === 'https://www.mncvision.id/schedule/table/startno/50' &&
opts.headers['Cookie'] === headers['set-cookie'][0]
) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/content_p2.html'))
})
} else if (url === 'https://www.mncvision.id/language_switcher/setlang/indonesia/') {
return Promise.resolve({
headers: indonesiaHeaders
})
} else if (url === 'https://www.mncvision.id/language_switcher/setlang/english/') {
return Promise.resolve({
headers: englishHeaders
})
} else if (
url ===
'https://mncvision.id/schedule/detail/2022100500000038/Adventures-With-Miao-Mi-Ep-1/1' &&
opts.headers['Cookie'] === indonesiaHeaders['set-cookie'][0]
) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_id.html'))
})
} else if (
url ===
'https://mncvision.id/schedule/detail/2022100500000038/Adventures-With-Miao-Mi-Ep-1/1' &&
opts.headers['Cookie'] === englishHeaders['set-cookie'][0]
) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_en.html'))
})
}
return Promise.resolve({ data: '' })
})
let indonesiaResults = await parser({ date, content, channel, headers })
indonesiaResults = indonesiaResults.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
})
const indonesiaResults = (await parser({ date, content, channel, headers: indonesiaHeaders }))
.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
})
expect(indonesiaResults[0]).toMatchObject({
start: '2022-10-04T17:00:00.000Z',
stop: '2022-10-04T17:06:00.000Z',
title: 'Adventures With Miao Mi, Ep 1',
episode: 1,
start: '2023-11-18T17:15:00.000Z',
stop: '2023-11-18T18:05:00.000Z',
title: 'Blue Bloods S13, Ep 19',
episode: 19,
description:
'Ketika anak-anak mulai menghilang, sekelompok anak kecil harus menghadapi ketakutan terbesar mereka ketika mereka melawan sesosok badut pembunuh yang jahat.'
})
expect(indonesiaResults[4]).toMatchObject({
start: '2022-10-04T17:33:00.000Z',
stop: '2022-10-04T17:46:00.000Z',
title: 'Leo Wildlife Ranger S2, Ep 27',
season: 2,
episode: 27
})
let englishResults = await parser({ date, content, channel: { ...channel, lang: 'en' }, headers })
englishResults = englishResults.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
'Jamie bekerja sama dengan FDNY untuk menemukan pelaku pembakaran yang bertanggung jawab atas kebakaran hebat yang terjadi di fasilitas penyimpanan bukti milik NYPD.'
})
const englishResults = (await parser({ date, content, channel: { ...channel, lang: 'en' }, headers: englishHeaders }))
.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
})
expect(englishResults[0]).toMatchObject({
start: '2022-10-04T17:00:00.000Z',
stop: '2022-10-04T17:06:00.000Z',
title: 'Adventures With Miao Mi, Ep 1',
episode: 1,
start: '2023-11-18T17:15:00.000Z',
stop: '2023-11-18T18:05:00.000Z',
title: 'Blue Bloods S13, Ep 19',
episode: 19,
description:
'When children begin to disappear, a group of young kids have to face their biggest fears when they square off against a murderous, evil clown.'
'Jamie partners with the FDNY to find the arsonist responsible for a massive fire at an NYPD evidence storage facility.'
})
})
it('can handle empty guide', async () => {
const content = fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html'))
let results = await parser({
const results = await parser({
date,
channel,
content,
headers
headers: indonesiaHeaders
})
expect(results).toMatchObject([])
})