Merge pull request #2242 from tohenk/mncvision.id-optimize

Optimize mncvision.id grabber connection.
This commit is contained in:
Aleksandr Statciuk 2023-11-22 10:29:08 +03:00 committed by GitHub
commit f382a89049
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 459 additions and 1425 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -5,17 +5,17 @@
<div id="id-schedule-detail" class="tm-sinopsys-container"> <div id="id-schedule-detail" class="tm-sinopsys-container">
<h2 class='page-header title text-info program-title'>Adventures With Miao Mi, Ep 1</h2> <h2 class='page-header title text-info program-title'>Blue Bloods S13, Ep 19</h2>
<div class='text-warning showtime'><b>05 Oct 2022, 12:00 AM</b><br><small class="showtime-range">00:00 - 00:06 [duration:00:06]</small></div> <div class='text-warning showtime'><b>19 Nov 2023, 12:15 AM</b><br><small class="showtime-range">00:15 - 01:05 [duration:00:50]</small></div>
<blockquote class="bloquet synopsis"> <blockquote class="bloquet synopsis">
When children begin to disappear, a group of young kids have to face their biggest fears when they square off against a murderous, evil clown. </blockquote> Jamie partners with the FDNY to find the arsonist responsible for a massive fire at an NYPD evidence storage facility. </blockquote>
<div class="tm-channel-info"> <div class="tm-channel-info">
<img class="tm-channel-image" src="userfiles/image/channel/miawme150x150.jpg" alt="Miao Mi" title="Miao Mi #channel:38" /> <img class="tm-channel-image" src="userfiles/image/channel/axn_150x150.jpg" alt="AXN" title="AXN #channel:154" />
<div class='tm-channel-label'> <div class='tm-channel-label'>
<span class='tm-channel-label'> <span class='tm-channel-label'>
Channel <span class='tm-channel-no'>38</span> Channel <span class='tm-channel-no'>154</span>
</span> </span>
</div> </div>

View file

@ -5,17 +5,17 @@
<div id="id-schedule-detail" class="tm-sinopsys-container"> <div id="id-schedule-detail" class="tm-sinopsys-container">
<h2 class='page-header title text-info program-title'>Adventures With Miao Mi, Ep 1</h2> <h2 class='page-header title text-info program-title'>Blue Bloods S13, Ep 19</h2>
<div class='text-warning showtime'><b>05 Oct 2022, 12:00 AM</b><br><small class="showtime-range">00:00 - 00:06 [durasi:00:06]</small></div> <div class='text-warning showtime'><b>19 Nov 2023, 12:15 AM</b><br><small class="showtime-range">00:15 - 01:05 [durasi:00:50]</small></div>
<blockquote class="bloquet synopsis"> <blockquote class="bloquet synopsis">
Ketika anak-anak mulai menghilang, sekelompok anak kecil harus menghadapi ketakutan terbesar mereka ketika mereka melawan sesosok badut pembunuh yang jahat. </blockquote> Jamie bekerja sama dengan FDNY untuk menemukan pelaku pembakaran yang bertanggung jawab atas kebakaran hebat yang terjadi di fasilitas penyimpanan bukti milik NYPD. </blockquote>
<div class="tm-channel-info"> <div class="tm-channel-info">
<img class="tm-channel-image" src="userfiles/image/channel/miawme150x150.jpg" alt="Miao Mi" title="Miao Mi #channel:38" /> <img class="tm-channel-image" src="userfiles/image/channel/axn_150x150.jpg" alt="AXN" title="AXN #channel:154" />
<div class='tm-channel-label'> <div class='tm-channel-label'>
<span class='tm-channel-label'> <span class='tm-channel-label'>
Channel <span class='tm-channel-no'>38</span> Channel <span class='tm-channel-no'>154</span>
</span> </span>
</div> </div>

View file

@ -10,13 +10,17 @@ dayjs.extend(utc)
dayjs.extend(timezone) dayjs.extend(timezone)
dayjs.extend(customParseFormat) dayjs.extend(customParseFormat)
const languages = { en: 'english', id: 'indonesia' }
const cookies = {}
const timeout = 30000
module.exports = { module.exports = {
site: 'mncvision.id', site: 'mncvision.id',
days: 2, days: 2,
url: 'https://www.mncvision.id/schedule/table', url: 'https://www.mncvision.id/schedule/table',
request: { request: {
method: 'POST', method: 'POST',
data: function ({ channel, date }) { data({ channel, date }) {
const formData = new URLSearchParams() const formData = new URLSearchParams()
formData.append('search_model', 'channel') formData.append('search_model', 'channel')
formData.append('af0rmelement', 'aformelement') formData.append('af0rmelement', 'aformelement')
@ -26,32 +30,33 @@ module.exports = {
return formData return formData
}, },
headers: { async headers({ channel }) {
'Content-Type': 'application/x-www-form-urlencoded' const headers = {
'Content-Type': 'application/x-www-form-urlencoded'
}
if (channel && !cookies[channel.lang]) {
cookies[channel.lang] = await loadLangCookies(channel)
if (cookies[channel.lang]) {
headers.Cookie = cookies[channel.lang]
}
}
return headers
}, },
jar: null jar: null
}, },
async parser({ content, date, headers, channel }) { async parser({ content, headers, date, channel}) {
const programs = [] const programs = []
const cookies = parseCookies(headers)
if (!cookies) return programs
let items = parseItems(content)
if (!items.length) return programs
const pages = parsePages(content) if (!cookies[channel.lang]) {
for (let url of pages) { cookies[channel.lang] = parseCookies(headers)
items = items.concat(parseItems(await loadNextPage(url, cookies)))
} }
const [$, items] = parseItems(content)
const langCookies = await loadLangCookies(channel)
if (!langCookies) return programs
for (const item of items) { for (const item of items) {
const $item = cheerio.load(item) const $item = $(item)
const start = parseStart($item, date) const start = parseStart($item, date)
const duration = parseDuration($item) const duration = parseDuration($item)
const stop = start.add(duration, 'm') const stop = start.add(duration, 'm')
const description = await loadDescription($item, langCookies) const description = await loadDescription($item, cookies[channel.lang])
programs.push({ programs.push({
title: parseTitle($item), title: parseTitle($item),
season: parseSeason($item), season: parseSeason($item),
@ -78,7 +83,7 @@ module.exports = {
const $item = $(item) const $item = $(item)
return { return {
lang: lang, lang,
site_id: $item.attr('value'), site_id: $item.attr('value'),
name: $item.text().split(' - ')[0].trim() name: $item.text().split(' - ')[0].trim()
} }
@ -103,7 +108,7 @@ function parseEpisode($item) {
} }
function parseDuration($item) { function parseDuration($item) {
let duration = $item('td:nth-child(3)').text() let duration = $item.find('td:nth-child(3)').text()
const match = duration.match(/(\d{2}):(\d{2})/) const match = duration.match(/(\d{2}):(\d{2})/)
const hours = parseInt(match[1]) const hours = parseInt(match[1])
const minutes = parseInt(match[2]) const minutes = parseInt(match[2])
@ -112,67 +117,41 @@ function parseDuration($item) {
} }
function parseStart($item, date) { function parseStart($item, date) {
let time = $item('td:nth-child(1)').text() let time = $item.find('td:nth-child(1)').text()
time = `${date.format('DD/MM/YYYY')} ${time}` time = `${date.format('DD/MM/YYYY')} ${time}`
return dayjs.tz(time, 'DD/MM/YYYY HH:mm', 'Asia/Jakarta') return dayjs.tz(time, 'DD/MM/YYYY HH:mm', 'Asia/Jakarta')
} }
function parseTitle($item) { function parseTitle($item) {
return $item('td:nth-child(2) > a').text() return $item.find('td:nth-child(2) > a').text()
} }
function parseItems(content) { function parseItems(content) {
const $ = cheerio.load(content) const $ = cheerio.load(content)
return $('tr[valign="top"]').toArray() return [$, $('tr[valign="top"]').toArray()]
}
function parsePages(content) {
const $ = cheerio.load(content)
const links = $('#schedule > div.schedule_search_result_container > div.box.well > a')
.map((i, el) => {
return $(el).attr('href')
})
.get()
return _.uniq(links)
}
function loadNextPage(url, cookies) {
return axios
.get(url, { headers: { Cookie: cookies }, timeout: 30000 })
.then(r => r.data)
.catch(err => {
console.log(err.message)
return null
})
} }
function loadLangCookies(channel) { function loadLangCookies(channel) {
const languages = {
en: 'english',
id: 'indonesia'
}
const url = `https://www.mncvision.id/language_switcher/setlang/${languages[channel.lang]}/` const url = `https://www.mncvision.id/language_switcher/setlang/${languages[channel.lang]}/`
return axios return axios
.get(url, { timeout: 30000 }) .get(url, { timeout })
.then(r => parseCookies(r.headers)) .then(r => parseCookies(r.headers))
.catch(error => console.log(error.message)) .catch(error => console.error(error.message))
} }
async function loadDescription($item, cookies) { async function loadDescription($item, cookies) {
const url = $item('a').attr('href') const url = $item.find('a').attr('href')
if (!url) return null if (!url) return null
const content = await axios const content = await axios
.get(url, { .get(url, {
headers: { 'X-Requested-With': 'XMLHttpRequest', Cookie: cookies }, headers: { 'X-Requested-With': 'XMLHttpRequest', Cookie: cookies },
timeout: 30000 timeout
}) })
.then(r => r.data) .then(r => r.data)
.catch(error => console.log(error.message)) .catch(error => console.error(error.message))
if (!content) return null if (!content) return null
const $page = cheerio.load(content) const $page = cheerio.load(content)

View file

@ -8,22 +8,58 @@ const axios = require('axios')
const dayjs = require('dayjs') const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc') const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat') const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat) dayjs.extend(customParseFormat)
dayjs.extend(utc) dayjs.extend(utc)
jest.mock('axios') jest.mock('axios')
const date = dayjs.utc('2022-10-05', 'YYYY-MM-DD').startOf('d') const date = dayjs.utc('2023-11-19').startOf('d')
const channel = { const channel = {
site_id: '38', site_id: '154',
xmltv_id: 'MiaoMi.hk', xmltv_id: 'AXN.id',
lang: 'id' lang: 'id'
} }
const headers = { const indonesiaHeaders = {
'set-cookie': [ 'set-cookie': [
's1nd0vL=05e9pr6gi112tdmutsn7big93o75r0b0; expires=Wed, 05-Oct-2022 14:18:22 GMT; Max-Age=7200; path=/; HttpOnly' 's1nd0vL=uo6gsashc1rmloqbb50m6b13qkglfvpl; expires=Sat, 18-Nov-2023 20:45:02 GMT; Max-Age=7200; path=/; HttpOnly'
] ]
} }
const englishHeaders = {
'set-cookie': [
's1nd0vL=imtot2v1cs0pbemaohj9fee3hlbqo699; expires=Sat, 18-Nov-2023 20:38:31 GMT; Max-Age=7200; path=/; HttpOnly'
]
}
axios.get.mockImplementation((url, opts) => {
if (url === 'https://www.mncvision.id/language_switcher/setlang/indonesia/') {
return Promise.resolve({
headers: indonesiaHeaders
})
}
if (url === 'https://www.mncvision.id/language_switcher/setlang/english/') {
return Promise.resolve({
headers: englishHeaders
})
}
if (
url ===
'https://www.mncvision.id/schedule/detail/20231119001500154/Blue-Bloods-S13-Ep-19/1'
) {
if (opts.headers['Cookie'] === indonesiaHeaders['set-cookie'][0]) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_id.html'))
})
}
if (opts.headers['Cookie'] === englishHeaders['set-cookie'][0]) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_en.html'))
})
}
}
return Promise.resolve({ data: '' })
})
it('can generate valid url', () => { it('can generate valid url', () => {
expect(url).toBe('https://www.mncvision.id/schedule/table') expect(url).toBe('https://www.mncvision.id/schedule/table')
@ -33,8 +69,8 @@ it('can generate valid request method', () => {
expect(request.method).toBe('POST') expect(request.method).toBe('POST')
}) })
it('can generate valid request headers', () => { it('can generate valid request headers', async () => {
expect(request.headers).toMatchObject({ expect(await request.headers({ channel })).toMatchObject({
'Content-Type': 'application/x-www-form-urlencoded' 'Content-Type': 'application/x-www-form-urlencoded'
}) })
}) })
@ -43,108 +79,51 @@ it('can generate valid request data', () => {
const data = request.data({ channel, date }) const data = request.data({ channel, date })
expect(data.get('search_model')).toBe('channel') expect(data.get('search_model')).toBe('channel')
expect(data.get('af0rmelement')).toBe('aformelement') expect(data.get('af0rmelement')).toBe('aformelement')
expect(data.get('fdate')).toBe('2022-10-05') expect(data.get('fdate')).toBe('2023-11-19')
expect(data.get('fchannel')).toBe('38') expect(data.get('fchannel')).toBe('154')
expect(data.get('submit')).toBe('Search') expect(data.get('submit')).toBe('Search')
}) })
it('can parse response', async () => { it('can parse response', async () => {
const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html')) const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html'))
const indonesiaHeaders = { const indonesiaResults = (await parser({ date, content, channel, headers: indonesiaHeaders }))
'set-cookie': [ .map(p => {
's1nd0vL=e3vjb0oaf9vijiqsg7cml4i7fdkq16db; expires=Wed, 05-Oct-2022 14:54:16 GMT; Max-Age=7200; path=/; HttpOnly' p.start = p.start.toJSON()
] p.stop = p.stop.toJSON()
} return p
const englishHeaders = { })
'set-cookie': [
's1nd0vL=hfd6hpnpr6gvgart0d8rf7ef6t4gi7nr; expires=Wed, 05-Oct-2022 15:08:55 GMT; Max-Age=7200; path=/; HttpOnly'
]
}
axios.get.mockImplementation((url, opts) => {
if (
url === 'https://www.mncvision.id/schedule/table/startno/50' &&
opts.headers['Cookie'] === headers['set-cookie'][0]
) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/content_p2.html'))
})
} else if (url === 'https://www.mncvision.id/language_switcher/setlang/indonesia/') {
return Promise.resolve({
headers: indonesiaHeaders
})
} else if (url === 'https://www.mncvision.id/language_switcher/setlang/english/') {
return Promise.resolve({
headers: englishHeaders
})
} else if (
url ===
'https://mncvision.id/schedule/detail/2022100500000038/Adventures-With-Miao-Mi-Ep-1/1' &&
opts.headers['Cookie'] === indonesiaHeaders['set-cookie'][0]
) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_id.html'))
})
} else if (
url ===
'https://mncvision.id/schedule/detail/2022100500000038/Adventures-With-Miao-Mi-Ep-1/1' &&
opts.headers['Cookie'] === englishHeaders['set-cookie'][0]
) {
return Promise.resolve({
data: fs.readFileSync(path.resolve(__dirname, '__data__/program_en.html'))
})
}
return Promise.resolve({ data: '' })
})
let indonesiaResults = await parser({ date, content, channel, headers })
indonesiaResults = indonesiaResults.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
})
expect(indonesiaResults[0]).toMatchObject({ expect(indonesiaResults[0]).toMatchObject({
start: '2022-10-04T17:00:00.000Z', start: '2023-11-18T17:15:00.000Z',
stop: '2022-10-04T17:06:00.000Z', stop: '2023-11-18T18:05:00.000Z',
title: 'Adventures With Miao Mi, Ep 1', title: 'Blue Bloods S13, Ep 19',
episode: 1, episode: 19,
description: description:
'Ketika anak-anak mulai menghilang, sekelompok anak kecil harus menghadapi ketakutan terbesar mereka ketika mereka melawan sesosok badut pembunuh yang jahat.' 'Jamie bekerja sama dengan FDNY untuk menemukan pelaku pembakaran yang bertanggung jawab atas kebakaran hebat yang terjadi di fasilitas penyimpanan bukti milik NYPD.'
})
expect(indonesiaResults[4]).toMatchObject({
start: '2022-10-04T17:33:00.000Z',
stop: '2022-10-04T17:46:00.000Z',
title: 'Leo Wildlife Ranger S2, Ep 27',
season: 2,
episode: 27
})
let englishResults = await parser({ date, content, channel: { ...channel, lang: 'en' }, headers })
englishResults = englishResults.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
}) })
const englishResults = (await parser({ date, content, channel: { ...channel, lang: 'en' }, headers: englishHeaders }))
.map(p => {
p.start = p.start.toJSON()
p.stop = p.stop.toJSON()
return p
})
expect(englishResults[0]).toMatchObject({ expect(englishResults[0]).toMatchObject({
start: '2022-10-04T17:00:00.000Z', start: '2023-11-18T17:15:00.000Z',
stop: '2022-10-04T17:06:00.000Z', stop: '2023-11-18T18:05:00.000Z',
title: 'Adventures With Miao Mi, Ep 1', title: 'Blue Bloods S13, Ep 19',
episode: 1, episode: 19,
description: description:
'When children begin to disappear, a group of young kids have to face their biggest fears when they square off against a murderous, evil clown.' 'Jamie partners with the FDNY to find the arsonist responsible for a massive fire at an NYPD evidence storage facility.'
}) })
}) })
it('can handle empty guide', async () => { it('can handle empty guide', async () => {
const content = fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html')) const content = fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html'))
let results = await parser({ const results = await parser({
date, date,
channel, channel,
content, content,
headers headers: indonesiaHeaders
}) })
expect(results).toMatchObject([]) expect(results).toMatchObject([])
}) })