Merge pull request #2242 from tohenk/mncvision.id-optimize

Optimize mncvision.id grabber connection.
This commit is contained in:
Aleksandr Statciuk 2023-11-22 10:29:08 +03:00 committed by GitHub
commit f382a89049
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 459 additions and 1425 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -5,17 +5,17 @@
<div id="id-schedule-detail" class="tm-sinopsys-container">
<h2 class='page-header title text-info program-title'>Adventures With Miao Mi, Ep 1</h2>
<div class='text-warning showtime'><b>05 Oct 2022, 12:00 AM</b><br><small class="showtime-range">00:00 - 00:06 [duration:00:06]</small></div>
<h2 class='page-header title text-info program-title'>Blue Bloods S13, Ep 19</h2>
<div class='text-warning showtime'><b>19 Nov 2023, 12:15 AM</b><br><small class="showtime-range">00:15 - 01:05 [duration:00:50]</small></div>
<blockquote class="bloquet synopsis">
When children begin to disappear, a group of young kids have to face their biggest fears when they square off against a murderous, evil clown. </blockquote>
Jamie partners with the FDNY to find the arsonist responsible for a massive fire at an NYPD evidence storage facility. </blockquote>
<div class="tm-channel-info">
<img class="tm-channel-image" src="userfiles/image/channel/miawme150x150.jpg" alt="Miao Mi" title="Miao Mi #channel:38" />
<img class="tm-channel-image" src="userfiles/image/channel/axn_150x150.jpg" alt="AXN" title="AXN #channel:154" />
<div class='tm-channel-label'>
<span class='tm-channel-label'>
Channel <span class='tm-channel-no'>38</span>
Channel <span class='tm-channel-no'>154</span>
</span>
</div>

View file

@ -5,17 +5,17 @@
<div id="id-schedule-detail" class="tm-sinopsys-container">
<h2 class='page-header title text-info program-title'>Adventures With Miao Mi, Ep 1</h2>
<div class='text-warning showtime'><b>05 Oct 2022, 12:00 AM</b><br><small class="showtime-range">00:00 - 00:06 [durasi:00:06]</small></div>
<h2 class='page-header title text-info program-title'>Blue Bloods S13, Ep 19</h2>
<div class='text-warning showtime'><b>19 Nov 2023, 12:15 AM</b><br><small class="showtime-range">00:15 - 01:05 [durasi:00:50]</small></div>
<blockquote class="bloquet synopsis">
Ketika anak-anak mulai menghilang, sekelompok anak kecil harus menghadapi ketakutan terbesar mereka ketika mereka melawan sesosok badut pembunuh yang jahat. </blockquote>
Jamie bekerja sama dengan FDNY untuk menemukan pelaku pembakaran yang bertanggung jawab atas kebakaran hebat yang terjadi di fasilitas penyimpanan bukti milik NYPD. </blockquote>
<div class="tm-channel-info">
<img class="tm-channel-image" src="userfiles/image/channel/miawme150x150.jpg" alt="Miao Mi" title="Miao Mi #channel:38" />
<img class="tm-channel-image" src="userfiles/image/channel/axn_150x150.jpg" alt="AXN" title="AXN #channel:154" />
<div class='tm-channel-label'>
<span class='tm-channel-label'>
Channel <span class='tm-channel-no'>38</span>
Channel <span class='tm-channel-no'>154</span>
</span>
</div>

View file

@ -10,13 +10,17 @@ dayjs.extend(utc)
dayjs.extend(timezone)
dayjs.extend(customParseFormat)
const languages = { en: 'english', id: 'indonesia' }
const cookies = {}
const timeout = 30000
module.exports = {
site: 'mncvision.id',
days: 2,
url: 'https://www.mncvision.id/schedule/table',
request: {
method: 'POST',
data: function ({ channel, date }) {
data({ channel, date }) {
const formData = new URLSearchParams()
formData.append('search_model', 'channel')
formData.append('af0rmelement', 'aformelement')
@ -26,32 +30,33 @@ module.exports = {
return formData
},
headers: {
async headers({ channel }) {
const headers = {
'Content-Type': 'application/x-www-form-urlencoded'
}
if (channel && !cookies[channel.lang]) {
cookies[channel.lang] = await loadLangCookies(channel)
if (cookies[channel.lang]) {
headers.Cookie = cookies[channel.lang]
}
}
return headers
},
jar: null
},
async parser({ content, date, headers, channel }) {
async parser({ content, headers, date, channel}) {
const programs = []
const cookies = parseCookies(headers)
if (!cookies) return programs
let items = parseItems(content)
if (!items.length) return programs
const pages = parsePages(content)
for (let url of pages) {
items = items.concat(parseItems(await loadNextPage(url, cookies)))
if (!cookies[channel.lang]) {
cookies[channel.lang] = parseCookies(headers)
}
const langCookies = await loadLangCookies(channel)
if (!langCookies) return programs
const [$, items] = parseItems(content)
for (const item of items) {
const $item = cheerio.load(item)
const $item = $(item)
const start = parseStart($item, date)
const duration = parseDuration($item)
const stop = start.add(duration, 'm')
const description = await loadDescription($item, langCookies)
const description = await loadDescription($item, cookies[channel.lang])
programs.push({
title: parseTitle($item),
season: parseSeason($item),
@ -78,7 +83,7 @@ module.exports = {
const $item = $(item)
return {
lang: lang,
lang,
site_id: $item.attr('value'),
name: $item.text().split(' - ')[0].trim()
}
@ -103,7 +108,7 @@ function parseEpisode($item) {
}
function parseDuration($item) {
let duration = $item('td:nth-child(3)').text()
let duration = $item.find('td:nth-child(3)').text()
const match = duration.match(/(\d{2}):(\d{2})/)
const hours = parseInt(match[1])
const minutes = parseInt(match[2])
@ -112,67 +117,41 @@ function parseDuration($item) {
}
/**
 * Builds the programme start time from one schedule table row.
 *
 * @param {object} $item - Cheerio selection wrapping the row; the text of its
 *   first cell is used as the time of day.
 * @param {object} date - dayjs instance for the day being grabbed.
 * @returns {object} dayjs instance parsed as `DD/MM/YYYY HH:mm` in Asia/Jakarta.
 */
function parseStart($item, date) {
  // `$item` is a selection, not a loader function, so cells are reached with
  // `.find()`; the time is declared exactly once.
  const time = $item.find('td:nth-child(1)').text()
  const stamp = `${date.format('DD/MM/YYYY')} ${time}`

  return dayjs.tz(stamp, 'DD/MM/YYYY HH:mm', 'Asia/Jakarta')
}
/**
 * Reads the programme title from one schedule table row.
 *
 * @param {object} $item - Cheerio selection wrapping the row.
 * @returns {string} Text of the link inside the row's second cell.
 */
function parseTitle($item) {
  // Single return: `$item` is a selection, so it is queried with `.find()`
  // rather than being called as a function.
  return $item.find('td:nth-child(2) > a').text()
}
/**
 * Parses a schedule page and collects its programme rows.
 *
 * @param {string|Buffer} content - Markup handed to `cheerio.load`.
 * @returns {Array} Two-element tuple `[$, rows]`: the loaded cheerio instance
 *   and the array of `tr[valign="top"]` elements found in it.
 */
function parseItems(content) {
  const $ = cheerio.load(content)
  const rows = $('tr[valign="top"]').toArray()

  // The loader travels with the rows so the caller can wrap each row as
  // `$(row)` instead of re-parsing every row with a fresh `cheerio.load`.
  return [$, rows]
}
/**
 * Collects the distinct pagination links of a schedule page.
 *
 * @param {string|Buffer} content - Markup handed to `cheerio.load`.
 * @returns {string[]} De-duplicated `href` values of the pager anchors.
 */
function parsePages(content) {
  const $ = cheerio.load(content)
  const pagerAnchors = $('#schedule > div.schedule_search_result_container > div.box.well > a')
  const hrefs = pagerAnchors.map((index, anchor) => $(anchor).attr('href')).get()

  return _.uniq(hrefs)
}
/**
 * Downloads a follow-up schedule page.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {string} cookies - Value sent in the `Cookie` request header.
 * @returns {Promise<*>} Resolves to the response body, or `null` when the
 *   request fails (the failure message is logged, not rethrown).
 */
function loadNextPage(url, cookies) {
  const options = { headers: { Cookie: cookies }, timeout: 30000 }

  // The promise chain is the only return path; nothing may follow it.
  return axios
    .get(url, options)
    .then(response => response.data)
    .catch(err => {
      // Best effort: report on stderr and let the caller see `null`.
      console.error(err.message)
      return null
    })
}
/**
 * Requests the site's language-switcher URL for the channel's language and
 * returns the cookies of that response.
 *
 * Relies on the module-level `languages` map (language code -> URL segment)
 * and the module-level `timeout` value.
 *
 * @param {object} channel - Channel descriptor; only `channel.lang` is read.
 * @returns {Promise<*>} Resolves to whatever `parseCookies` extracts from the
 *   response headers, or `undefined` when the request fails (the failure
 *   message is logged, not rethrown).
 */
function loadLangCookies(channel) {
  const url = `https://www.mncvision.id/language_switcher/setlang/${languages[channel.lang]}/`

  // One request, one error handler: `.get()` must be called on axios itself,
  // never chained onto the promise returned by a previous `.get()`.
  return axios
    .get(url, { timeout })
    .then(response => parseCookies(response.headers))
    .catch(error => console.error(error.message))
}
async function loadDescription($item, cookies) {
const url = $item('a').attr('href')
const url = $item.find('a').attr('href')
if (!url) return null
const content = await axios
.get(url, {
headers: { 'X-Requested-With': 'XMLHttpRequest', Cookie: cookies },
timeout: 30000
timeout
})
.then(r => r.data)
.catch(error => console.log(error.message))
.catch(error => console.error(error.message))
if (!content) return null
const $page = cheerio.load(content)

View file

@ -8,22 +8,58 @@ const axios = require('axios')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
dayjs.extend(utc)
jest.mock('axios')
// Day under test, as a UTC dayjs instance at start of day.
const date = dayjs.utc('2023-11-19').startOf('d')

// Channel used by the request and parser tests; tests that need the English
// variant override `lang` on a copy.
const channel = {
  site_id: '154',
  xmltv_id: 'AXN.id',
  lang: 'id'
}

// Response headers returned by the mocked Indonesian language-switcher request.
const indonesiaHeaders = {
  'set-cookie': [
    's1nd0vL=uo6gsashc1rmloqbb50m6b13qkglfvpl; expires=Sat, 18-Nov-2023 20:45:02 GMT; Max-Age=7200; path=/; HttpOnly'
  ]
}

// Response headers returned by the mocked English language-switcher request.
const englishHeaders = {
  'set-cookie': [
    's1nd0vL=imtot2v1cs0pbemaohj9fee3hlbqo699; expires=Sat, 18-Nov-2023 20:38:31 GMT; Max-Age=7200; path=/; HttpOnly'
  ]
}
// Offline replacement for `axios.get`, keyed on the requested URL:
//  - the two language-switcher URLs answer with the matching header fixture;
//  - the programme detail URL answers with the Indonesian or English HTML
//    fixture, chosen by the `Cookie` header the caller sent;
//  - anything else (including a detail request with an unknown cookie)
//    resolves to an empty body.
axios.get.mockImplementation((url, opts) => {
  switch (url) {
    case 'https://www.mncvision.id/language_switcher/setlang/indonesia/':
      return Promise.resolve({
        headers: indonesiaHeaders
      })

    case 'https://www.mncvision.id/language_switcher/setlang/english/':
      return Promise.resolve({
        headers: englishHeaders
      })

    case 'https://www.mncvision.id/schedule/detail/20231119001500154/Blue-Bloods-S13-Ep-19/1': {
      const sentCookie = opts.headers['Cookie']
      let fixture = null
      if (sentCookie === indonesiaHeaders['set-cookie'][0]) {
        fixture = '__data__/program_id.html'
      } else if (sentCookie === englishHeaders['set-cookie'][0]) {
        fixture = '__data__/program_en.html'
      }
      if (fixture !== null) {
        return Promise.resolve({
          data: fs.readFileSync(path.resolve(__dirname, fixture))
        })
      }
      break
    }
  }

  return Promise.resolve({ data: '' })
})
it('can generate valid url', () => {
expect(url).toBe('https://www.mncvision.id/schedule/table')
@ -33,8 +69,8 @@ it('can generate valid request method', () => {
expect(request.method).toBe('POST')
})
// `request.headers` is an async function of the channel, so it has to be
// called and awaited before its result can be matched.
it('can generate valid request headers', async () => {
  const headers = await request.headers({ channel })

  expect(headers).toMatchObject({
    'Content-Type': 'application/x-www-form-urlencoded'
  })
})
@ -43,108 +79,51 @@ it('can generate valid request data', () => {
const data = request.data({ channel, date })
expect(data.get('search_model')).toBe('channel')
expect(data.get('af0rmelement')).toBe('aformelement')
expect(data.get('fdate')).toBe('2022-10-05')
expect(data.get('fchannel')).toBe('38')
expect(data.get('fdate')).toBe('2023-11-19')
expect(data.get('fchannel')).toBe('154')
expect(data.get('submit')).toBe('Search')
})
// Parses the same schedule page once per language and checks the first
// programme. Uses the module-level header fixtures and the module-level
// `axios.get` mock, so no per-test mock or header fixture is declared here.
it('can parse response', async () => {
  const content = fs.readFileSync(path.resolve(__dirname, '__data__/content.html'))

  // Replace the start/stop date objects with their JSON form so they can be
  // compared against plain strings.
  const serialize = p => {
    p.start = p.start.toJSON()
    p.stop = p.stop.toJSON()
    return p
  }

  const indonesiaResults = (
    await parser({ date, content, channel, headers: indonesiaHeaders })
  ).map(serialize)

  expect(indonesiaResults[0]).toMatchObject({
    start: '2023-11-18T17:15:00.000Z',
    stop: '2023-11-18T18:05:00.000Z',
    title: 'Blue Bloods S13, Ep 19',
    episode: 19,
    description:
      'Jamie bekerja sama dengan FDNY untuk menemukan pelaku pembakaran yang bertanggung jawab atas kebakaran hebat yang terjadi di fasilitas penyimpanan bukti milik NYPD.'
  })

  const englishResults = (
    await parser({
      date,
      content,
      channel: { ...channel, lang: 'en' },
      headers: englishHeaders
    })
  ).map(serialize)

  expect(englishResults[0]).toMatchObject({
    start: '2023-11-18T17:15:00.000Z',
    stop: '2023-11-18T18:05:00.000Z',
    title: 'Blue Bloods S13, Ep 19',
    episode: 19,
    description:
      'Jamie partners with the FDNY to find the arsonist responsible for a massive fire at an NYPD evidence storage facility.'
  })
})
// A page without schedule rows must produce an empty programme list.
it('can handle empty guide', async () => {
  const content = fs.readFileSync(path.resolve(__dirname, '__data__/no_content.html'))

  const results = await parser({
    date,
    channel,
    content,
    headers: indonesiaHeaders
  })

  expect(results).toMatchObject([])
})