Fix snrt.ma parser and implement channel parser.

Signed-off-by: Toha <tohenk@yahoo.com>
This commit is contained in:
Toha 2025-01-11 19:19:27 +07:00
parent e5e1988824
commit aa1356e10b
No known key found for this signature in database
GPG key ID: 2D7AA6389D44DCAB
5 changed files with 1389 additions and 22662 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<channels>
<channel site="snrt.ma" lang="ar" xmltv_id="" site_id="4076">Alidaa alwatania</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="" site_id="4077">Chaine Inter</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="" site_id="4078">Idaât Mohammed Assadiss</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="" site_id="4079">Alidaâ Alamazighia</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="AlAoula.ma" site_id="1208">Al Aoula</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Laayoune.ma" site_id="4069">Laâyoune</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="AlMaghribia.ma" site_id="4072">Almaghribia</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Arryadia.ma" site_id="4070">Arryadia</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Athaqafia.ma" site_id="4071">Athaqafia</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="AlMaghribia.ma" site_id="4072">Al Maghribia</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Assadissa.ma" site_id="4073">Assadissa</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Athaqafia.ma" site_id="4071">Athaqafia</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Laayoune.ma" site_id="4069">Laâyoune</channel>
<channel site="snrt.ma" lang="ar" xmltv_id="Tamazight.ma" site_id="4075">Tamazight</channel>
</channels>

View file

@ -8,82 +8,91 @@ dayjs.extend(utc)
dayjs.extend(timezone)
dayjs.extend(customParseFormat)
const tz = 'Africa/Casablanca'
module.exports = {
site: 'snrt.ma',
channels: 'snrt.ma.channels.xml',
days: 2,
url: function ({ channel }) {
url({ channel }) {
return `https://www.snrt.ma/ar/node/${channel.site_id}`
},
request: {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded'
},
data: function ({ date }) {
const params = new URLSearchParams()
params.append('_method', 'POST')
params.append('data-date', date.format('YYYYMMDD'))
params.append('current_date', date.format('YYYYMMDD'))
return params
cache: {
ttl: 24 * 60 * 60 * 1000 // 1 day
}
},
parser: function ({ content, date }) {
const programs = []
const items = parseItems(content)
items.forEach(item => {
const prev = programs[programs.length - 1]
const $item = cheerio.load(item)
let start = parseStart($item, date)
if (prev) {
if (start.isBefore(prev.start)) {
start = start.add(1, 'd')
date = date.add(1, 'd')
}
prev.stop = start
}
const stop = start.add(30, 'm')
programs.push({
parser({ content, date }) {
const [$, items] = parseItems(content)
const programs = items.map(item => {
const $item = $(item)
const start = parseStart($item)
return {
title: parseTitle($item),
description: parseDescription($item),
category: parseCategory($item),
start,
stop
})
start
}
}).filter(item => item.start).sort((a, b) => a.start - b.start)
// fill start-stop
for (let i = 0; i < programs.length; i++) {
if (i < programs.length - 1) {
programs[i].stop = programs[i + 1].start
} else {
programs[i].stop = dayjs.tz(
`${date.add(1, 'd').format('YYYY-MM-DD')} 00:00`,
'YYYY-MM-DD HH:mm',
tz
)
}
}
return programs.filter(p => p.start.isSame(date, 'd'))
},
async channels({ lang = 'ar' }) {
const axios = require('axios')
const result = await axios
.get('https://www.snrt.ma/ar/node/1208')
.then(response => response.data)
.catch(console.error)
const $ = cheerio.load(result)
const items = $('.channels-row h4').toArray()
const channels = items.map(item => {
const $item = $(item)
const url = $item.find('a').attr('href')
return {
lang,
site_id: url.substr(url.lastIndexOf('/') + 1),
name: $item.find('img').attr('alt')
}
})
return programs
return channels
}
}
function parseStart($item, date) {
const timeString = $item('.grille-time').text().trim()
const [hours, minutes] = timeString.includes('H')
? timeString.split('H').map(Number)
: timeString.split(':').map(Number)
const formattedTime = `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:00`
const dateString = `${date.format('YYYY-MM-DD')} ${formattedTime}`
return dayjs.tz(dateString, 'YYYY-MM-DD HH:mm:ss', 'Africa/Casablanca')
function parseStart($item) {
const date = $item.attr('class').match(/\d{8}/)[0]
const time = $item.find('.grille-time').text().trim()
if (time) {
return dayjs.tz(`${date} ${time.replace('H', ':')}`, 'YYYYMMDD HH:mm', tz)
}
}
function parseTitle($item) {
return $item('.program-title-sm').text().trim()
return $item.find('.program-title-sm').text().trim()
}
function parseDescription($item) {
return $item('.program-description-sm').text().trim()
return $item.find('.program-description-sm').text().trim()
}
function parseCategory($item) {
return $item('.genre-first').text().trim()
return $item.find('.genre-first').text().trim()
}
function parseItems(content) {
const $ = cheerio.load(content)
return $('.grille-line').toArray()
return [$, $('.grille-line').toArray()]
}

View file

@ -4,14 +4,15 @@ const utc = require('dayjs/plugin/utc')
const timezone = require('dayjs/plugin/timezone')
const fs = require('fs')
const path = require('path')
dayjs.extend(utc)
dayjs.extend(timezone)
const date = dayjs.utc('2024-12-19', 'YYYY-MM-DD').startOf('d')
const channel = { site_id: '1208', xmltv_id: 'AlAoula.ma', lang: 'ar' }
const date = dayjs.utc('2025-01-13', 'YYYY-MM-DD').startOf('d')
const channel = { site_id: '4075', xmltv_id: 'Tamazight.ma', lang: 'ar' }
it('can generate valid url', () => {
expect(url({ channel })).toBe('https://www.snrt.ma/ar/node/1208')
expect(url({ channel })).toBe('https://www.snrt.ma/ar/node/4075')
})
it('can parse response', () => {
@ -22,12 +23,17 @@ it('can parse response', () => {
return p
})
expect(results.length).toBe(27)
expect(results[0]).toMatchObject({
category: 'القرآن الكريم',
description: '',
start: '2024-12-19T06:00:00.000Z',
stop: '2024-12-19T06:30:00.000Z',
title: 'ﺍﻟﺴﻼﻡ ﺍﻟﻮﻃﻨﻲ + ﺍﻟﻘﺮﺁﻥ ﺍﻟﻜﺮﻳﻢ'
start: '2025-01-12T23:15:00.000Z',
stop: '2025-01-12T23:30:00.000Z',
title: 'الموعد الرياضي'
})
expect(results[26]).toMatchObject({
start: '2025-01-13T21:30:00.000Z',
stop: '2025-01-13T23:00:00.000Z',
title: 'سهرة خاصة براس السنة الامازيغية',
category: 'ترفيه'
})
})