mirror of
https://github.com/iptv-org/database.git
synced 2025-05-09 19:20:01 -04:00
Update scripts
This commit is contained in:
parent
66ec908b6e
commit
179ef6a41d
28 changed files with 958 additions and 866 deletions
271
scripts/db/validate.ts
Normal file
271
scripts/db/validate.ts
Normal file
|
@ -0,0 +1,271 @@
|
|||
import { Collection, Storage, File, Dictionary, Logger } from '@freearhey/core'
|
||||
import { DATA_DIR } from '../constants'
|
||||
import { transliterate } from 'transliteration'
|
||||
import { program } from 'commander'
|
||||
import Joi from 'joi'
|
||||
import { CSVParser } from '../core'
|
||||
import chalk from 'chalk'
|
||||
|
||||
// CLI: a single optional positional argument naming the file to validate.
program.argument('[filepath]', 'Path to file to validate').parse(process.argv)

// Shared logger used for both the per-file report and fatal errors.
const logger = new Logger()

// filename -> parsed rows keyed by their identifier column (filled by main()).
const buffer = new Dictionary()

// filename -> parsed rows as returned by the CSV parser (filled by main()).
const files = new Dictionary()

// filename -> Joi scheme definition exported by ../schemes.
const schemes: Record<string, object> = require('../schemes')
|
||||
|
||||
/**
 * Validates the CSV files stored in DATA_DIR.
 *
 * Pass 1 loads every `*.csv` file, rejects files with trailing whitespace,
 * a bare `\n` inside a CRLF-delimited row, or a row whose column count differs
 * from the header row, then parses the file and stores it in `files` (rows as
 * parsed) and `buffer` (rows keyed by `channel`, `id` or `code`).
 *
 * Pass 2 checks the files named on the command line, or every loaded file
 * when no argument was given: duplicates, cross-file references and the Joi
 * scheme. Errors are printed per file; if any were found the process is ended
 * through `handleError`.
 */
async function main() {
  const dataStorage = new Storage(DATA_DIR)
  const _files = await dataStorage.list('*.csv')
  const parser = new CSVParser()
  let globalErrors = new Collection()

  // Matches only commas followed by an even number of double quotes up to the
  // end of the line, i.e. commas that are not inside a quoted field.
  const columnSeparator = /,(?=(?:(?:[^"]*"){2})*[^"]*$)/

  for (const filepath of _files) {
    const file = new File(filepath)
    if (file.extension() !== 'csv') continue

    const csv = await dataStorage.load(file.basename())
    if (/\s+$/.test(csv))
      return handleError(`Error: empty lines at the end of file not allowed (${filepath})`)

    // Rows are split on CRLF only, so any "\n" still present inside a row
    // means that row was terminated with a bare LF.
    const rows = csv.split(/\r\n/)
    const headers = rows[0].split(',')
    for (const [i, line] of rows.entries()) {
      if (line.includes('\n'))
        return handleError(
          `Error: row ${i + 1} has the wrong line ending character, should be CRLF (${filepath})`
        )
      if (line.split(columnSeparator).length !== headers.length)
        return handleError(`Error: row ${i + 1} has the wrong number of columns (${filepath})`)
    }

    const data = await parser.parse(csv)
    const filename = file.name()

    // Index the rows by the column that identifies an entry in this file.
    let grouped
    switch (filename) {
      case 'blocklist':
        grouped = data.keyBy(item => item.channel)
        break
      case 'categories':
      case 'channels':
        grouped = data.keyBy(item => item.id)
        break
      default:
        grouped = data.keyBy(item => item.code)
        break
    }

    buffer.set(filename, grouped)
    files.set(filename, data)
  }

  // Reference checks applied to every channel row, in reporting order:
  // [column in channels.csv, name of the file the value must exist in].
  const channelReferences: [string, string][] = [
    ['subdivision', 'subdivisions'],
    ['categories', 'categories'],
    ['replaced_by', 'channels'],
    ['languages', 'languages'],
    ['country', 'countries']
  ]

  const filesToCheck = program.args.length ? program.args : _files
  for (const filepath of filesToCheck) {
    const file = new File(filepath)
    const filename = file.name()
    if (!schemes[filename]) return handleError(`Error: "${filename}" scheme is missing`)

    // A path given on the command line may name a file that was not loaded
    // from DATA_DIR in pass 1; report that instead of failing on `rows.all()`.
    const rows: Collection | undefined = files.get(filename)
    if (!rows) return handleError(`Error: "${filename}" data is missing (${filepath})`)

    // Deep copy so the row-level validators never touch the parsed originals
    // that are validated against the Joi scheme below.
    const rowsCopy = JSON.parse(JSON.stringify(rows.all()))

    let fileErrors = new Collection()
    switch (filename) {
      case 'channels':
        fileErrors = fileErrors.concat(findDuplicatesBy(rowsCopy, 'id'))
        for (const [i, row] of rowsCopy.entries()) {
          fileErrors = fileErrors.concat(validateChannelId(row, i))
          fileErrors = fileErrors.concat(validateChannelBroadcastArea(row, i))
          for (const [field, target] of channelReferences) {
            fileErrors = fileErrors.concat(checkValue(i, row, 'id', field, buffer.get(target)))
          }
        }
        break
      case 'blocklist':
        for (const [i, row] of rowsCopy.entries()) {
          fileErrors = fileErrors.concat(validateChannel(row.channel, i))
        }
        break
      case 'countries':
        fileErrors = fileErrors.concat(findDuplicatesBy(rowsCopy, 'code'))
        for (const [i, row] of rowsCopy.entries()) {
          fileErrors = fileErrors.concat(
            checkValue(i, row, 'code', 'languages', buffer.get('languages'))
          )
        }
        break
      case 'subdivisions':
        fileErrors = fileErrors.concat(findDuplicatesBy(rowsCopy, 'code'))
        for (const [i, row] of rowsCopy.entries()) {
          fileErrors = fileErrors.concat(
            checkValue(i, row, 'code', 'country', buffer.get('countries'))
          )
        }
        break
      case 'regions':
        fileErrors = fileErrors.concat(findDuplicatesBy(rowsCopy, 'code'))
        for (const [i, row] of rowsCopy.entries()) {
          fileErrors = fileErrors.concat(
            checkValue(i, row, 'code', 'countries', buffer.get('countries'))
          )
        }
        break
      case 'categories':
        fileErrors = fileErrors.concat(findDuplicatesBy(rowsCopy, 'id'))
        break
      case 'languages':
        fileErrors = fileErrors.concat(findDuplicatesBy(rowsCopy, 'code'))
        break
    }

    // Structural validation of every row against the file's Joi scheme;
    // abortEarly: false collects all violations of a row, not just the first.
    const schema = Joi.object(schemes[filename])
    rows.forEach((row: string | string[] | boolean, i: number) => {
      const { error } = schema.validate(row, { abortEarly: false })
      if (error) {
        error.details.forEach(detail => {
          fileErrors.push({ line: i + 2, message: detail.message })
        })
      }
    })

    if (fileErrors.count()) {
      logger.info(`\n${chalk.underline(filepath)}`)
      fileErrors.forEach(err => {
        const position = err.line.toString().padEnd(6, ' ')
        logger.info(` ${chalk.gray(position)} ${err.message}`)
      })
      globalErrors = globalErrors.concat(fileErrors)
    }
  }

  if (globalErrors.count()) return handleError(`${globalErrors.count()} error(s)`)
}

main()
|
||||
|
||||
/**
 * Checks that every value stored under `field` in `row` exists in `collection`.
 *
 * A string value is checked as a single reference and an array as a list of
 * references; any other value (e.g. a boolean) yields no checks.
 *
 * @param i - Zero-based index of the row; reported as line `i + 2`.
 * @param row - The row being validated.
 * @param key - Column whose value identifies the row in error messages.
 * @param field - Column holding the reference(s) to verify.
 * @param collection - Lookup queried with `missing(value)` for each reference.
 * @returns Collection of `{ line, message }` entries, one per unknown value.
 */
function checkValue(
  i: number,
  row: { [key: string]: string[] | string | boolean },
  key: string,
  field: string,
  collection: Collection
) {
  const errors = new Collection()

  // Normalise the cell to a list. An array literal is used instead of
  // `new Array(value)`, whose meaning changes with the argument's type.
  const cell = row[field]
  let values: string[] = []
  if (Array.isArray(cell)) {
    values = cell
  } else if (typeof cell === 'string') {
    values = [cell]
  }

  for (const value of values) {
    if (collection.missing(value)) {
      errors.push({
        line: i + 2,
        message: `"${row[key]}" has an invalid ${field} "${value}"`
      })
    }
  }

  return errors
}
|
||||
|
||||
/**
 * Verifies that `channelId` is present among the channels held in the
 * module-level `buffer`.
 *
 * @param channelId - Channel identifier referenced by the row.
 * @param i - Zero-based index of the row; reported as line `i + 2`.
 * @returns Collection holding one `{ line, message }` entry when the channel
 *          is unknown, otherwise an empty collection.
 */
function validateChannel(channelId: string, i: number) {
  const errors = new Collection()
  const knownChannels = buffer.get('channels')

  const isUnknown = knownChannels.missing(channelId)
  if (!isUnknown) return errors

  errors.push({
    line: i + 2,
    message: `"${channelId}" is missing in the channels.csv`
  })

  return errors
}
|
||||
|
||||
/**
 * Reports every row whose `key` value was already used by an earlier row.
 *
 * Values are compared case-insensitively. Rows that hold no string under
 * `key` are skipped, so a blank cell cannot crash the duplicate scan.
 *
 * @param rows - Rows of one file, in file order.
 * @param key - Name of the column that must be unique.
 * @returns Collection of `{ line, message }` entries, one per duplicate row
 *          (the first occurrence of a value is never reported).
 */
function findDuplicatesBy(rows: { [key: string]: string }[], key: string) {
  const errors = new Collection()
  // Lower-cased values seen so far. Deliberately not named `buffer`, which is
  // the module-level lookup the other validators read.
  const seen = new Set<string>()

  rows.forEach((row, i) => {
    const value: unknown = row[key]
    if (typeof value !== 'string') return

    const normId = value.toLowerCase()
    if (seen.has(normId)) {
      errors.push({
        line: i + 2,
        message: `entry with the ${key} "${value}" already exists`
      })
    }

    seen.add(normId)
  })

  return errors
}
|
||||
|
||||
/**
 * Checks that a channel's `id` equals `<normalized name>.<lower-cased country>`.
 *
 * The name is transliterated, a leading `@` or `&` is spelled out, every `+`
 * becomes `Plus`, a whitespace-then-minus before a digit becomes ` Minus`, and
 * finally every character that is not a letter or digit is removed.
 *
 * @param row - Channel row providing `id`, `name` and `country`.
 * @param i - Zero-based index of the row; reported as line `i + 2`.
 * @returns Collection holding one `{ line, message }` entry when the id does
 *          not match, otherwise an empty collection.
 */
function validateChannelId(row: { [key: string]: string }, i: number) {
  // Builds the name part of the id; the replacements run in this exact order.
  const normalize = (name: string) =>
    transliterate(name)
      .replace(/^@/i, 'At')
      .replace(/^&/i, 'And')
      .replace(/\+/gi, 'Plus')
      .replace(/\s-(\d)/gi, ' Minus$1')
      .replace(/[^a-z\d]+/gi, '')

  const errors = new Collection()
  const expected = `${normalize(row.name)}.${row.country.toLowerCase()}`

  if (row.id === expected) return errors

  errors.push({
    line: i + 2,
    message: `"${row.id}" must be derived from the channel name "${row.name}" and the country code "${row.country}"`
  })

  return errors
}
|
||||
|
||||
/**
 * Checks each `<type>/<code>` entry of a channel's `broadcast_area`.
 *
 * Type `r` is looked up in the regions, `c` in the countries and `s` in the
 * subdivisions held in the module-level `buffer`. Entries with any other type
 * prefix are not checked here.
 *
 * @param row - Channel row providing `broadcast_area` and `id`.
 * @param i - Zero-based index of the row; reported as line `i + 2`.
 * @returns Collection of `{ line, message }` entries, one per unknown area.
 */
function validateChannelBroadcastArea(row: { [key: string]: string[] }, i: number) {
  const errors = new Collection()
  const regions = buffer.get('regions')
  const countries = buffer.get('countries')
  const subdivisions = buffer.get('subdivisions')

  for (const areaCode of row.broadcast_area) {
    const [type, code] = areaCode.split('/')

    // Only the lookup matching the prefix is queried.
    let unknownArea = false
    switch (type) {
      case 'r':
        unknownArea = regions.missing(code)
        break
      case 'c':
        unknownArea = countries.missing(code)
        break
      case 's':
        unknownArea = subdivisions.missing(code)
        break
    }

    if (unknownArea) {
      errors.push({
        line: i + 2,
        message: `"${row.id}" has the wrong broadcast_area "${areaCode}"`
      })
    }
  }

  return errors
}
|
||||
|
||||
/**
 * Logs `message` in red through the module-level logger and terminates the
 * process with exit code 1.
 *
 * The return type is declared as `never` because a function declaration that
 * cannot return is otherwise inferred as `void`; with the annotation the
 * compiler treats every call as the end of that code path.
 *
 * @param message - Text of the fatal error to report.
 */
function handleError(message: string): never {
  logger.error(chalk.red(message))
  process.exit(1)
}
|
Loading…
Add table
Add a link
Reference in a new issue