From dfebca32e74a9b2b404e30753c8c807515b9d2b1 Mon Sep 17 00:00:00 2001 From: freearhey Date: Wed, 10 Mar 2021 01:15:51 +0300 Subject: [PATCH] Add english guide --- guides/en/WebGrab++.config.xml | 15 ++ guides/en/guide.xml | 71 ++++++++ guides/en/hot_cookies.txt | 6 + guides/en/rex/rex.config.xml | 171 ++++++++++++++++++ guides/en/robots/tv.yandex.ru.robots | 12 ++ guides/en/robots/tvtv.ca.robots | 6 + guides/en/robots/tvtv.us.robots | 6 + guides/en/robots/www.tvguide.co.uk.robots | 48 +++++ .../en/siteini.pack/m.tvguide.co.uk-full.ini | 70 +++++++ run.sh | 3 +- 10 files changed, 407 insertions(+), 1 deletion(-) create mode 100755 guides/en/WebGrab++.config.xml create mode 100644 guides/en/guide.xml create mode 100644 guides/en/hot_cookies.txt create mode 100755 guides/en/rex/rex.config.xml create mode 100644 guides/en/robots/tv.yandex.ru.robots create mode 100644 guides/en/robots/tvtv.ca.robots create mode 100644 guides/en/robots/tvtv.us.robots create mode 100644 guides/en/robots/www.tvguide.co.uk.robots create mode 100755 guides/en/siteini.pack/m.tvguide.co.uk-full.ini diff --git a/guides/en/WebGrab++.config.xml b/guides/en/WebGrab++.config.xml new file mode 100755 index 00000000..6bf25076 --- /dev/null +++ b/guides/en/WebGrab++.config.xml @@ -0,0 +1,15 @@ + + + + guide.xml + + rex + Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71 + off + 4 + 0 + f + + BBC News + + diff --git a/guides/en/guide.xml b/guides/en/guide.xml new file mode 100644 index 00000000..9fd5f5bb --- /dev/null +++ b/guides/en/guide.xml @@ -0,0 +1,71 @@ + + + + BBC News + http://www.tvguide.co.uk + + + BBC News at 9 + 09/03/2021.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + BBC News at One; Weather + 09/03/2021.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + BBC News + The latest national and international stories as they 
break.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + BBC News at Six; Weather + 09/03/2021.(n) + + + Sportsday + Results and analysis from countrywide events.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + Outside Source + 09/03/2021 Live from the heart of the BBC newsroom, Ros Atkins with an innovative take on the latest global stories.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + BBC News at Ten + 09/03/2021.(n) + + + The Papers + 09/03/2021 - 1 No need to wait until tomorrow morning to see what's in the papers - tune in for a lively and informed conversation about the next day's headlines.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + BBC News + The latest national and international stories as they break.(n) + + + The Papers + 09/03/2021 - 2 No need to wait until tomorrow morning to see what's in the papers - tune in for a lively and informed conversation about the next day's headlines.(n) + + \ No newline at end of file diff --git a/guides/en/hot_cookies.txt b/guides/en/hot_cookies.txt new file mode 100644 index 00000000..fed3ae38 --- /dev/null +++ b/guides/en/hot_cookies.txt @@ -0,0 +1,6 @@ +# Hot_Cookies, saved for the purpose of an actual WebGrab run +# Saved by WG++ 10/03/2021 00:58 +# Response cookie from url :http://www.tvguide.co.uk/mobile/channellisting.asp?ch=66 + +www.tvguide.co.uk FALSE / FALSE 1646791200 TVGID%5Fsession 9999 +www.tvguide.co.uk FALSE / TRUE 0 ASPSESSIONIDSUQRQBRD NOJDMHPDBIFHPGBBMEGOFOKK diff --git a/guides/en/rex/rex.config.xml b/guides/en/rex/rex.config.xml new file mode 100755 index 00000000..d8bbf072 --- /dev/null +++ b/guides/en/rex/rex.config.xml @@ -0,0 +1,171 @@ + + + + + + + + + + guide.xml + + + 'description'\n'actor' + The result is the existing 'description', followed by, on a newline, the actor(s) separated by the standard WG++ element separator |. 
+ The result: + This is the original description. + Michael Douglas|Kim Basinger + + You probably don't like the | as separator between the actors, so you specify another separator like this: + 'description'\n'actor(, )' + The result: + This is the original description. + Michael Douglas, Kim Basinger + + You can make this prettier by adding some text to the actors addition: + 'description'\nActors: 'actor(, )'. + The result: + This is the original description. + Actors: Michael Douglas, Kim Basinger. + + A small problem: Suppose the source xmltv show doesn't have any actors, then the result would be not so pretty: + This is the original description. + Actors: . + + To avoid that, the added text can be linked to the element it must be added to, like this: + 'description'{\nActors: 'actor(, )'.} + Result with actors: + This is the original description. + Actors: Michael Douglas, Kim Basinger. + And without actors: + This is the original description. + + An example with some more elements: + 'description'{\n\tYear of production: 'productiondate'.}{\n\tProducer: 'producer(, )'.}{\n\tActors: 'actor(, )'.} + Result: + This is the original description. + Year of production: 2002. + Producer: Steven Spielberg. + Actors: Michael Douglas, Kim Basinger. + + And another one: + {Episode: 'episode'\t}'subtitle' + Result: + Episode: 3.2/12.1 The original subtitle + + You can also remove elements (but not the title!) from the xmltv listing by specifying an empty element, like this: + or simply + This will remove all elements + And this: + + Will remove the element, including all its child elements like , etc. + +Additional options : + +** Operations : optionally to do certain operations on the element value to expand e.g: + These operations must be specified within the ' ' characters that specify the elementname, enclosed by [] and separated by a , e.g. 
+ {Summary: 'description[cleanup(style=upper), max_chars=500]'}{\nActors: 'actor(, )'} + supported operations : + - cleanup with style and tags arguments, + - max_chars, max_words and max_sentences to limit the content data of the expanded element. + +** Xmltv Attributes in content to expand: If the source xmltv element has an attribute, like lang="en" or role="rolename" (in actor) or system="US", + it is possible to add it to the expanded content by add /a (for attribute value only) or /a+ (for attributename and value) to the element name. + This /a or /a+ addition must be added directly after the element name, like 'actor/a' or combined with a custom separator, 'actor/a(, )' + or combined with an operation 'country/a(/)[cleanup(style=lower)]' + Example (assuming the actors role values are provided in the source xmltv file): + 'description'{\n\tYear of production: 'productiondate',}{ Rating: 'rating/a+'.}{\n\tProducer: 'producer(, )'.}{\n\tActors: 'actor/a+(, )'.} + + Result: + This is the original description. + Year of production: 2002, Rating TV-14(system=US). + Producer: Steven Spielberg. + Actors: Michael Douglas(role=The carpenter), Kim Basinger(role=Mary). + +Summary of Content/Values: +1. Syntax +content + - the content of the xmltv-target elements can be specified by means of a mixture of text and element-values. + - content can be left empty to remove the xmltv element (except the element ) + - the element-values must be entered by their (wg++) element-name enclosed by ' ' + - optionally, element values can be processed by means of certain operations, + E.g. 'description[cleanup(style=upper), max_sentences=2]' + - optionally, element xmltv attribute values can be added to the content by adding /a (only attribute value) or /a+ (value + attributename) to the 'elementname' + - multiple value elements (like actor) will be converted to single value elements if the xmltv-target element is a single value element, like <desc>. 
+ The individual values will be listed with a (standard WG++ internal element separator character) | as separator unless another separator is specified as follows: +'element-name(separator-string)' e.g. 'actor(, )' or with attribute 'actor/a(, )' + - text and element-names can be linked together by enclosing them by {}. This will ensure that, when the element in it is empty, everything between the {} is + ignored. E.g. {\nProduced in : ('productiondate')} + - the text in the xmltv-target elements may contain the following simple formatting : + - \n or \r to force a newline + - \t to add a tab + +2. The allowed xmltv-target elements (the ones in the target file specified above) are : +- IMPORTANT! : any of the next listed xmltv-target elements that is specified in this allocation specification, replaces the existing xmltv element and +its content! +2.1 'Full' function , these can be added, changed and removed + <title> <sub-title> <desc> <star-rating> <director> <actor> <category> <episode> <icon> + <review> (=optional new xmltv element) +2.2 'Remove/Keep' only, cannot be added, changed, only removed or kept as 'is' + <date> <producer> <writer> <presenter> <composer> <commentator> <rating> <aspect> <quality> <url> <country> + +3. Supported element-names (from the existing xmltv listing, name definitions as in Appendix E of the documentation) to be used as content to expand: + 'title' 'description' 'starrating' 'subtitle' 'productiondate' 'category' 'director' 'actor' 'presenter' 'writer' 'composer' 'producer' 'commentator' 'rating' + 'episode' 'showicon' 'review' 'subtitles' 'premiere' 'previously-shown' 'aspect' 'quality' 'country' 'url' + +4. 
Attributes + - for each of the xmltv-elements the following attribute can be specified + (if not specified, the existing one, if present in the xmltv, will be used) : + - lang for <title> and <desc> , default : no attribute + - system for <star-rating> , default : no attribute + - type for <review> , default: type="text" + - Existing xmltv attribute values can be added to expanded content. (see above) + ]]> + <!-- examples--> + <sub-title>{Episode: 'episode' }'subtitle'</sub-title> + <desc>'description[max_words=100]'{\n\t¤ Produced in: 'productiondate'. }{¤ Category: 'category(, )'. }{\n\t¤ Actors: 'actor/a+(, )'}{\n\t¤ Director: 'director(, )'}{\n\t¤ Presenter: 'presenter(, )'}</desc> + <credits></credits> + <episode-num></episode-num> + <date></date> + <category></category> + <review>{Ratings: 'rating(, )'.}</review> + <rating></rating> +</settings> \ No newline at end of file diff --git a/guides/en/robots/tv.yandex.ru.robots b/guides/en/robots/tv.yandex.ru.robots new file mode 100644 index 00000000..3e2c3676 --- /dev/null +++ b/guides/en/robots/tv.yandex.ru.robots @@ -0,0 +1,12 @@ +User-agent: * +Disallow: /*/search$ +Disallow: /search$ +Disallow: /*/search?*text +Disallow: /search?*text +Disallow: /*/my/favorites +Disallow: /my/favorites +Clean-param: eventId /program/* +Clean-param: eventId /*/program/* +Sitemap: https://tv.yandex.ru/sitemap.xml +Host: https://tv.yandex.ru + diff --git a/guides/en/robots/tvtv.ca.robots b/guides/en/robots/tvtv.ca.robots new file mode 100644 index 00000000..0bf91dd6 --- /dev/null +++ b/guides/en/robots/tvtv.ca.robots @@ -0,0 +1,6 @@ +User-agent: * +Disallow: /tvm/ +Disallow: /gn/ +User-agent: WebGrab+Plus +Disallow: / + diff --git a/guides/en/robots/tvtv.us.robots b/guides/en/robots/tvtv.us.robots new file mode 100644 index 00000000..0bf91dd6 --- /dev/null +++ b/guides/en/robots/tvtv.us.robots @@ -0,0 +1,6 @@ +User-agent: * +Disallow: /tvm/ +Disallow: /gn/ +User-agent: WebGrab+Plus +Disallow: / + diff --git 
a/guides/en/robots/www.tvguide.co.uk.robots b/guides/en/robots/www.tvguide.co.uk.robots new file mode 100644 index 00000000..da53120e --- /dev/null +++ b/guides/en/robots/www.tvguide.co.uk.robots @@ -0,0 +1,48 @@ +Sitemap: https://www.tvguide.co.uk/sitemaps/sitemap_shows.xml +Sitemap: https://www.tvguide.co.uk/sitemaps/sitemap.xml + +User-agent: * +Disallow: /iplayerfeeds.asp + +User-agent: * +Disallow: /iphone/* + +User-agent: * +Disallow: /storydetail.asp + +User-agent: * +Disallow: /search_all.asp + +User-agent: * +Disallow: /livechatlink.asp + +User-agent: * +Disallow: /programmewatched.asp + +User-agent: * +Disallow: /London2012Olympics.asp + +User-agent: * +Disallow: /addfreeview.asp + +User-agent: * +Disallow: /trailers/trailer.asp + +User-agent: * +Disallow: /petproblems/Highlights-TV-Show-Multi.asp + +User-agent: * +Disallow: /*ajax* + +User-agent: * +Disallow: /*flash/* + +User-agent: * +Disallow: /*.vcs + +User-agent: * +Disallow: /*rewrite.asp + + + + diff --git a/guides/en/siteini.pack/m.tvguide.co.uk-full.ini b/guides/en/siteini.pack/m.tvguide.co.uk-full.ini new file mode 100755 index 00000000..2f60b0fc --- /dev/null +++ b/guides/en/siteini.pack/m.tvguide.co.uk-full.ini @@ -0,0 +1,70 @@ +**------------------------------------------------------------------------------------------------ +* @header_start +* WebGrab+Plus ini for grabbing EPG data from TvGuide websites +* @Site: tvguide.co.uk +* @MinSWversion: V1.1.1/53 +* none +* @Revision 2 - [06/03/2017] Netuddki +* added showicon +* @Revision 1 - [17/07/2016] Blackberar199 +* showsplit change,start fix +* @Revision 0 - [19/05/2016] Willy De Wilde +* mobile version +* @Remarks: +* none +* @header_end +**------------------------------------------------------------------------------------------------ + +site {url=tvguide.co.uk|timezone=Europe/London|maxdays=6.1|cultureinfo=en-GB|charset=UTF-8|titlematchfactor=50|allowlastpageoverflow} 
+url_index{url|http://www.tvguide.co.uk/mobile/channellisting.asp?ch=|channel|} +url_index.headers {customheader=Accept-Encoding=gzip,deflate} +urldate.format {datestring|M/d/yyyy} +* +index_showsplit.scrub {multi(exclude="ADTECH")|<div id="channel-listings">|<tr>|</tr>|<!-- Begin comScore Tag -->} +index_start.scrub {single|<td class="time|">|</td>|</td>} +index_start.modify {replace|am| am} +index_start.modify {replace|pm| pm} +index_title.scrub {single|<div class="title"|">|</div>|</div>} +index_title.modify {cleanup(tags="<"">")} +index_title.modify {cleanup} +index_description.scrub {multi |<div class="detail">||</div>} +index_episode.scrub {single (separator="<br>" include="Season ""Episode ")|<div class="detail">||</div>} +index_description.modify {remove|'index_episode'} +index_description.modify {replace|<div class="other">|**BR**} +index_description.modify {replace|<br><br><br>|**BR**} +index_description.modify {replace|<br><br>|**BR**} +index_description.modify {replace|<br>|**BR**} +index_description.modify {cleanup(tags="<"">")} +index_description.modify {replace|**BR**|\n} +index_description.modify {remove|**BR*} +index_description.modify {remove(type=regex)|\(.+?\)} +index_description.modify {cleanup} +* +index_urlshow {url ()|| href="||"} +title.scrub {single ()|<h1 class="showname"|>|</h1>} +title.modify {cleanup(tags="<"">")} +title.modify {cleanup} +showicon.scrub {single|<div id="headerImage|:url(|);|</div>} +director.scrub {multi((exclude="IMDB"))|Director:|<span class="actor">|</span>} +director.modify {cleanup(tags="<"">")} +*producer.scrub {single(exclude="IMDB")|<span class="role">Series Producer|<span class="actor">|</span>|</a><br>} +*producer.scrub {single(exclude="IMDB")|<span class="role">Executive Producer|<span class="actor">|</span>|</a><br>} +writer.scrub {single(separator=", ")|Writer:|<span class="actor">|</span>} +writer.modify {cleanup(tags="<"">")} +actor.scrub 
{multi(exclude="IMDB")|Cast</span><br>|itemprop="name">|</span>|<span class="tvchannel">} +category.scrub {multi(separator="/")|<span class="tvchannel">Category: </span>|<span class="programmetext">|</span>|<br>} +productiondate.scrub {single()|<h1|(|)|</h1>} * in title +*title.modify {remove|('productiondate')} +*title.modify {cleanup} +* +** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +** ##### CHANNEL FILE CREATION (only to create the xxx-channel.xml file) +** +** @auto_xml_channel_start +* enable the following three lines to create a channel list file for only the selected files +* channels from the desktop version +*url_index{url|http://my.tvguide.co.uk/channellisting.asp?ch=74} +*index_site_channel.scrub {multi|<option value=|>|</option>|</table>} +*index_site_id.scrub {multi|<option value=||>|</table>} +** @auto_xml_channel_end diff --git a/run.sh b/run.sh index 3aa1541b..a9c80aec 100755 --- a/run.sh +++ b/run.sh @@ -31,7 +31,8 @@ else fi fi ; -mono "$DIR/bin/WebGrab+Plus.exe" "$DIR" +mono "$DIR/bin/WebGrab+Plus.exe" "$DIR/guides/en" +mono "$DIR/bin/WebGrab+Plus.exe" "$DIR/guides/ru" quit 0;