#!/usr/bin/perl # Copyright © 2011, 2012 Petr Písař # This is free software. You may redistribute copies of it under the terms of # the GNU General Public License . # There is NO WARRANTY, to the extent permitted by law. # Changelog: # # Version 6 # - Accept iframe player URL with channel name in the file name # - Send XML-RPC header needed for SMIL generator since 2012-05-09 # # Version 5 # - Support Apple M3U and MPEG2-TS/MPEG-4 format # - Improve HTML parser # - Remove autoStart=false option from iframe player URL to get page with # JSON instead of similar page with iframe again # (http://www.ceskatelevize.cz/porady/10316155327-horizont-ct24/) # # Version 4 # - Support RTMP app with slashes # - Report URI in error messages # # Version 3: # - Output in rtmpdump(1) format if `-d' option is specified # - Do not append playpath to URL if ambigous # # Version 2: # - Output playpath as librtmp option if necessary # (http://www.ct24.cz/vysilani/10099403120-kultura-v-regionech/) # - Perl 5.10 support # - Find JSON via iframe first and fall back to direct JSON # - More general example entry page URL in usage output # - Show content provider error message if exists # use strict; use warnings; our $VERSION = 6; use LWP::UserAgent; use HTTP::Request::Common; use HTTP::Response; use XML::XPath; use URI; use JSON 2.0; use Getopt::Std; use IO::Handle; my $SMIL_GENERATOR = '/ajax/playlistURL.php'; my $ENTRY = 'http://www.ceskatelevize.cz/ivysilani/zive/ct24/'; sub usage { return<). If this is the only argument, output list of all available streams in format `STREAM_BITRATE: URL' separated by new line. If STREAM_BITRATE is given, output URL of the stream with given rate only. By default, URL of RTMP/FLV/MPEG-4 stream is printed in librtmp(3) format (space-separated librtmp options may follow the URL and all of them must be passed as one argument to librtmp application). If `-d' option is specied, URL with possible arguments are printed as rtmpdump(1) arguments. Note ampersands are kept literal (this should work in simple subshell substition). If `-A' option is specified, HTTP/MPEG-TS/MPEG-4 video will be retrieved. There exist three levels selected by second option: -l Apple M3U play-list pointing to unbound stream segments will be output. This is handy if you have a player that supports the very special play-list. Specification can be found on . -f The Apple play-list will be processed and URLs of the underlying stream segmented to files will be printed. Due to nature of the play-list, the locators are printed periodically, a new segment a few seconds, possibly in endless loop. Intended workflow is to pipe the URLs to an HTTP client which echoes downloaded segments on standard input of a multimedia player. This is the default level. The bit rate must be specified, if more bit rates are available. -s The stream segments will be retrieved and dumped to standard output as continuous stream. You can pipe it your player. The bit rate must be specified, if more bit rates are available. Version: $VERSION. Copyright © 2011, 2012 Petr Písař This is free software. You may redistribute copies of it under the terms of the GNU General Public License . There is NO WARRANTY, to the extent permitted by law. EOM } our ($opt_A, $opt_d, $opt_l, $opt_f, $opt_s); getopts('Adlfs') or die usage; if ($#ARGV < 0 || $#ARGV > 1) { die "Bad invocation\n\n" . usage; } $ENTRY = $ARGV[0]; my $BITRATE = $ARGV[1]; if (!($opt_l || $opt_s)) { $opt_f = 1; } # each that operates on reference to array or hash # Works with perl 5.10.1 too. sub eachref { my $ref = shift; if (ref $ref eq 'HASH') { # Built-in implementation always supports HASH return sub { each %$ref; } } if (eval 'each @$ref' ) { # Built-in Perl 5.12 implementation eval 'return sub { each @$ref; }' } else { # Manual implementation of each ARRAY (needed for Perl < 5.12) my $index = -1; return sub { $index++; if ($index <= $#$ref) { ($index, $$ref[$index]); } else { (); } } } } # Convert nested JSON structure expressed as native hash reference into flat # array of key and value pairs. # E.g. { "x" => [ "y" => "1", "z" => undef ] } # becomes ( "x[0][y]", "1", "x[1][z] => null ). # This is handy when sending nested JSON structure as # application/x-www-form-urlencoded by HTTP::Request::Common. sub flatten { my ($ref, $prefix) = @_; my @output = (); my $doeach = eachref($ref); while (my ($key, $val) = &$doeach) { # TODO: Escape /[[]=]/ my $id = (defined $prefix) ? $prefix . '[' . $key . ']' : $key; if (ref $val eq 'HASH' || ref $val eq 'ARRAY') { push @output, flatten($val, $id); } else { push @output, ($id, $val // 'null'); } } return @output; } # Format RTMP URL for librtmp sub formaturl_librtmp { my ($rtmp, $app, $playpath) = @_; my $stream_url = $rtmp; if ($playpath =~ qr{/} or $app =~ qr{/}) { $stream_url .= ' app=' . $app . ' playpath=' . $playpath; } else { $stream_url .= $app . '/' . $playpath; } } # Format RTMP URL for librtmp sub formaturl_rtmpdump { my ($rtmp, $app, $playpath) = @_; my $stream_url = '--rtmp ' . $rtmp; if ($playpath =~ qr{/} or $app =~ qr{/}) { $stream_url .= ' --app ' . $app . ' --playpath ' . $playpath; } else { $stream_url .= $app . '/' . $playpath; } } # Find first pattern match in HTML page, HTML-unescape it and return it. # Otherwise return undef. sub htmlgrep { my ($html_page, $pattern) = @_; my ($text) = ($html_page =~ $pattern); if (defined $text) { $text =~ s/>/>/g; $text =~ s/</new(shift); s/&/%26/g; return $_; } # Try to get JSON request data from HTML page text passed as argument. # Return the JSON data or undef. sub findjson { htmlgrep(shift, qr{callSOAP\(([^)]*)\);}); } # Try to get setRequestHeader function arguments from JS page text passed as # argument. # Return list (header, value) or undef. sub findrequestheader { local $_ = htmlgrep(shift, qr{setRequestHeader\(([^)]*)\);}); if (!defined) { return undef; } return (m/'([^']*)', '([^']*)'/); } # Return array of { bitrate => INTEGER, url => URL } found in RTMP SMIL play # list. Arguments is playlist as string, URL of the playlist, # playlist as XML::XPath object and boolean signaling URL format (true for # rtmpdump format, false for librtmp format). sub extract_urls_from_rtmp_smil { my ($smil, $smil_url, $parser, $opt_d) = @_; my $videos = $parser->find('/data/smilRoot/body/switchItem/video[@enabled=true()]'); if ($videos->size <= 0) { die "No videos found in SMIL playlist <" . $smil_url . ">:\n" . $smil . "\n"; } my @bitrate_url_pairs = (); foreach my $video ($videos->get_nodelist) { my $suffix = $video->getAttribute('src'); if (! defined $suffix) { print STDERR q{Missing `video/@src' attribute} . "\n"; next; } my $prefix = $video->getParentNode->getAttribute('base'); if (! defined $suffix) { print STDERR q{Missing `video/../@base' attribute for video } . "`$suffix'\n"; next; } my $bitrate = $video->getAttribute('system-bitrate'); if (! defined $suffix) { print STDERR q{Missing `video/@system-bitrate' attribute for video } . "`$suffix'\n"; next; } # Build stream URL. Because RTMP URL can be ambigous, # applications accept aditional arguments separated by space # (the space must not be URI-encoded). my $stream_url; { my $rtmp = URI->new($prefix); my $app = substr($rtmp->path_query, 1); $rtmp->path('/'); $rtmp->query(undef); my $playpath = URI->new($suffix); if ($opt_d) { $stream_url = formaturl_rtmpdump($rtmp, $app, $playpath); } else { $stream_url = formaturl_librtmp($rtmp, $app, $playpath); } } # Store URL push @bitrate_url_pairs, {'bitrate' => $bitrate, 'url' => $stream_url}; } return @bitrate_url_pairs; } # Print segment URLs or their contnent found in bottom-level Apple M3U # play-list. This function can never return if live stream is served by # a server. # Arguments are URL of the playlist, LWP::UserAgent object, and boolean # signalling content of stream segments should be printed instead of their # URLs. # See . sub iterate_bottom_apple_m3u { my ($m3u_url, $ua, $stream_content) = @_; autoflush STDOUT 1; my $reload = 1; my $last_segment = -1; my $target_duration; while ($reload) { # Get bottom-level Apple MPEG play-list. my $response = $ua->request(GET $m3u_url); $response->is_success or die "Could not get bottom-level Apple M3U play-list from <" . $m3u_url . ">: " . $response->status_line . "\n"; my $retrieved_at = time; my $duration; my $sequence = 0; for (split(/(\r)?\n/, $response->decoded_content)) { if (! defined) { next; } if (/\A#EXT-X-TARGETDURATION:(\d+)/) { $target_duration = $1; next; } if (/\A#EXT-X-MEDIA-SEQUENCE:(\d+)/) { $sequence = $1; next; } if (/\A#EXT-X-ENDLIST\b/) { $reload = 0; next; } if (/\A#EXTINF:(\d+)/) { $duration = $1; next; } if (/\A[^#]/) { if (! defined $duration) { print STDERR 'Stray URL in bottom-level Apple play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; next; } if ($sequence > $last_segment) { my $segment_url = URI->new_abs($_, $m3u_url); if ($stream_content) { # Get segment content. $ua->set_my_handler('response_data', sub { print $_[3]; 1; }, 'm_code' => 2); my $response = $ua->request(GET $segment_url); $response->is_success or die "Could not get stream segment content from <" . $segment_url . ">: " . $response->status_line . "\n"; } else { print $segment_url, "\n"; } $last_segment = $sequence; } $duration = undef; $sequence++; next; } } if (! defined $target_duration || $last_segment == -1) { die 'No target duration or URL found in bottom-level Apple " . "play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; } if ($reload) { my $sleep = $target_duration - (time - $retrieved_at); if ($sleep > 0) { sleep $sleep; } } } } # Return array of { bitrate => INTEGER, url => URL } found in top-level # Apple M3U play-list. Returned URLs are locators of bottom-level Apple M3U # play-list for given bitrate. The play-list specification is on # . # Arguments are URL of the playlist and LWP::UserAgent object. sub extract_urls_from_top_apple_m3u { my ($m3u_url, $ua) = @_; my @bitrate_url_pairs = (); # Get top-level Apple MPEG playlist. my $response = $ua->request(GET $m3u_url); $response->is_success or die "Could not get top-level Apple M3U play-list from <" . $m3u_url . ">: " . $response->status_line . "\n"; my $bitrate; for (split(/(\r)?\n/, $response->decoded_content)) { if (! defined) { next; } if (/\A#EXT-X-STREAM-INF:(?:.*,)?BANDWIDTH=(\d+)/) { $bitrate = $1; next; } if (/\A[^#]/) { if (! defined $bitrate) { print STDERR 'Stray URL in top-level Apple play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; next; } push @bitrate_url_pairs, {'bitrate' => $bitrate, 'url' => URI->new_abs($_, $m3u_url)}; $bitrate = undef; next; } } if ($#bitrate_url_pairs < 0) { die 'No URL found in top-level Apple play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; } return @bitrate_url_pairs; } # Return array of { bitrate => INTEGER, url => URL } found in Apple SMIL play # list. Arguments is playlist as string, URL of the playlist, # playlist as XML::XPath object, and LWP::UserAgent object. sub extract_urls_from_apple_smil { my ($smil, $smil_url, $parser, $ua) = @_; my $videos = $parser->find('/data/smilRoot/body/video'); if ($videos->size <= 0) { die "No videos found in SMIL playlist <" . $smil_url . ">:\n" . $smil . "\n"; } my @bitrate_url_pairs = (); foreach my $video ($videos->get_nodelist) { my $m3u_url = $video->getAttribute('src'); if (! defined $m3u_url) { print STDERR q{Missing `video/@src' attribute} . "\n"; next; } push @bitrate_url_pairs, extract_urls_from_top_apple_m3u($m3u_url, $ua); } return @bitrate_url_pairs; } # Get entry HTML page my $ua = LWP::UserAgent->new; if ($opt_A) { $ua->agent('Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) ' . 'AppleWebKit/531.21.10 (KHTML, like Gecko) ' . 'Version/4.0.4 Mobile/7B314 Safari/531.21.10'); } my $response = $ua->request(GET $ENTRY); $response->is_success or die "Could not get entry page from <" . $ENTRY . ">: " . $response->status_line . "\n"; my $page = $response->decoded_content; # Try to get iframe player URL # The web page is not well-formed XML, we cannot use XPath # '//html:div[@id="iFramePositionContainer"]/html:iframe/@src' or # '//html:p[@id="iframeHolder"]/html:iframe/@src'; # This is sometimes relative, sometimes absolute path my $iframe_url = htmlgrep($page, qr{src="([^"]*/embed/iFramePlayer(?:[^"]*)\.php[^"]*)"}); if (defined $iframe_url && $iframe_url) { # If it ends with "&autoStart=false", it links to another page with the # same iframe player URL without the parameter. Thus remove the parameter. $iframe_url =~ s/&autoStart=false//; # Get iframe player page $iframe_url = URI->new_abs($iframe_url, $ENTRY); $response = $ua->request(GET $iframe_url); $response->is_success or die "Could not get iframe player from <" . $iframe_url . ">: " . $response->status_line . "\n"; $page = $response->decoded_content; } # Get JSON request data my $json_data = htmlgrep($page, qr{callSOAP\(([^)]*)\);}); unless (defined $json_data && $json_data) { # Try to get error message from stream provider my $message = htmlgrep($page, qr{([^<]*)<}); if (defined $message && $message) { die "$message\n"; } # else die in general way die "Could not find JSON data structure\n"; } # Get XML-RPC header definition my ($xmlrpc_header, $xmlrpc_value) = findrequestheader($page); if (!defined $xmlrpc_value or !defined $xmlrpc_value) { print STDERR "XML-RPC header definition not found.\n"; } # Decode JSON request data my $data; eval { $data = decode_json($json_data) } or die "Could not decode JSON string: $json_data: $@\n"; my @data = flatten($data); # Get SMIL playlist URL my $smil_generator_url = URI->new_abs($SMIL_GENERATOR, $ENTRY); $ua->default_header($xmlrpc_header => $xmlrpc_value) if defined $xmlrpc_header; $response = $ua->request(POST $smil_generator_url, \@data); $ua->default_header($xmlrpc_header => undef) if defined $xmlrpc_header; $response->is_success or die "Could not get SMIL playlist URL from <" . $smil_generator_url . ">: " . $response->status_line . "\n"; my $smil_url = $response->decoded_content; # Get SMIL playlist $ua->agent('NSPlayer/0 (Fuck libwwperl discrimination)'); $response = $ua->request(GET $smil_url); $response->is_success or die "Could not get SMIL playlist from <" . $smil_url . ">: " . $response->status_line . "\n"; my $smil = $response->decoded_content; # Get stream URLs my $parser = XML::XPath->new('xml' => $smil) or die "Could not parse SMIL playlist from <" . $smil_url . ">:\n" . $smil . "\n"; my @bitrate_url_pairs = (); if ($opt_A) { @bitrate_url_pairs = extract_urls_from_apple_smil($smil, $smil_url, $parser, $ua); } else { @bitrate_url_pairs = extract_urls_from_rtmp_smil($smil, $smil_url, $parser, $opt_d); } if ($opt_A && ($opt_f || $opt_s)) { # Select Apple M3U URL if (defined $BITRATE) { @bitrate_url_pairs = grep { ${$_}{'bitrate'} == $BITRATE } @bitrate_url_pairs; } if ($#bitrate_url_pairs < 0) { die "No usable video streams found in SMIL playlist:\n$smil\n"; } if ($#bitrate_url_pairs == 0 ) { iterate_bottom_apple_m3u(${$bitrate_url_pairs[0]}{'url'}, $ua, $opt_s); } else { die "Multiple bit-rate play-lists not implemented yet.\n"; } } else { # Output URL my $video_counter = 0; for my $pair (@bitrate_url_pairs) { if (defined $BITRATE) { if (${$pair}{'bitrate'} == $BITRATE) { print "${$pair}{'url'}\n"; $video_counter++; } } else { print "${$pair}{'bitrate'}: ${$pair}{'url'}\n"; $video_counter++; } } if ($video_counter <= 0) { die "No usable video streams found in SMIL playlist:\n$smil\n"; } }