diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index cc37cbdab..7c448662f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -1,11 +1,11 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; + import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.downloader.Downloader; @@ -17,11 +17,18 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; -import javax.annotation.Nonnull; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nonnull; + +import static org.schabi.newpipe.extractor.utils.Utils.HTTP; +import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; /* * Created by Christian Schabesberger on 25.07.16. 
@@ -49,6 +56,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private Document doc; + private JsonObject initialData; public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); @@ -59,11 +67,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor { String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS; final Response response = downloader.get(channelUrl, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } + @Override public String getNextPageUrl() throws ExtractionException { - return getNextPageUrlFrom(doc); + return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations")); } @Nonnull @@ -80,15 +90,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getId() throws ParsingException { try { - return doc.select("meta[itemprop=\"channelId\"]").first().attr("content"); - } catch (Exception ignored) {} - - // fallback method; does not work with channels that have no "Subscribe" button (e.g. 
EminemVEVO) - try { - Element element = doc.getElementsByClass("yt-uix-subscription-button").first(); - if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first(); - - return element.attr("data-channel-external-id"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId"); } catch (Exception e) { throw new ParsingException("Could not get channel id", e); } @@ -98,7 +100,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getName() throws ParsingException { try { - return doc.select("meta[property=\"og:title\"]").first().attr("content"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title"); } catch (Exception e) { throw new ParsingException("Could not get channel name", e); } @@ -107,7 +109,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getAvatarUrl() throws ParsingException { try { - return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar") + .getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get avatar", e); } @@ -116,13 +119,27 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getBannerUrl() throws ParsingException { try { - Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first(); - String cssContent = el.html(); - String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent); + String url = null; + try { + url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner") + .getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception ignored) {} + if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) { + 
return null; + } + // the first characters of the banner URLs are different for each channel and some are not even valid URLs + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } - return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url; + return url; } catch (Exception e) { - throw new ParsingException("Could not get Banner", e); + throw new ParsingException("Could not get banner", e); } } @@ -137,12 +154,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public long getSubscriberCount() throws ParsingException { - - final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first(); - if (el != null) { - String elTitle = el.attr("title"); + final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); + if (subscriberInfo != null) { try { - return Utils.mixedNumberWordToLong(elTitle); + return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text")); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); } @@ -155,7 +170,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getDescription() throws ParsingException { try { - return doc.select("meta[name=\"description\"]").first().attr("content"); + return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description"); } catch (Exception e) { throw new ParsingException("Could not get channel description", e); } @@ -165,8 +180,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public InfoItemsPage getInitialPage() throws ExtractionException { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Element ul = 
doc.select("ul[id=\"browse-items-primary\"]").first(); - collectStreamsFrom(collector, ul); + + JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents"); + collectStreamsFrom(collector, videos); + return new InfoItemsPage<>(collector, getNextPageUrl()); } @@ -181,106 +198,98 @@ public class YoutubeChannelExtractor extends ChannelExtractor { fetchPage(); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - JsonObject ajaxJson; + JsonArray ajaxJson; + + Map<String, List<String>> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); try { - final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - ajaxJson = JsonParser.object().from(response); - } catch (JsonParserException pe) { - throw new ParsingException("Could not parse json data for next streams", pe); + // Use the hardcoded client version first to get JSON with a structure we know + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (Exception e) { + try { + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException ignored) { + throw new ParsingException("Could not parse json data for 
next streams", e); + } } - final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl); - collectStreamsFrom(collector, ajaxHtml.select("body").first()); + JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("sectionListContinuation"); - return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl)); + collectStreamsFrom(collector, sectionListContinuation.getArray("contents")); + + return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations"))); } - private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl) - throws ParsingException { - String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html"); - if (!loadMoreHtmlDataRaw.isEmpty()) { - return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl)); - } else { + + private String getNextPageUrlFrom(JsonArray continuations) { + if (continuations == null) { return ""; } + + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; } - private String getNextPageUrlFrom(Document d) throws ParsingException { - try { - Element button = d.select("button[class*=\"yt-uix-load-more\"]").first(); - if (button != null) { - return button.attr("abs:data-uix-load-more-href"); - } else { - // Sometimes channels are simply so small, they don't have a more streams/videos - return ""; - } - } catch (Exception e) { - throw new ParsingException("Could not get next page url", e); - } - } - - private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws 
ParsingException { + private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException { collector.reset(); final String uploaderName = getName(); final String uploaderUrl = getUrl(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (final Element li : element.children()) { - if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) { - collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { + for (Object video : videos) { + JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer") + .getArray("contents").getObject(0); + if (videoInfo.getObject("videoRenderer") != null) { + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) { @Override - public String getUrl() throws ParsingException { - try { - Element el = li.select("div[class=\"feed-item-dismissable\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.attr("abs:href"); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - } - - @Override - public String getName() throws ParsingException { - try { - Element el = li.select("div[class=\"feed-item-dismissable\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.text(); - } catch (Exception e) { - throw new ParsingException("Could not get title", e); - } - } - - @Override - public String getUploaderName() throws ParsingException { + public String getUploaderName() { return uploaderName; } @Override - public String getUploaderUrl() throws ParsingException { + public String getUploaderUrl() { return uploaderUrl; } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - String url; - Element te = li.select("span[class=\"yt-thumb-clip\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem 
to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we've caught such an item. - if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } - return url; - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } }); } } } + + private JsonObject getVideoTab() throws ParsingException { + JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs"); + JsonObject videoTab = null; + + for (Object tab : tabs) { + if (((JsonObject) tab).getObject("tabRenderer") != null) { + if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) { + videoTab = ((JsonObject) tab).getObject("tabRenderer"); + break; + } + } + } + + if (videoTab == null) { + throw new ParsingException("Could not find Videos tab"); + } + + return videoTab; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java index a687c0504..483cd894c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java @@ -1,12 +1,14 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import org.jsoup.nodes.Element; +import com.grack.nanojson.JsonObject; + import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import static 
org.schabi.newpipe.extractor.utils.Utils.HTTP; +import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; /* * Created by Christian Schabesberger on 12.02.17. @@ -29,87 +31,75 @@ import java.util.regex.Pattern; */ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor { - private final Element el; + private JsonObject channelInfoItem; - public YoutubeChannelInfoItemExtractor(Element el) { - this.el = el; + public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) { + this.channelInfoItem = channelInfoItem; } @Override public String getThumbnailUrl() throws ParsingException { - Element img = el.select("span[class*=\"yt-thumb-simple\"]").first() - .select("img").first(); - - String url = img.attr("abs:src"); - - if (url.contains("gif")) { - url = img.attr("abs:data-thumb"); + try { + String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } + return url; + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); } - return url; } @Override public String getName() throws ParsingException { - return el.select("a[class*=\"yt-uix-tile-link\"]").first() - .text(); + try { + return channelInfoItem.getObject("title").getString("simpleText"); + } catch (Exception e) { + throw new ParsingException("Could not get name", e); + } } @Override public String getUrl() throws ParsingException { try { - String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first() - .attr("abs:data-href"); - - Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)"); - Matcher match = channelIdPattern.matcher(buttonTrackingUrl); - - if (match.matches()) { - return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1); - } - } 
catch(Exception ignored) {} - - // fallback method for channels without "Subscribe" button (or just in case yt changes things) - // provides an url with "/user/NAME", inconsistent with stream and channel extractor: tests will fail - try { - return el.select("a[class*=\"yt-uix-tile-link\"]").first() - .attr("abs:href"); + String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work? + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { - throw new ParsingException("Could not get channel url", e); + throw new ParsingException("Could not get url", e); } } @Override public long getSubscriberCount() throws ParsingException { - final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first(); - if (subsEl != null) { - try { - return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text())); - } catch (NumberFormatException e) { - throw new ParsingException("Could not get subscriber count", e); - } - } else { - // If the element is null, the channel have the subscriber count disabled - return -1; + try { + String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0]; + return Utils.mixedNumberWordToLong(subscribers); + } catch (Exception e) { + throw new ParsingException("Could not get subscriber count", e); } } @Override public long getStreamCount() throws ParsingException { - Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first(); - if (metaEl == null) { - return 0; - } else { - return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text())); + try { + return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText") + .getArray("runs").getObject(0).getString("text"))); + } catch (Exception e) { + throw new ParsingException("Could not get stream count", e); } } @Override public String getDescription() throws ParsingException { - Element desEl = 
el.select("div[class*=\"yt-lockup-description\"]").first(); - if (desEl == null) { - return ""; - } else { - return desEl.text(); + try { + return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text"); + } catch (Exception e) { + throw new ParsingException("Could not get description", e); } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 520bda80c..0ac2dcf05 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -1,34 +1,39 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; + import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import 
org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.utils.Utils; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nonnull; @SuppressWarnings("WeakerAccess") public class YoutubePlaylistExtractor extends PlaylistExtractor { private Document doc; + private JsonObject initialData; + private JsonObject uploaderInfo; + private JsonObject playlistInfo; public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); @@ -39,18 +44,61 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { final String url = getUrl(); final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); + uploaderInfo = getUploaderInfo(); + playlistInfo = getPlaylistInfo(); + } + + private JsonObject getUploaderInfo() throws ParsingException { + JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items"); + try { + JsonObject uploaderInfo = items.getObject(1).getObject("playlistSidebarSecondaryInfoRenderer") + .getObject("videoOwner").getObject("videoOwnerRenderer"); + if (uploaderInfo != null) { + return uploaderInfo; + } + } catch (Exception ignored) {} + + // fall back to the last sidebar item (index size() - 1); we might want to create a loop here instead of using duplicated code + try { + JsonObject uploaderInfo = items.getObject(items.size() - 1).getObject("playlistSidebarSecondaryInfoRenderer") + .getObject("videoOwner").getObject("videoOwnerRenderer"); + if (uploaderInfo != null) { + return uploaderInfo; + } + } catch (Exception e) { + throw new ParsingException("Could not get uploader info", e); + } + throw new ParsingException("Could not get uploader info"); + } + + private 
JsonObject getPlaylistInfo() throws ParsingException { + try { + return initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items") + .getObject(0).getObject("playlistSidebarPrimaryInfoRenderer"); + } catch (Exception e) { + throw new ParsingException("Could not get PlaylistInfo", e); + } } @Override - public String getNextPageUrl() throws ExtractionException { - return getNextPageUrlFrom(doc); + public String getNextPageUrl() { + return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("playlistVideoListRenderer").getArray("continuations")); } @Nonnull @Override public String getName() throws ParsingException { try { - return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); + String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text"); + if (name != null) return name; + } catch (Exception ignored) {} + try { + return initialData.getObject("microformat").getObject("microformatDataRenderer").getString("title"); } catch (Exception e) { throw new ParsingException("Could not get playlist name", e); } @@ -59,7 +107,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getThumbnailUrl() throws ParsingException { try { - return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); + return playlistInfo.getObject("thumbnailRenderer").getObject("playlistVideoThumbnailRenderer") + .getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception ignored) {} + try { + return initialData.getObject("microformat").getObject("microformatDataRenderer").getObject("thumbnail") + 
.getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get playlist thumbnail", e); } @@ -75,8 +128,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { public String getUploaderUrl() throws ParsingException { try { return YoutubeChannelExtractor.CHANNEL_URL_BASE + - doc.select("button[class*=\"yt-uix-subscription-button\"]") - .first().attr("data-channel-external-id"); + uploaderInfo.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader url", e); } @@ -85,7 +137,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderName() throws ParsingException { try { - return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); + return uploaderInfo.getObject("title").getArray("runs").getObject(0).getString("text"); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader name", e); } @@ -94,7 +146,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderAvatarUrl() throws ParsingException { try { - return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); + return uploaderInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader avatar", e); } @@ -102,33 +154,26 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public long getStreamCount() throws ParsingException { - String input; - try { - input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text(); - } catch (IndexOutOfBoundsException e) { + String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text"); + return 
Long.parseLong(Utils.removeNonDigitCharacters(viewsText)); + } catch (Exception e) { throw new ParsingException("Could not get video count from playlist", e); } - - try { - return Long.parseLong(Utils.removeNonDigitCharacters(input)); - } catch (NumberFormatException e) { - // When there's no videos in a playlist, there's no number in the "innerHtml", - // all characters that is not a number is removed, so we try to parse a empty string - if (!input.isEmpty()) { - return 0; - } else { - throw new ParsingException("Could not handle input: " + input, e); - } - } } @Nonnull @Override - public InfoItemsPage getInitialPage() throws ExtractionException { + public InfoItemsPage getInitialPage() { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first(); - collectStreamsFrom(collector, tbody); + + JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("playlistVideoListRenderer").getArray("contents"); + + collectStreamsFrom(collector, videos); return new InfoItemsPage<>(collector, getNextPageUrl()); } @@ -139,156 +184,67 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { } StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - JsonObject pageJson; + JsonArray ajaxJson; + + Map<String, List<String>> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); try { - final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - pageJson = JsonParser.object().from(responseBody); - } catch (JsonParserException pe) { - throw new ParsingException("Could not parse ajax json", pe); + // Use the hardcoded 
client version first to get JSON with a structure we know + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (Exception e) { + try { + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException ignored) { + throw new ParsingException("Could not parse json data for next streams", e); + } } - final Document pageHtml = Jsoup.parse("" - + pageJson.getString("content_html") - + "
", pageUrl); + JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("playlistVideoListContinuation"); - collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first()); + collectStreamsFrom(collector, sectionListContinuation.getArray("contents")); - return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl)); + return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations"))); } - private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl) - throws ParsingException { - String nextPageHtml = pageJson.getString("load_more_widget_html"); - if (!nextPageHtml.isEmpty()) { - return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl)); - } else { + private String getNextPageUrlFrom(JsonArray continuations) { + if (continuations == null) { return ""; } + + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; } - private String getNextPageUrlFrom(Document d) throws ParsingException { - try { - Element button = d.select("button[class*=\"yt-uix-load-more\"]").first(); - if (button != null) { - return button.attr("abs:data-uix-load-more-href"); - } else { - // Sometimes playlists are simply so small, they don't have a more streams/videos - return ""; - } - } catch (Exception e) { - throw new ParsingException("could not get next streams' url", e); - } - } - - private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) { + private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray 
videos) { collector.reset(); - if (element == null) { - return; - } - - final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (final Element li : element.children()) { - if (isDeletedItem(li)) { - continue; + for (Object video : videos) { + if (((JsonObject) video).getObject("playlistVideoRenderer") != null) { + collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) { + @Override + public long getViewCount() { + return -1; + } + }); } - - collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { - public Element uploaderLink; - - @Override - public boolean isAd() { - return false; - } - - @Override - public String getUrl() throws ParsingException { - try { - return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl(); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - } - - @Override - public String getName() throws ParsingException { - try { - return li.attr("data-title"); - } catch (Exception e) { - throw new ParsingException("Could not get title", e); - } - } - - @Override - public long getDuration() throws ParsingException { - try { - if (getStreamType() == StreamType.LIVE_STREAM) return -1; - - Element first = li.select("div[class=\"timestamp\"] span").first(); - if (first == null) { - // Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists, - // because in other cases, those videos don't even show up - return -1; - } - - return YoutubeParsingHelper.parseDurationString(first.text()); - } catch (Exception e) { - throw new ParsingException("Could not get duration" + getUrl(), e); - } - } - - - private Element getUploaderLink() { - // should always be present since we filter deleted items - if (uploaderLink == null) { - uploaderLink = 
li.select("div[class=pl-video-owner] a").first(); - } - return uploaderLink; - } - - @Override - public String getUploaderName() throws ParsingException { - return getUploaderLink().text(); - } - - @Override - public String getUploaderUrl() throws ParsingException { - // this url is not always in the form "/channel/..." - // sometimes Youtube provides urls in the from "/user/..." - return getUploaderLink().attr("abs:href"); - } - - @Override - public String getTextualUploadDate() throws ParsingException { - return ""; - } - - @Override - public long getViewCount() throws ParsingException { - return -1; - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId() + "/hqdefault.jpg"; - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }); } } - - /** - * Check if the playlist item is deleted - * - * @param li the list item - * @return true if the item is deleted - */ - private boolean isDeletedItem(Element li) { - return li.select("div[class=pl-video-owner] a").isEmpty(); - } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java index 63fef225f..358fa2e69 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java @@ -1,97 +1,63 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import org.jsoup.nodes.Element; +import com.grack.nanojson.JsonObject; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; +import 
org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor { - private final Element el; + private JsonObject playlistInfoItem; - public YoutubePlaylistInfoItemExtractor(Element el) { - this.el = el; + public YoutubePlaylistInfoItemExtractor(JsonObject playlistInfoItem) { + this.playlistInfoItem = playlistInfoItem; } @Override public String getThumbnailUrl() throws ParsingException { - String url; - try { - Element te = el.select("div[class=\"yt-thumb video-thumb\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - - if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } + return playlistInfoItem.getArray("thumbnails").getObject(0).getArray("thumbnails") + .getObject(0).getString("url"); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist thumbnail url", e); + throw new ParsingException("Could not get thumbnail url", e); } - - return url; } @Override public String getName() throws ParsingException { - String name; try { - final Element title = el.select("[class=\"yt-lockup-title\"]").first() - .select("a").first(); - - name = title == null ? 
"" : title.text(); + return playlistInfoItem.getObject("title").getString("simpleText"); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist name", e); + throw new ParsingException("Could not get name", e); } - - return name; } @Override public String getUrl() throws ParsingException { try { - final Element a = el.select("div[class=\"yt-lockup-meta\"]") - .select("ul[class=\"yt-lockup-meta-info\"]") - .select("li").select("a").first(); - - if (a != null) { - return a.attr("abs:href"); - } - - // this is for yt premium playlists - return el.select("h3[class=\"yt-lockup-title\"").first() - .select("a").first() - .attr("abs:href"); - + String id = playlistInfoItem.getString("playlistId"); + return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist url", e); + throw new ParsingException("Could not get url", e); } } @Override public String getUploaderName() throws ParsingException { - String name; - try { - final Element div = el.select("div[class=\"yt-lockup-byline\"]").first() - .select("a").first(); - - name = div.text(); + return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text"); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist uploader", e); + throw new ParsingException("Could not get uploader name", e); } - - return name; } @Override public long getStreamCount() throws ParsingException { try { - final Element count = el.select("span[class=\"formatted-video-count-label\"]").first() - .select("b").first(); - - return count == null ? 
0 : Long.parseLong(Utils.removeNonDigitCharacters(count.text())); + return Long.parseLong(Utils.removeNonDigitCharacters(playlistInfoItem.getString("videoCount"))); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist stream count", e); + throw new ParsingException("Could not get stream count", e); } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 48420814b..b06699098 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -1,8 +1,11 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import org.jsoup.Jsoup; +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; + import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; @@ -14,13 +17,14 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.utils.Parser; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import javax.annotation.Nonnull; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; /* * Created 
by Christian Schabesberger on 22.07.2018 @@ -45,6 +49,7 @@ import java.net.URL; public class YoutubeSearchExtractor extends SearchExtractor { private Document doc; + private JsonObject initialData; public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) { super(service, linkHandler); @@ -55,6 +60,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { final String url = getUrl(); final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } @Nonnull @@ -65,80 +71,109 @@ public class YoutubeSearchExtractor extends SearchExtractor { @Override public String getSearchSuggestion() { - final Element el = doc.select("div[class*=\"spell-correction\"]").first(); - if (el != null) { - return el.select("a").first().text(); - } else { + JsonObject showingResultsForRenderer = initialData.getObject("contents") + .getObject("twoColumnSearchResultsRenderer").getObject("primaryContents") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("showingResultsForRenderer"); + if (showingResultsForRenderer == null) { return ""; + } else { + return showingResultsForRenderer.getObject("correctedQuery").getArray("runs") + .getObject(0).getString("text"); } } @Nonnull @Override public InfoItemsPage getInitialPage() throws ExtractionException { - return new InfoItemsPage<>(collectItems(doc), getNextPageUrl()); + InfoItemsSearchCollector collector = getInfoItemSearchCollector(); + JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") + .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") + .getObject(0).getObject("itemSectionRenderer").getArray("contents"); + + collectStreamsFrom(collector, videos); + return new 
InfoItemsPage<>(collector, getNextPageUrl()); } @Override public String getNextPageUrl() throws ExtractionException { - return getUrl() + "&page=" + 2; + return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") + .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") + .getObject(0).getObject("itemSectionRenderer").getArray("continuations")); } @Override public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { - final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - doc = Jsoup.parse(response, pageUrl); + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } - return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl)); - } - - private String getNextPageUrlFromCurrentUrl(String currentUrl) - throws MalformedURLException, UnsupportedEncodingException { - final int pageNr = Integer.parseInt( - Parser.compatParseMap( - new URL(currentUrl) - .getQuery()) - .get("page")); - - return currentUrl.replace("&page=" + pageNr, - "&page=" + Integer.toString(pageNr + 1)); - } - - private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException { InfoItemsSearchCollector collector = getInfoItemSearchCollector(); - collector.reset(); + JsonArray ajaxJson; - Element list = doc.select("ol[class=\"item-section\"]").first(); - final TimeAgoParser timeAgoParser = getTimeAgoParser(); + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); - for (Element item : list.children()) { - /* First we need to determine which kind of item we are working with. - Youtube depicts five different kinds of items on its search result page. These are - regular videos, playlists, channels, two types of video suggestions, and a "no video - found" item. 
Since we only want videos, we need to filter out all the others. - An example for this can be seen here: - https://www.youtube.com/results?search_query=asdf&page=1 - - We already applied a filter to the url, so we don't need to care about channels and - playlists now. - */ - - Element el; - - if ((el = item.select("div[class*=\"search-message\"]").first()) != null) { - throw new NothingFoundException(el.text()); - - // video item type - } else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) { - collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser)); - } else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) { - collector.commit(new YoutubeChannelInfoItemExtractor(el)); - } else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null && - item.select(".yt-pl-icon-mix").isEmpty()) { - collector.commit(new YoutubePlaylistInfoItemExtractor(el)); + try { + // Use the hardcoded client version first to get JSON with a structure we know + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (Exception e) { + try { + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException ignored) { + throw new 
ParsingException("Could not parse json data for next streams", e); } } - return collector; + JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("itemSectionContinuation"); + + collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); + + return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations"))); + } + + private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException { + collector.reset(); + + final TimeAgoParser timeAgoParser = getTimeAgoParser(); + + for (Object item : videos) { + if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) { + throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer") + .getObject("bodyText").getArray("runs").getObject(0).getString("text")); + } else if (((JsonObject) item).getObject("videoRenderer") != null) { + collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser)); + } else if (((JsonObject) item).getObject("channelRenderer") != null) { + collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer"))); + } else if (((JsonObject) item).getObject("playlistRenderer") != null) { + collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); + } + } + } + + private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException { + if (continuations == null) { + return ""; + } + + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; } } 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 9568c7ff5..2408b5c02 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -3,11 +3,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; -import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; + import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; @@ -15,7 +13,6 @@ import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; -import org.schabi.newpipe.extractor.downloader.Request; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; @@ -23,23 +20,41 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.localization.DateWrapper; +import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; import 
org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.stream.*; -import org.schabi.newpipe.extractor.utils.JsonUtils; +import org.schabi.newpipe.extractor.stream.AudioStream; +import org.schabi.newpipe.extractor.stream.Description; +import org.schabi.newpipe.extractor.stream.Frameset; +import org.schabi.newpipe.extractor.stream.Stream; +import org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; +import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; +import org.schabi.newpipe.extractor.stream.StreamType; +import org.schabi.newpipe.extractor.stream.SubtitlesStream; +import org.schabi.newpipe.extractor.stream.VideoStream; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; /* * Created by Christian Schabesberger on 06.08.15. 
@@ -62,8 +77,6 @@ import java.util.regex.Pattern; */ public class YoutubeStreamExtractor extends StreamExtractor { - private static final String TAG = YoutubeStreamExtractor.class.getSimpleName(); - /*////////////////////////////////////////////////////////////////////////// // Exceptions //////////////////////////////////////////////////////////////////////////*/ @@ -74,12 +87,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } - public class SubtitlesException extends ContentNotAvailableException { - SubtitlesException(String message, Throwable cause) { - super(message, cause); - } - } - /*//////////////////////////////////////////////////////////////////////////*/ private Document doc; @@ -88,6 +95,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Nonnull private final Map videoInfoPage = new HashMap<>(); private JsonObject playerResponse; + private JsonObject initialData; @Nonnull private List subtitlesInfos = new ArrayList<>(); @@ -106,22 +114,17 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getName() throws ParsingException { assertPageFetched(); + String title = null; try { - return playerResponse.getObject("videoDetails").getString("title"); - - } catch (Exception e) { - // fallback HTML method - String name = null; + title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} + if (title == null) { try { - name = doc.select("meta[name=title]").attr(CONTENT); - } catch (Exception ignored) { - } - - if (name == null) { - throw new ParsingException("Could not get name", e); - } - return name; + title = playerResponse.getObject("videoDetails").getString("title"); + } catch (Exception ignored) {} } + if (title != null) return title; + throw new ParsingException("Could not get name"); } @Override @@ -131,19 +134,33 @@ public class YoutubeStreamExtractor extends StreamExtractor { } try { - return 
playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); - } catch (Exception e) { - String uploadDate = null; - try { - uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT); - } catch (Exception ignored) { - } + // return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); + } catch (Exception ignored) {} - if (uploadDate == null) { - throw new ParsingException("Could not get upload date", e); + try { + if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) { + String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10); + + try { // Premiered 20 hours ago + TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en")); + Calendar parsedTime = timeAgoParser.parse(time).date(); + return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime()); + } catch (Exception ignored) {} + + try { // Premiered Premiered Feb 21, 2020 + Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time); + return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime()); + } catch (Exception ignored) {} } - return uploadDate; - } + } catch (Exception ignored) {} + + try { + // TODO this parses English formatted dates only, we need a better approach to parse the textual date + Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse( + getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText")); + return new SimpleDateFormat("yyyy-MM-dd").format(d); + } catch (Exception ignored) {} + throw new ParsingException("Could not get upload date"); } @Override @@ -167,15 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { return thumbnails.getObject(thumbnails.size() - 1).getString("url"); } catch (Exception e) { - String url = null; - try { - url = 
doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); - } catch (Exception ignored) {} - - if (url == null) { - throw new ParsingException("Could not get thumbnail url", e); - } - return url; + throw new ParsingException("Could not get thumbnail url"); } } @@ -184,88 +193,65 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public Description getDescription() throws ParsingException { assertPageFetched(); + // description with more info on links try { - // first try to get html-formatted description - return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML); - } catch (Exception e) { - try { - // fallback to raw non-html description - return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); - } catch (Exception ignored) { - throw new ParsingException("Could not get the description", e); - } - } - } - - // onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;" - // :00 is NOT recognized as a timestamp in description or comments. - // 0:00 is recognized in both description and comments. - // https://www.youtube.com/watch?v=4cccfDXu1vA - private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile( - "seekTo\\(" - + "(?:(\\d+)\\*3600\\+)?" // hours? - + "(\\d+)\\*60\\+" // minutes - + "(\\d+)" // seconds - + "\\)"); - - @SafeVarargs - private static T coalesce(T... 
args) { - for (T arg : args) { - if (arg != null) return arg; - } - throw new IllegalArgumentException("all arguments to coalesce() were null"); - } - - private String parseHtmlAndGetFullLinks(String descriptionHtml) - throws MalformedURLException, UnsupportedEncodingException, ParsingException { - final Document description = Jsoup.parse(descriptionHtml, getUrl()); - for (Element a : description.select("a")) { - final String rawUrl = a.attr("abs:href"); - final URL redirectLink = new URL(rawUrl); - - final Matcher onClickTimestamp; - final String queryString; - if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick"))) - .find()) { - a.removeAttr("onclick"); - - String hours = coalesce(onClickTimestamp.group(1), "0"); - String minutes = onClickTimestamp.group(2); - String seconds = onClickTimestamp.group(3); - - int timestamp = 0; - timestamp += Integer.parseInt(hours) * 3600; - timestamp += Integer.parseInt(minutes) * 60; - timestamp += Integer.parseInt(seconds); - - String setTimestamp = "&t=" + timestamp; - - // Even after clicking https://youtu.be/...?t=6, - // getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=. - a.attr("href", getUrl() + setTimestamp); - - } else if ((queryString = redirectLink.getQuery()) != null) { - // if the query string is null we are not dealing with a redirect link, - // so we don't need to override it. - final String link = - Parser.compatParseMap(queryString).get("q"); - - if (link != null) { - // if link is null the a tag is a hashtag. - // They refer to the youtube search. We do not handle them. 
- a.text(link); - a.attr("href", link); - } else if (redirectLink.toString().contains("https://www.youtube.com/")) { - a.text(redirectLink.toString()); - a.attr("href", redirectLink.toString()); + boolean htmlConversionRequired = false; + JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs"); + StringBuilder descriptionBuilder = new StringBuilder(descriptions.size()); + for (Object textObjectHolder : descriptions) { + JsonObject textHolder = (JsonObject) textObjectHolder; + String text = textHolder.getString("text"); + if (textHolder.getObject("navigationEndpoint") != null) { + // The text is a link. Get the URL it points to and generate a HTML link of it + if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) { + String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url"); + if (internUrl.startsWith("/redirect?")) { + // q parameter can be the first parameter + internUrl = internUrl.substring(10); + String[] params = internUrl.split("&"); + for (String param : params) { + if (param.split("=")[0].equals("q")) { + String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); + if (url != null && !url.isEmpty()) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; + } else { + descriptionBuilder.append(text); + } + break; + } + } + } else if (internUrl.startsWith("http")) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; + } + continue; + } + continue; + } + if (text != null) { + descriptionBuilder.append(text); } - } else if (redirectLink.toString().contains("https://www.youtube.com/")) { - descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString()); - a.text(redirectLink.toString()); - a.attr("href", redirectLink.toString()); } + + String description = descriptionBuilder.toString(); + + if (!description.isEmpty()) { + if 
(htmlConversionRequired) { + description = description.replaceAll("\\n", "
"); + description = description.replaceAll(" ", "  "); + return new Description(description, Description.HTML); + } + return new Description(description, Description.PLAIN_TEXT); + } + } catch (Exception ignored) { } + + // raw non-html description + try { + return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); + } catch (Exception ignored) { + throw new ParsingException("Could not get description"); } - return description.select("body").first().html(); } @Override @@ -318,68 +304,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public long getViewCount() throws ParsingException { assertPageFetched(); + String views = null; try { - if (getStreamType().equals(StreamType.LIVE_STREAM)) { - return getLiveStreamWatchingCount(); - } else { - return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount")); - } - } catch (Exception e) { + views = getVideoPrimaryInfoRenderer().getObject("viewCount") + .getObject("videoViewCountRenderer").getObject("viewCount") + .getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} + if (views == null) { try { - return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); - } catch (Exception ignored) { - throw new ParsingException("Could not get view count", e); - } + views = getVideoPrimaryInfoRenderer().getObject("viewCount") + .getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText"); + } catch (Exception ignored) {} } - } - - private long getLiveStreamWatchingCount() throws ExtractionException, IOException, JsonParserException { - // https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key= - String innerTubeKey = null, clientVersion = null; - if (playerArgs != null && !playerArgs.isEmpty()) { - innerTubeKey = playerArgs.getString("innertube_api_key"); - clientVersion = playerArgs.getString("innertube_context_client_version"); - } 
else if (!videoInfoPage.isEmpty()) { - innerTubeKey = videoInfoPage.get("innertube_api_key"); - clientVersion = videoInfoPage.get("innertube_context_client_version"); + if (views == null) { + try { + views = playerResponse.getObject("videoDetails").getString("viewCount"); + } catch (Exception ignored) {} } - - if (innerTubeKey == null || innerTubeKey.isEmpty()) { - throw new ExtractionException("Couldn't get innerTube key"); - } - - if (clientVersion == null || clientVersion.isEmpty()) { - throw new ExtractionException("Couldn't get innerTube client version"); - } - - final String metadataUrl = "https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=" + innerTubeKey; - final byte[] dataBody = ("{\"context\":{\"client\":{\"clientName\":1,\"clientVersion\":\"" + clientVersion + "\"}}" + - ",\"videoId\":\"" + getId() + "\"}").getBytes("UTF-8"); - final Response response = getDownloader().execute(Request.newBuilder() - .post(metadataUrl, dataBody) - .addHeader("Content-Type", "application/json") - .build()); - final JsonObject jsonObject = JsonParser.object().from(response.responseBody()); - - for (Object actionEntry : jsonObject.getArray("actions")) { - if (!(actionEntry instanceof JsonObject)) continue; - final JsonObject entry = (JsonObject) actionEntry; - - final JsonObject updateViewershipAction = entry.getObject("updateViewershipAction", null); - if (updateViewershipAction == null) continue; - - final JsonArray viewCountRuns = JsonUtils.getArray(updateViewershipAction, "viewership.videoViewCountRenderer.viewCount.runs"); - if (viewCountRuns.isEmpty()) continue; - - final JsonObject textObject = viewCountRuns.getObject(0); - if (!textObject.has("text")) { - throw new ExtractionException("Response don't have \"text\" element"); - } - - return Long.parseLong(Utils.removeNonDigitCharacters(textObject.getString("text"))); - } - - throw new ExtractionException("Could not find correct results in response"); + if (views != null) return 
Long.parseLong(Utils.removeNonDigitCharacters(views)); + throw new ParsingException("Could not get view count"); } @Override @@ -387,9 +330,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String likesString = ""; try { - Element button = doc.select("button.like-button-renderer-like-button").first(); try { - likesString = button.select("span.yt-uix-button-content").first().text(); + likesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar") + .getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { @@ -410,9 +353,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String dislikesString = ""; try { - Element button = doc.select("button.like-button-renderer-dislike-button").first(); try { - dislikesString = button.select("span.yt-uix-button-content").first().text(); + dislikesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar") + .getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { @@ -432,40 +375,36 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getUploaderUrl() throws ParsingException { assertPageFetched(); + String uploaderId = null; try { - return "https://www.youtube.com/channel/" + - playerResponse.getObject("videoDetails").getString("channelId"); - } catch (Exception e) { - String uploaderUrl = null; + uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + .getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) 
{} + if (uploaderId == null) { try { - uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children() - .select("a").first().attr("abs:href"); + uploaderId = playerResponse.getObject("videoDetails").getString("channelId"); } catch (Exception ignored) {} - - if (uploaderUrl == null) { - throw new ParsingException("Could not get channel link", e); - } - return uploaderUrl; } + if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId; + throw new ParsingException("Could not get uploader url"); } @Nonnull @Override public String getUploaderName() throws ParsingException { assertPageFetched(); + String uploaderName = null; try { - return playerResponse.getObject("videoDetails").getString("author"); - } catch (Exception e) { - String name = null; + uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + .getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} + if (uploaderName == null) { try { - name = doc.select("div.yt-user-info").first().text(); + uploaderName = playerResponse.getObject("videoDetails").getString("author"); } catch (Exception ignored) {} - - if (name == null) { - throw new ParsingException("Could not get uploader name"); - } - return name; } + if (uploaderName != null) return uploaderName; + throw new ParsingException("Could not get uploader name"); } @Nonnull @@ -475,12 +414,19 @@ public class YoutubeStreamExtractor extends StreamExtractor { String uploaderAvatarUrl = null; try { - uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first() - .select("img").first() - .attr("abs:data-thumb"); - } catch (Exception e) {//todo: add fallback method - throw new ParsingException("Could not get uploader avatar url", e); - } + uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") + 
.getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") + .getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail") + .getArray("thumbnails").getObject(0).getString("url"); + if (uploaderAvatarUrl != null && !uploaderAvatarUrl.isEmpty()) { + return uploaderAvatarUrl; + } + } catch (Exception ignored) {} + + try { + uploaderAvatarUrl = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + .getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception ignored) {} if (uploaderAvatarUrl == null) { throw new ParsingException("Could not get uploader avatar url"); @@ -594,13 +540,13 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override @Nonnull - public List getSubtitlesDefault() throws IOException, ExtractionException { + public List getSubtitlesDefault() { return getSubtitles(MediaFormat.TTML); } @Override @Nonnull - public List getSubtitles(final MediaFormat format) throws IOException, ExtractionException { + public List getSubtitles(final MediaFormat format) { assertPageFetched(); List subtitles = new ArrayList<>(); for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) { @@ -624,18 +570,20 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Override - public StreamInfoItem getNextStream() throws IOException, ExtractionException { + public StreamInfoItem getNextStream() throws ExtractionException { assertPageFetched(); + if (isAgeRestricted) { + return null; + } try { - StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); + final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("secondaryResults").getObject("secondaryResults").getArray("results") + .getObject(0).getObject("compactAutoplayRenderer").getArray("contents") + .getObject(0).getObject("compactVideoRenderer"); final TimeAgoParser 
timeAgoParser = getTimeAgoParser(); + StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Elements watch = doc.select("div[class=\"watch-sidebar-section\"]"); - if (watch.size() < 1) { - return null;// prevent the snackbar notification "report error" on age-restricted videos - } - - collector.commit(extractVideoPreviewInfo(watch.first().select("li").first(), timeAgoParser)); + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser)); return collector.getItems().get(0); } catch (Exception e) { throw new ParsingException("Could not get next video", e); @@ -643,20 +591,22 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Override - public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException { + public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException { assertPageFetched(); + if (isAgeRestricted) { + return null; + } try { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); + JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("secondaryResults").getObject("secondaryResults").getArray("results"); + final TimeAgoParser timeAgoParser = getTimeAgoParser(); - Element ul = doc.select("ul[id=\"watch-related\"]").first(); - if (ul != null) { - for (Element li : ul.children()) { - // first check if we have a playlist. 
If so leave them out - if (li.select("a[class*=\"content-link\"]").first() != null) { - collector.commit(extractVideoPreviewInfo(li, timeAgoParser)); - } - } + for (Object ul : results) { + final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer"); + + if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser)); } return collector; } catch (Exception e) { @@ -736,6 +686,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { isAgeRestricted = false; } playerResponse = getPlayerResponse(); + initialData = YoutubeParsingHelper.getInitialData(pageHtml); if (decryptionCode.isEmpty()) { decryptionCode = loadDecryptionCode(playerUrl); @@ -752,12 +703,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { return JsonParser.object().from(ytPlayerConfigRaw); } catch (Parser.RegexException e) { String errorReason = getErrorMessage(); - switch (errorReason) { - case "": - throw new ContentNotAvailableException("Content not available: player config empty", e); - default: - throw new ContentNotAvailableException("Content not available", e); + if (errorReason.isEmpty()) { + throw new ContentNotAvailableException("Content not available: player config empty", e); } + throw new ContentNotAvailableException("Content not available", e); } catch (Exception e) { throw new ParsingException("Could not parse yt player config", e); } @@ -912,7 +861,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Nonnull - private List getAvailableSubtitlesInfo() throws SubtitlesException { + private List getAvailableSubtitlesInfo() { // If the video is age restricted getPlayerConfig will fail if (isAgeRestricted) return Collections.emptyList(); @@ -926,7 +875,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject()); final JsonArray captionsArray = renderer.getArray("captionTracks", new 
JsonArray()); // todo: use this to apply auto translation to different language from a source language - final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray()); +// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray()); // This check is necessary since there may be cases where subtitles metadata do not contain caption track info // e.g. https://www.youtube.com/watch?v=-Vpwatutnko @@ -983,6 +932,44 @@ public class YoutubeStreamExtractor extends StreamExtractor { // Utils //////////////////////////////////////////////////////////////////////////*/ + private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("results").getObject("results").getArray("contents"); + JsonObject videoPrimaryInfoRenderer = null; + + for (Object content : contents) { + if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) { + videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer"); + break; + } + } + + if (videoPrimaryInfoRenderer == null) { + throw new ParsingException("Could not find videoPrimaryInfoRenderer"); + } + + return videoPrimaryInfoRenderer; + } + + private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException { + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("results").getObject("results").getArray("contents"); + JsonObject videoSecondaryInfoRenderer = null; + + for (Object content : contents) { + if (((JsonObject) content).getObject("videoSecondaryInfoRenderer") != null) { + videoSecondaryInfoRenderer = ((JsonObject) content).getObject("videoSecondaryInfoRenderer"); + break; + } + } + + if (videoSecondaryInfoRenderer == null) { + throw new ParsingException("Could not find videoSecondaryInfoRenderer"); + } + + return videoSecondaryInfoRenderer; + } + 
@Nonnull private static String getVideoInfoUrl(final String id, final String sts) { return "https://www.youtube.com/get_video_info?" + "video_id=" + id + @@ -1026,60 +1013,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { return urlAndItags; } - /** - * Provides information about links to other videos on the video page, such as related videos. - * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo. - */ - private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li, final TimeAgoParser timeAgoParser) { - return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { - - @Override - public String getUrl() throws ParsingException { - return li.select("a.content-link").first().attr("abs:href"); - } - - @Override - public String getName() throws ParsingException { - //todo: check NullPointerException causing - return li.select("span.title").first().text(); - //this page causes the NullPointerException, after finding it by searching for "tjvg": - //https://www.youtube.com/watch?v=Uqg0aEhLFAg - } - - @Override - public String getUploaderName() throws ParsingException { - return li.select("span[class*=\"attribution\"").first() - .select("span").first().text(); - } - - @Override - public String getUploaderUrl() throws ParsingException { - return ""; // The uploader is not linked - } - - @Override - public String getTextualUploadDate() throws ParsingException { - return ""; - } - - @Override - public String getThumbnailUrl() throws ParsingException { - Element img = li.select("img").first(); - String thumbnailUrl = img.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we caught such an item. 
- if (thumbnailUrl.contains(".gif")) { - thumbnailUrl = img.attr("data-thumb"); - } - if (thumbnailUrl.startsWith("//")) { - thumbnailUrl = HTTPS + thumbnailUrl; - } - return thumbnailUrl; - } - }; - } - @Nonnull @Override public List getFrames() throws ExtractionException { @@ -1137,40 +1070,44 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } + @Nonnull @Override - public String getHost() throws ParsingException { + public String getHost() { + return ""; + } + + @Nonnull + @Override + public String getPrivacy() { + return ""; + } + + @Nonnull + @Override + public String getCategory() { + return ""; + } + + @Nonnull + @Override + public String getLicence() { return ""; } @Override - public String getPrivacy() throws ParsingException { - return ""; - } - - @Override - public String getCategory() throws ParsingException { - return ""; - } - - @Override - public String getLicence() throws ParsingException { - return ""; - } - - @Override - public Locale getLanguageInfo() throws ParsingException { + public Locale getLanguageInfo() { return null; } @Nonnull @Override - public List getTags() throws ParsingException { + public List getTags() { return new ArrayList<>(); } @Nonnull @Override - public String getSupportInfo() throws ParsingException { + public String getSupportInfo() { return ""; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index eda36d21e..2010cfb5e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -1,19 +1,19 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import org.jsoup.nodes.Element; -import 
org.jsoup.select.Elements; +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nullable; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; /* * Copyright (C) Christian Schabesberger 2016 @@ -35,263 +35,190 @@ import java.util.Date; public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { - private final Element item; + private JsonObject videoInfo; private final TimeAgoParser timeAgoParser; - private String cachedUploadDate; - /** * Creates an extractor of StreamInfoItems from a YouTube page. * - * @param item The page element + * @param videoInfoItem The JSON page element * @param timeAgoParser A parser of the textual dates or {@code null}. 
*/ - public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) { - this.item = item; + public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) { + this.videoInfo = videoInfoItem; this.timeAgoParser = timeAgoParser; } @Override - public StreamType getStreamType() throws ParsingException { - if (isLiveStream(item)) { - return StreamType.LIVE_STREAM; - } else { - return StreamType.VIDEO_STREAM; - } + public StreamType getStreamType() { + try { + if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) { + return StreamType.LIVE_STREAM; + } + } catch (Exception ignored) {} + return StreamType.VIDEO_STREAM; } @Override public boolean isAd() throws ParsingException { - return !item.select("span[class*=\"icon-not-available\"]").isEmpty() - || !item.select("span[class*=\"yt-badge-ad\"]").isEmpty() - || isPremiumVideo(); - } - - private boolean isPremiumVideo() { - Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first(); - if (premiumSpan == null) return false; - - // if this span has text it most likely says ("Free Video") so we can play this - if (premiumSpan.hasText()) return false; - return true; + return isPremium() || getName().equals("[Private video]") || getName().equals("[Deleted video]"); } @Override public String getUrl() throws ParsingException { try { - Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.attr("abs:href"); + String videoId = videoInfo.getString("videoId"); + return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); + throw new ParsingException("Could not get url", e); } } @Override public String getName() throws ParsingException { + String name = null; try 
{ - Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.text(); - } catch (Exception e) { - throw new ParsingException("Could not get title", e); + name = videoInfo.getObject("title").getString("simpleText"); + } catch (Exception ignored) {} + if (name == null) { + try { + name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} } + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get name"); } @Override public long getDuration() throws ParsingException { try { if (getStreamType() == StreamType.LIVE_STREAM) return -1; - - final Element duration = item.select("span[class*=\"video-time\"]").first(); - // apparently on youtube, video-time element will not show up if the video has a duration of 00:00 - // see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf - return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text()); + return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); } catch (Exception e) { - throw new ParsingException("Could not get Duration: " + getUrl(), e); + throw new ParsingException("Could not get duration", e); } } @Override public String getUploaderName() throws ParsingException { + String name = null; try { - return item.select("div[class=\"yt-lockup-byline\"]").first() - .select("a").first() - .text(); - } catch (Exception e) { - throw new ParsingException("Could not get uploader", e); + name = videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getString("text"); + } catch (Exception ignored) {} + if (name == null) { + try { + name = videoInfo.getObject("ownerText").getArray("runs") + .getObject(0).getString("text"); + } catch (Exception ignored) {} } + if (name == null) { + try { + name = videoInfo.getObject("shortBylineText").getArray("runs") + 
.getObject(0).getString("text"); + } catch (Exception ignored) {} + } + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get uploader name"); } @Override public String getUploaderUrl() throws ParsingException { - // this url is not always in the form "/channel/..." - // sometimes Youtube provides urls in the from "/user/..." try { + String id = null; try { - return item.select("div[class=\"yt-lockup-byline\"]").first() - .select("a").first() - .attr("abs:href"); - } catch (Exception e){} - - // try this if the first didn't work - return item.select("span[class=\"title\"") - .text().split(" - ")[0]; + id = videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + if (id == null) { + try { + id = videoInfo.getObject("ownerText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + } + if (id == null) { + try { + id = videoInfo.getObject("shortBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + } + if (id == null || id.isEmpty()) { + throw new IllegalArgumentException("is empty"); + } + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { - System.out.println(item.html()); - throw new ParsingException("Could not get uploader url", e); + throw new ParsingException("Could not get uploader url"); } } @Nullable @Override - public String getTextualUploadDate() throws ParsingException { - if (getStreamType().equals(StreamType.LIVE_STREAM)) { - return null; - } - - if (cachedUploadDate != null) { - return cachedUploadDate; - } - + public String getTextualUploadDate() { try { - if (isVideoReminder()) { - final Calendar calendar = getDateFromReminder(); - 
if (calendar != null) { - return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm") - .format(calendar.getTime()); - } - } - - - Element meta = item.select("div[class=\"yt-lockup-meta\"]").first(); - if (meta == null) return ""; - - final Elements li = meta.select("li"); - if (li.isEmpty()) return ""; - - return cachedUploadDate = li.first().text(); + return videoInfo.getObject("publishedTimeText").getString("simpleText"); } catch (Exception e) { - throw new ParsingException("Could not get upload date", e); + // upload date is not always available, e.g. in playlists + return null; } } @Nullable @Override public DateWrapper getUploadDate() throws ParsingException { - if (getStreamType().equals(StreamType.LIVE_STREAM)) { - return null; - } - - if (isVideoReminder()) { - return new DateWrapper(getDateFromReminder()); - } - String textualUploadDate = getTextualUploadDate(); if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) { - return timeAgoParser.parse(textualUploadDate); - } else { - return null; + try { + return timeAgoParser.parse(textualUploadDate); + } catch (ParsingException e) { + throw new ParsingException("Could not get upload date", e); + } } + return null; } @Override public long getViewCount() throws ParsingException { - String input; - - final Element spanViewCount = item.select("span.view-count").first(); - if (spanViewCount != null) { - input = spanViewCount.text(); - - } else if (getStreamType().equals(StreamType.LIVE_STREAM)) { - Element meta = item.select("ul.yt-lockup-meta-info").first(); - if (meta == null) return 0; - - final Elements li = meta.select("li"); - if (li.isEmpty()) return 0; - - input = li.first().text(); - } else { - try { - Element meta = item.select("div.yt-lockup-meta").first(); - if (meta == null) return -1; - - // This case can happen if google releases a special video - if (meta.select("li").size() < 2) return -1; - - input = meta.select("li").get(1).text(); - } catch 
(IndexOutOfBoundsException e) { - throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e); - } - } - - if (input == null) { - throw new ParsingException("Input is null"); - } - try { - - return Long.parseLong(Utils.removeNonDigitCharacters(input)); - } catch (NumberFormatException e) { - // if this happens the video probably has no views - if (!input.isEmpty()) { - return 0; + if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) { + return -1; } - - throw new ParsingException("Could not handle input: " + input, e); + String viewCount; + if (getStreamType() == StreamType.LIVE_STREAM) { + viewCount = videoInfo.getObject("viewCountText") + .getArray("runs").getObject(0).getString("text"); + } else { + viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); + } + if (viewCount.equals("Recommended for you")) return -1; + return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); + } catch (Exception e) { + throw new ParsingException("Could not get view count", e); } } @Override public String getThumbnailUrl() throws ParsingException { try { - String url; - Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we've caught such an item. 
- if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } - return url; + // TODO: Don't simply get the first item, but look at all thumbnails and their resolution + return videoInfo.getObject("thumbnail").getArray("thumbnails") + .getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); } } - - private boolean isVideoReminder() { - return !item.select("span.yt-uix-livereminder").isEmpty(); - } - - private Calendar getDateFromReminder() throws ParsingException { - final Element timeFuture = item.select("span.yt-badge.localized-date").first(); - - if (timeFuture == null) { - throw new ParsingException("Span timeFuture is null"); - } - - final String timestamp = timeFuture.attr("data-timestamp"); - if (!timestamp.isEmpty()) { - try { - final Calendar calendar = Calendar.getInstance(); - calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L)); - return calendar; - } catch (Exception e) { - throw new ParsingException("Could not parse = \"" + timestamp + "\""); + private boolean isPremium() { + try { + JsonArray badges = videoInfo.getArray("badges"); + for (Object badge : badges) { + if (((JsonObject) badge).getObject("metadataBadgeRenderer").getString("label").equals("Premium")) { + return true; + } } - } - - throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\""); - } - - /** - * Generic method that checks if the element contains any clues that it's a livestream item - */ - protected static boolean isLiveStream(Element item) { - return !item.select("span[class*=\"yt-badge-live\"]").isEmpty() - || !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty(); + } catch (Exception ignored) {} + return false; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java 
index be5820de7..649cdf4e7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -20,9 +20,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; * along with NewPipe. If not, see . */ -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; + import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; @@ -35,12 +35,12 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import javax.annotation.Nonnull; import java.io.IOException; -public class YoutubeTrendingExtractor extends KioskExtractor { +import javax.annotation.Nonnull; - private Document doc; +public class YoutubeTrendingExtractor extends KioskExtractor { + private JsonObject initialData; public YoutubeTrendingExtractor(StreamingService service, ListLinkHandler linkHandler, @@ -54,7 +54,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor { "?gl=" + getExtractorContentCountry().getCountryCode(); final Response response = downloader.get(url, getExtractorLocalization()); - doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } @Override @@ -70,99 +70,36 @@ public class YoutubeTrendingExtractor extends KioskExtractor { @Nonnull @Override public String getName() throws ParsingException { + String name; try { - Element a = doc.select("a[href*=\"/feed/trending\"]").first(); - Element span = 
a.select("span[class*=\"display-name\"]").first(); - Element nameSpan = span.select("span").first(); - return nameSpan.text(); + name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title") + .getArray("runs").getObject(0).getString("text"); } catch (Exception e) { throw new ParsingException("Could not get Trending name", e); } + if (name != null && !name.isEmpty()) { + return name; + } + throw new ParsingException("Could not get Trending name"); } @Nonnull @Override - public InfoItemsPage getInitialPage() throws ParsingException { + public InfoItemsPage getInitialPage() { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Elements uls = doc.select("ul[class*=\"expanded-shelf-content-list\"]"); + JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0).getObject("itemSectionRenderer") + .getArray("contents").getObject(0).getObject("shelfRenderer").getObject("content") + .getObject("expandedShelfContentsRenderer").getArray("items"); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (Element ul : uls) { - for (final Element li : ul.children()) { - final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first(); - collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { - @Override - public String getUrl() throws ParsingException { - try { - Element dl = el.select("h3").first().select("a").first(); - return dl.attr("abs:href"); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - } - - @Override - public String getName() throws ParsingException { - try { - Element dl = el.select("h3").first().select("a").first(); - return dl.text(); - } catch (Exception e) { - throw new ParsingException("Could not get web 
page url for the video", e); - } - } - - @Override - public String getUploaderUrl() throws ParsingException { - try { - String link = getUploaderLink().attr("abs:href"); - if (link.isEmpty()) { - throw new IllegalArgumentException("is empty"); - } - return link; - } catch (Exception e) { - throw new ParsingException("Could not get Uploader name"); - } - } - - private Element getUploaderLink() { - // this url is not always in the form "/channel/..." - // sometimes Youtube provides urls in the from "/user/..." - Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first(); - return uploaderEl.select("a").first(); - } - - @Override - public String getUploaderName() throws ParsingException { - try { - return getUploaderLink().text(); - } catch (Exception e) { - throw new ParsingException("Could not get Uploader name"); - } - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - String url; - Element te = li.select("span[class=\"yt-thumb-simple\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we've caught such an item. 
- if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } - return url; - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }); - } + for (Object ul : firstPageElements) { + final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer"); + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser)); } - return new InfoItemsPage<>(collector, getNextPageUrl()); + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 65ec7e3f6..51347d423 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -1,11 +1,16 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.utils.Parser; import java.net.URL; import java.text.ParseException; @@ -38,6 +43,8 @@ public class YoutubeParsingHelper { private YoutubeParsingHelper() { } + public static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00"; + private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user="; @@ -143,4 +150,68 @@ public class YoutubeParsingHelper { 
uploadDate.setTime(date); return uploadDate; } + + public static JsonObject getInitialData(String html) throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } + } + + /** + * Get the client version from a page + * @param initialData + * @param html The page HTML + * @return + * @throws ParsingException + */ + public static String getClientVersion(JsonObject initialData, String html) throws ParsingException { + if (initialData == null) initialData = getInitialData(html); + JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); + String shortClientVersion = null; + + // try to get version from initial data first + for (Object service : serviceTrackingParams) { + JsonObject s = (JsonObject) service; + if (s.getString("service").equals("CSI")) { + JsonArray params = s.getArray("params"); + for (Object param: params) { + JsonObject p = (JsonObject) param; + String key = p.getString("key"); + if (key != null && key.equals("cver")) { + return p.getString("value"); + } + } + } else if (s.getString("service").equals("ECATCHER")) { + // fallback to get a shortened client version which does not contain the last do digits + JsonArray params = s.getArray("params"); + for (Object param: params) { + JsonObject p = (JsonObject) param; + String key = p.getString("key"); + if (key != null && key.equals("client.version")) { + shortClientVersion = p.getString("value"); + } + } + } + } + + String clientVersion; + String[] patterns = { + "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", + "innertube_context_client_version\":\"([0-9\\.]+?)\"", + "client.version=([0-9\\.]+)" + }; + for (String pattern: patterns) { + try { + clientVersion = Parser.matchGroup1(pattern, html); + 
if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion; + } catch (Exception ignored) {} + } + + if (shortClientVersion != null) return shortClientVersion; + + throw new ParsingException("Could not get client version"); + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java index c17600742..13481b345 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java @@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory public String getUrl(String searchString, List contentFilters, String sortFilter) throws ParsingException { try { final String url = "https://www.youtube.com/results" - + "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8); + + "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8); if (contentFilters.size() > 0) { switch (contentFilters.get(0)) { - case VIDEOS: return url + "&sp=EgIQAVAU"; - case CHANNELS: return url + "&sp=EgIQAlAU"; - case PLAYLISTS: return url + "&sp=EgIQA1AU"; + case VIDEOS: return url + "&sp=EgIQAQ%253D%253D"; + case CHANNELS: return url + "&sp=EgIQAg%253D%253D"; + case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D"; case ALL: default: } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index ebd0ba16a..76aa2944f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -10,6 +10,9 @@ import java.util.List; public class Utils { + public static final 
String HTTP = "http://"; + public static final String HTTPS = "https://"; + private Utils() { //no instance } @@ -83,9 +86,6 @@ public class Utils { } } - private static final String HTTP = "http://"; - private static final String HTTPS = "https://"; - public static String replaceHttpWithHttps(final String url) { if (url == null) return null; diff --git a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java index 948975a05..e524ac8d4 100644 --- a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java +++ b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java @@ -20,7 +20,7 @@ import java.util.Map; public class DownloaderTestImpl extends Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0"; private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en"; private static DownloaderTestImpl instance = null; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index 317bd4fa4..fc4ffff31 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -170,7 +170,7 @@ public class YoutubeChannelExtractorTest { @Test public void testDescription() throws Exception { assertTrue("What it actually was: " + extractor.getDescription(), - extractor.getDescription().contains("Our World is Amazing. Questions? Ideas? Tweet me:")); + extractor.getDescription().contains("Our World is Amazing. \n\nQuestions? Ideas? 
Tweet me:")); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java index 63fc0375a..7add41262 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java @@ -12,6 +12,8 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; +import java.util.regex.Pattern; + import static java.util.Arrays.asList; import static org.junit.Assert.*; import static org.schabi.newpipe.extractor.ServiceList.YouTube; @@ -51,7 +53,12 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto @Test public void testGetSecondPageUrl() throws Exception { - assertEquals("https://www.youtube.com/results?q=pewdiepie&sp=EgIQAlAU&gl=GB&page=2", extractor.getNextPageUrl()); + // check that ctoken, continuation and itct are longer than 5 characters + Pattern pattern = Pattern.compile( + "https:\\/\\/www.youtube.com\\/results\\?search_query=pewdiepie&sp=EgIQAg%253D%253D&gl=GB&pbj=1" + + "&ctoken=[\\w%]{5,}?&continuation=[\\w%]{5,}?&itct=[\\w]{5,}?" 
+ ); + assertTrue(pattern.matcher(extractor.getNextPageUrl()).find()); } @Ignore diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java index 60bbf2ff7..8777cc701 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java @@ -28,13 +28,13 @@ public class YoutubeSearchQHTest { @Test public void testWithContentfilter() throws Exception { - assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAVAU", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAQ%253D%253D", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl()); - assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAlAU", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAg%253D%253D", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl()); - assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQA1AU", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAw%253D%253D", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl()); - assertEquals("https://www.youtube.com/results?q=asdf", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl()); }