From d097363b24dd9f82421ebe5677fad1c07d1b8cd1 Mon Sep 17 00:00:00 2001 From: Christian Schabesberger Date: Tue, 2 Feb 2016 18:43:20 +0100 Subject: [PATCH] restructure parser --- .../newpipe/VideoItemDetailActivity.java | 2 +- .../newpipe/VideoItemDetailFragment.java | 2 +- .../schabi/newpipe/crawler/DashMpdParser.java | 101 +++++++++ .../schabi/newpipe/crawler/RegexHelper.java | 47 +++++ .../newpipe/crawler/StreamingService.java | 10 +- .../schabi/newpipe/crawler/UrlIdHandler.java | 32 +++ .../newpipe/crawler/VideoExtractor.java | 118 +---------- .../org/schabi/newpipe/crawler/VideoInfo.java | 45 ++++ .../services/youtube/YoutubeSearchEngine.java | 6 +- .../services/youtube/YoutubeService.java | 13 +- .../services/youtube/YoutubeUrlIdHandler.java | 68 ++++++ .../youtube/YoutubeVideoExtractor.java | 195 +++++------------- 12 files changed, 365 insertions(+), 274 deletions(-) create mode 100644 app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java create mode 100644 app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java create mode 100644 app/src/main/java/org/schabi/newpipe/crawler/UrlIdHandler.java create mode 100644 app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeUrlIdHandler.java diff --git a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java index 8514160d4..b61095431 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java +++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java @@ -72,7 +72,7 @@ public class VideoItemDetailActivity extends AppCompatActivity { StreamingService[] serviceList = ServiceList.getServices(); //VideoExtractor videoExtractor = null; for (int i = 0; i < serviceList.length; i++) { - if (serviceList[i].acceptUrl(videoUrl)) { + if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) { arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i); currentStreamingService = i; //videoExtractor = ServiceList.getService(i).getExtractorInstance(); diff --git a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java index ff3b94933..f45cff230 100644 --- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java +++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java @@ -112,7 +112,7 @@ public class VideoItemDetailFragment extends Fragment { public void run() { try { videoExtractor = service.getExtractorInstance(videoUrl, new Downloader()); - VideoInfo videoInfo = videoExtractor.getVideoInfo(); + VideoInfo videoInfo = VideoInfo.getVideoInfo(videoExtractor, new Downloader()); h.post(new VideoResultReturnedRunnable(videoInfo)); h.post(new SetThumbnailRunnable( //todo: make bitmaps not bypass tor diff --git a/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java b/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java new file mode 100644 index 000000000..b893599dd --- /dev/null +++ b/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java @@ -0,0 +1,101 @@ +package org.schabi.newpipe.crawler; + +import android.util.Xml; + +import org.xmlpull.v1.XmlPullParser; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Vector; + +/** + * Created by Christian Schabesberger on 02.02.16. + * + * Copyright (C) Christian Schabesberger 2016 + * DashMpdParser.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +public class DashMpdParser { + + static class DashMpdParsingException extends ParsingException { + DashMpdParsingException(String message, Exception e) { + super(message, e); + } + } + + public static VideoInfo.AudioStream[] getAudioStreams(String dashManifestUrl, + Downloader downloader) + throws DashMpdParsingException { + String dashDoc; + try { + dashDoc = downloader.download(dashManifestUrl); + } catch(IOException ioe) { + throw new DashMpdParsingException("Could not get dash mpd: " + dashManifestUrl, ioe); + } + Vector audioStreams = new Vector<>(); + try { + XmlPullParser parser = Xml.newPullParser(); + parser.setInput(new StringReader(dashDoc)); + String tagName = ""; + String currentMimeType = ""; + int currentBandwidth = -1; + int currentSamplingRate = -1; + boolean currentTagIsBaseUrl = false; + for(int eventType = parser.getEventType(); + eventType != XmlPullParser.END_DOCUMENT; + eventType = parser.next() ) { + switch(eventType) { + case XmlPullParser.START_TAG: + tagName = parser.getName(); + if(tagName.equals("AdaptationSet")) { + currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType"); + } else if(tagName.equals("Representation") && currentMimeType.contains("audio")) { + currentBandwidth = Integer.parseInt( + parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth")); + currentSamplingRate = Integer.parseInt( + parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate")); + } else if(tagName.equals("BaseURL")) { + currentTagIsBaseUrl = true; + } + break; + + case XmlPullParser.TEXT: + if(currentTagIsBaseUrl && + (currentMimeType.contains("audio"))) { + int format = -1; + if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) { + format = MediaFormat.WEBMA.id; + } else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) { + format = MediaFormat.M4A.id; + } + audioStreams.add(new VideoInfo.AudioStream(parser.getText(), + format, currentBandwidth, currentSamplingRate)); + } + break; + case XmlPullParser.END_TAG: + if(tagName.equals("AdaptationSet")) { + currentMimeType = ""; + } else if(tagName.equals("BaseURL")) { + currentTagIsBaseUrl = false; + }//no break needed here + } + } + } catch(Exception e) { + throw new DashMpdParsingException("Could not parse Dash mpd", e); + } + return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]); + } +} diff --git a/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java b/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java new file mode 100644 index 000000000..a82386182 --- /dev/null +++ b/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java @@ -0,0 +1,47 @@ +package org.schabi.newpipe.crawler; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by Christian Schabesberger on 02.02.16. + * + * Copyright (C) Christian Schabesberger 2016 + * RegexHelper.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +/** avoid using regex !!! */ +public class RegexHelper { + + public static class RegexException extends ParsingException { + public RegexException(String message) { + super(message); + } + } + + public static String matchGroup1(String pattern, String input) throws RegexException { + Pattern pat = Pattern.compile(pattern); + Matcher mat = pat.matcher(input); + boolean foundMatch = mat.find(); + if (foundMatch) { + return mat.group(1); + } + else { + //Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\""); + throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\""); + } + } +} diff --git a/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java b/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java index 68c3da265..f34a40db8 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java @@ -27,11 +27,11 @@ public interface StreamingService { public String name = ""; } ServiceInfo getServiceInfo(); - VideoExtractor getExtractorInstance(String url, Downloader downloader) throws IOException, CrawlingException; + VideoExtractor getExtractorInstance(String url, Downloader downloader) + throws IOException, CrawlingException; SearchEngine getSearchEngineInstance(); - /**When a VIEW_ACTION is caught this function will test if the url delivered within the calling - Intent was meant to be watched with this Service. - Return false if this service shall not allow to be called through ACTIONs.*/ - boolean acceptUrl(String videoUrl); + UrlIdHandler getUrlIdHandler(); + + } diff --git a/app/src/main/java/org/schabi/newpipe/crawler/UrlIdHandler.java b/app/src/main/java/org/schabi/newpipe/crawler/UrlIdHandler.java new file mode 100644 index 000000000..5ff0a2b7d --- /dev/null +++ b/app/src/main/java/org/schabi/newpipe/crawler/UrlIdHandler.java @@ -0,0 +1,32 @@ +package org.schabi.newpipe.crawler; + +/** + * Created by Christian Schabesberger on 02.02.16. + * + * Copyright (C) Christian Schabesberger 2016 + * UrlIdHandler.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +public interface UrlIdHandler { + String getVideoUrl(String videoId); + String getVideoId(String siteUrl) throws ParsingException; + String cleanUrl(String siteUrl) throws ParsingException; + + /**When a VIEW_ACTION is caught this function will test if the url delivered within the calling + Intent was meant to be watched with this Service. + Return false if this service shall not allow to be called through ACTIONs.*/ + boolean acceptUrl(String videoUrl); +} diff --git a/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java b/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java index ac165c19e..41a63ebdb 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java @@ -20,14 +20,14 @@ package org.schabi.newpipe.crawler; * along with NewPipe. If not, see . */ -import java.util.List; +import java.net.URL; import java.util.Vector; /**Scrapes information from a video streaming service (eg, YouTube).*/ @SuppressWarnings("ALL") -public abstract class VideoExtractor { +public interface VideoExtractor { public class ExctractorInitException extends CrawlingException { public ExctractorInitException() {} @@ -42,13 +42,6 @@ public abstract class VideoExtractor { } } - public class RegexException extends ParsingException { - public RegexException() {} - public RegexException(String message) { - super(message); - } - } - public class ContentNotAvailableException extends ParsingException { public ContentNotAvailableException() {} public ContentNotAvailableException(String message) { @@ -62,111 +55,6 @@ public abstract class VideoExtractor { } } - protected final String pageUrl; - protected VideoInfo videoInfo; - - @SuppressWarnings("WeakerAccess") - public VideoExtractor(String url, Downloader dl) { - this.pageUrl = url; - } - - /**Fills out the video info fields which are common to all services. - * Probably needs to be overridden by subclasses*/ - public VideoInfo getVideoInfo() throws CrawlingException - { - if(videoInfo == null) { - videoInfo = new VideoInfo(); - } - - if(videoInfo.webpage_url.isEmpty()) { - videoInfo.webpage_url = pageUrl; - } - - - if (videoInfo.title.isEmpty()) { - videoInfo.title = getTitle(); - } - - if (videoInfo.duration < 1) { - videoInfo.duration = getLength(); - } - - - if (videoInfo.uploader.isEmpty()) { - videoInfo.uploader = getUploader(); - } - - if (videoInfo.description.isEmpty()) { - videoInfo.description = getDescription(); - } - - if (videoInfo.view_count == -1) { - videoInfo.view_count = getViews(); - } - - if (videoInfo.upload_date.isEmpty()) { - videoInfo.upload_date = getUploadDate(); - } - - if (videoInfo.thumbnail_url.isEmpty()) { - videoInfo.thumbnail_url = getThumbnailUrl(); - } - - if (videoInfo.id.isEmpty()) { - videoInfo.id = getVideoId(pageUrl); - } - - /** Load and extract audio*/ - if (videoInfo.audioStreams == null) { - videoInfo.audioStreams = getAudioStreams(); - } - /** Extract video stream url*/ - if (videoInfo.videoStreams == null) { - videoInfo.videoStreams = getVideoStreams(); - } - - if (videoInfo.uploader_thumbnail_url.isEmpty()) { - videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl(); - } - - if (videoInfo.startPosition < 0) { - videoInfo.startPosition = getTimeStamp(); - } - - if(videoInfo.dashMpdUrl.isEmpty()) { - videoInfo.dashMpdUrl = getDashMpdUrl(); - } - - if(videoInfo.average_rating.isEmpty()) { - videoInfo.average_rating = getAverageRating(); - } - - if(videoInfo.like_count == -1) { - videoInfo.like_count = getLikeCount(); - } - - if(videoInfo.dislike_count == -1) { - videoInfo.dislike_count = getDislikeCount(); - } - - if(videoInfo.nextVideo == null) { - videoInfo.nextVideo = getNextVideo(); - } - - if(videoInfo.relatedVideos == null) { - videoInfo.relatedVideos = getRelatedVideos(); - } - - //Bitmap thumbnail = null; - //Bitmap uploader_thumbnail = null; - //int videoAvailableStatus = VIDEO_AVAILABLE; - return videoInfo; - } - - //todo: remove these functions, or make them static, otherwise its useles, to have them here - public abstract String getVideoUrl(String videoId); - public abstract String getVideoId(String siteUrl) throws ParsingException; - /////////////////////////////////////////////////////////////////////////////////////////// public abstract int getTimeStamp() throws ParsingException; public abstract String getTitle() throws ParsingException; public abstract String getDescription() throws ParsingException; @@ -185,4 +73,6 @@ public abstract class VideoExtractor { public abstract int getDislikeCount() throws ParsingException; public abstract VideoPreviewInfo getNextVideo() throws ParsingException; public abstract Vector getRelatedVideos() throws ParsingException; + public abstract UrlIdHandler getUrlIdConverter(); + public abstract String getPageUrl(); } diff --git a/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java b/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java index fbea6d0fc..71503b271 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.crawler; +import java.io.IOException; import java.util.List; /** @@ -26,8 +27,52 @@ import java.util.List; @SuppressWarnings("ALL") public class VideoInfo extends AbstractVideoInfo { + /**Fills out the video info fields which are common to all services. + * Probably needs to be overridden by subclasses*/ + public static VideoInfo getVideoInfo(VideoExtractor extractor, Downloader downloader) + throws CrawlingException, IOException { + VideoInfo videoInfo = new VideoInfo(); + + UrlIdHandler uiconv = extractor.getUrlIdConverter(); + + videoInfo.webpage_url = extractor.getPageUrl(); + videoInfo.title = extractor.getTitle(); + videoInfo.duration = extractor.getLength(); + videoInfo.uploader = extractor.getUploader(); + videoInfo.description = extractor.getDescription(); + videoInfo.view_count = extractor.getViews(); + videoInfo.upload_date = extractor.getUploadDate(); + videoInfo.thumbnail_url = extractor.getThumbnailUrl(); + videoInfo.id = uiconv.getVideoId(extractor.getPageUrl()); + videoInfo.dashMpdUrl = extractor.getDashMpdUrl(); + /** Load and extract audio*/ + videoInfo.audioStreams = extractor.getAudioStreams(); + if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) { + if(videoInfo.audioStreams == null || videoInfo.audioStreams.length == 0) { + videoInfo.audioStreams = + DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader); + } + } + /** Extract video stream url*/ + videoInfo.videoStreams = extractor.getVideoStreams(); + videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl(); + videoInfo.startPosition = extractor.getTimeStamp(); + videoInfo.average_rating = extractor.getAverageRating(); + videoInfo.like_count = extractor.getLikeCount(); + videoInfo.dislike_count = extractor.getDislikeCount(); + videoInfo.nextVideo = extractor.getNextVideo(); + videoInfo.relatedVideos = extractor.getRelatedVideos(); + + //Bitmap thumbnail = null; + //Bitmap uploader_thumbnail = null; + //int videoAvailableStatus = VIDEO_AVAILABLE; + return videoInfo; + } + + public String uploader_thumbnail_url = ""; public String description = ""; + /*todo: make this lists over vectors*/ public VideoStream[] videoStreams = null; public AudioStream[] audioStreams = null; // video streams provided by the dash mpd do not need to be provided as VideoStream. diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java index 9d7ce88ef..a6d4857c1 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java @@ -52,7 +52,8 @@ public class YoutubeSearchEngine implements SearchEngine { private static final String TAG = YoutubeSearchEngine.class.toString(); @Override - public Result search(String query, int page, String languageCode, Downloader downloader) throws IOException, ParsingException { + public Result search(String query, int page, String languageCode, Downloader downloader) + throws IOException, ParsingException { Result result = new Result(); Uri.Builder builder = new Uri.Builder(); builder.scheme("https") @@ -171,7 +172,8 @@ public class YoutubeSearchEngine implements SearchEngine { try { dBuilder = dbFactory.newDocumentBuilder(); - doc = dBuilder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("utf-8")))); + doc = dBuilder.parse(new InputSource( + new ByteArrayInputStream(response.getBytes("utf-8")))); doc.getDocumentElement().normalize(); } catch (ParserConfigurationException | SAXException | IOException e) { e.printStackTrace(); diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java index 1a765ab66..40d7d41f7 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.crawler.services.youtube; import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.Downloader; import org.schabi.newpipe.crawler.StreamingService; +import org.schabi.newpipe.crawler.UrlIdHandler; import org.schabi.newpipe.crawler.VideoExtractor; import org.schabi.newpipe.crawler.SearchEngine; @@ -37,8 +38,10 @@ public class YoutubeService implements StreamingService { return serviceInfo; } @Override - public VideoExtractor getExtractorInstance(String url, Downloader downloader) throws CrawlingException, IOException { - if(acceptUrl(url)) { + public VideoExtractor getExtractorInstance(String url, Downloader downloader) + throws CrawlingException, IOException { + UrlIdHandler urlIdHandler = new YoutubeUrlIdHandler(); + if(urlIdHandler.acceptUrl(url)) { return new YoutubeVideoExtractor(url, downloader) ; } else { @@ -49,9 +52,9 @@ public class YoutubeService implements StreamingService { public SearchEngine getSearchEngineInstance() { return new YoutubeSearchEngine(); } + @Override - public boolean acceptUrl(String videoUrl) { - return videoUrl.contains("youtube") || - videoUrl.contains("youtu.be"); + public UrlIdHandler getUrlIdHandler() { + return new YoutubeUrlIdHandler(); } } diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeUrlIdHandler.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeUrlIdHandler.java new file mode 100644 index 000000000..d0ccafafa --- /dev/null +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeUrlIdHandler.java @@ -0,0 +1,68 @@ +package org.schabi.newpipe.crawler.services.youtube; + +import org.schabi.newpipe.crawler.ParsingException; +import org.schabi.newpipe.crawler.RegexHelper; +import org.schabi.newpipe.crawler.UrlIdHandler; + +/** + * Created by Christian Schabesberger on 02.02.16. + * + * Copyright (C) Christian Schabesberger 2016 + * YoutubeUrlIdHandler.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +public class YoutubeUrlIdHandler implements UrlIdHandler { + @SuppressWarnings("WeakerAccess") + @Override + public String getVideoUrl(String videoId) { + return "https://www.youtube.com/watch?v=" + videoId; + } + + @SuppressWarnings("WeakerAccess") + @Override + public String getVideoId(String url) throws ParsingException { + String id; + String pat; + + if(url.contains("youtube")) { + pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})"; + } + else if(url.contains("youtu.be")) { + pat = "youtu\\.be/([a-zA-Z0-9_-]{11})"; + } + else { + throw new ParsingException("Error no suitable url: " + url); + } + + id = RegexHelper.matchGroup1(pat, url); + if(!id.isEmpty()){ + //Log.i(TAG, "string \""+url+"\" matches!"); + return id; + } else { + throw new ParsingException("Error could not parse url: " + url); + } + } + + public String cleanUrl(String complexUrl) throws ParsingException { + return getVideoUrl(getVideoId(complexUrl)); + } + + @Override + public boolean acceptUrl(String videoUrl) { + return videoUrl.contains("youtube") || + videoUrl.contains("youtu.be"); + } +} diff --git a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java index ced24cc95..2baae72ed 100644 --- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java +++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java @@ -15,6 +15,8 @@ import org.mozilla.javascript.ScriptableObject; import org.schabi.newpipe.crawler.CrawlingException; import org.schabi.newpipe.crawler.Downloader; import org.schabi.newpipe.crawler.ParsingException; +import org.schabi.newpipe.crawler.RegexHelper; +import org.schabi.newpipe.crawler.UrlIdHandler; import org.schabi.newpipe.crawler.VideoExtractor; import org.schabi.newpipe.crawler.MediaFormat; import org.schabi.newpipe.crawler.VideoInfo; @@ -25,11 +27,8 @@ import java.io.IOException; import java.io.StringReader; import java.net.URLDecoder; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Vector; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Created by Christian Schabesberger on 06.08.15. @@ -51,7 +50,7 @@ import java.util.regex.Pattern; * along with NewPipe. If not, see . */ -public class YoutubeVideoExtractor extends VideoExtractor { +public class YoutubeVideoExtractor implements VideoExtractor { public class DecryptException extends ParsingException { DecryptException(Throwable cause) { @@ -75,7 +74,6 @@ public class YoutubeVideoExtractor extends VideoExtractor { private static final String TAG = YoutubeVideoExtractor.class.toString(); private final Document doc; private JSONObject playerArgs; - private String errorMessage = ""; // static values private static final String DECRYPTION_FUNC_NAME="decrypt"; @@ -83,23 +81,27 @@ public class YoutubeVideoExtractor extends VideoExtractor { // cached values private static volatile String decryptionCode = ""; + UrlIdHandler urlidhandler = new YoutubeUrlIdHandler(); + String pageUrl = ""; + private Downloader downloader; public YoutubeVideoExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException { //most common videoInfo fields are now set in our superclass, for all services - super(pageUrl, dl); downloader = dl; - String pageContent = downloader.download(cleanUrl(pageUrl)); + this.pageUrl = urlidhandler.cleanUrl(pageUrl); + String pageContent = downloader.download(this.pageUrl); doc = Jsoup.parse(pageContent, pageUrl); String ytPlayerConfigRaw; JSONObject ytPlayerConfig; //attempt to load the youtube js player JSON arguments try { - ytPlayerConfigRaw = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent); + ytPlayerConfigRaw = + RegexHelper.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent); ytPlayerConfig = new JSONObject(ytPlayerConfigRaw); playerArgs = ytPlayerConfig.getJSONObject("args"); - } catch (RegexException e) { + } catch (RegexHelper.RegexException e) { String errorReason = findErrorReason(doc); switch(errorReason) { case "GEMA": @@ -233,7 +235,16 @@ public class YoutubeVideoExtractor extends VideoExtractor { @Override public String getDashMpdUrl() throws ParsingException { try { - return playerArgs.getString("dashmpd"); + String dashManifest = playerArgs.getString("dashmpd"); + if(!dashManifest.contains("/signature/")) { + String encryptedSig = RegexHelper.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest); + String decryptedSig; + + decryptedSig = decryptSignature(encryptedSig, decryptionCode); + dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig); + } + + return dashManifest; } catch(NullPointerException e) { throw new ParsingException( "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e); @@ -244,15 +255,8 @@ public class YoutubeVideoExtractor extends VideoExtractor { @Override public VideoInfo.AudioStream[] getAudioStreams() throws ParsingException { - try { - String dashManifest = playerArgs.getString("dashmpd"); - return parseDashManifest(dashManifest, decryptionCode); - } catch (NullPointerException e) { - throw new ParsingException( - "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e); - } catch (Exception e) { - throw new ParsingException(e); - } + /* If we provide a valid dash manifest, we don't need to provide audio streams extra */ + return null; } @Override @@ -300,37 +304,6 @@ public class YoutubeVideoExtractor extends VideoExtractor { return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]); } - @SuppressWarnings("WeakerAccess") - @Override - public String getVideoId(String url) throws ParsingException { - String id; - String pat; - - if(url.contains("youtube")) { - pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})"; - } - else if(url.contains("youtu.be")) { - pat = "youtu\\.be/([a-zA-Z0-9_-]{11})"; - } - else { - throw new ParsingException("Error no suitable url: " + url); - } - - id = matchGroup1(pat, url); - if(!id.isEmpty()){ - //Log.i(TAG, "string \""+url+"\" matches!"); - return id; - } else { - throw new ParsingException("Error could not parse url: " + url); - } - } - - @SuppressWarnings("WeakerAccess") - @Override - public String getVideoUrl(String videoId) { - return "https://www.youtube.com/watch?v=" + videoId; - } - /**Attempts to parse (and return) the offset to start playing the video from. * @return the offset (in seconds), or 0 if no timestamp is found.*/ @Override @@ -338,8 +311,8 @@ public class YoutubeVideoExtractor extends VideoExtractor { //todo: add unit test for timestamp String timeStamp; try { - timeStamp = matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl); - } catch (RegexException e) { + timeStamp = RegexHelper.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl); + } catch (RegexHelper.RegexException e) { // catch this instantly since an url does not necessarily have to have a time stamp // -2 because well the testing system will then know its the regex that failed :/ @@ -354,15 +327,15 @@ public class YoutubeVideoExtractor extends VideoExtractor { String minutesString = ""; String hoursString = ""; try { - secondsString = matchGroup1("(\\d{1,3})s", timeStamp); - minutesString = matchGroup1("(\\d{1,3})m", timeStamp); - hoursString = matchGroup1("(\\d{1,3})h", timeStamp); + secondsString = RegexHelper.matchGroup1("(\\d{1,3})s", timeStamp); + minutesString = RegexHelper.matchGroup1("(\\d{1,3})m", timeStamp); + hoursString = RegexHelper.matchGroup1("(\\d{1,3})h", timeStamp); } catch (Exception e) { //it could be that time is given in another method if (secondsString.isEmpty() //if nothing was got, && minutesString.isEmpty()//treat as unlabelled seconds && hoursString.isEmpty()) { - secondsString = matchGroup1("t=(\\d{1,3})", timeStamp); + secondsString = RegexHelper.matchGroup1("t=(\\d{1,3})", timeStamp); } } @@ -455,72 +428,14 @@ public class YoutubeVideoExtractor extends VideoExtractor { } } - private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) throws RegexException, DecryptException { - if(!dashManifest.contains("/signature/")) { - String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest); - String decryptedSig; + @Override + public UrlIdHandler getUrlIdConverter() { + return new YoutubeUrlIdHandler(); + } - decryptedSig = decryptSignature(encryptedSig, decryptoinCode); - dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig); - } - String dashDoc; - try { - dashDoc = downloader.download(dashManifest); - } catch(IOException ioe) { - throw new DecryptException("Could not get dash mpd", ioe); - } - Vector audioStreams = new Vector<>(); - try { - XmlPullParser parser = Xml.newPullParser(); - parser.setInput(new StringReader(dashDoc)); - String tagName = ""; - String currentMimeType = ""; - int currentBandwidth = -1; - int currentSamplingRate = -1; - boolean currentTagIsBaseUrl = false; - for(int eventType = parser.getEventType(); - eventType != XmlPullParser.END_DOCUMENT; - eventType = parser.next() ) { - switch(eventType) { - case XmlPullParser.START_TAG: - tagName = parser.getName(); - if(tagName.equals("AdaptationSet")) { - currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType"); - } else if(tagName.equals("Representation") && currentMimeType.contains("audio")) { - currentBandwidth = Integer.parseInt( - parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth")); - currentSamplingRate = Integer.parseInt( - parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate")); - } else if(tagName.equals("BaseURL")) { - currentTagIsBaseUrl = true; - } - break; - - case XmlPullParser.TEXT: - if(currentTagIsBaseUrl && - (currentMimeType.contains("audio"))) { - int format = -1; - if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) { - format = MediaFormat.WEBMA.id; - } else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) { - format = MediaFormat.M4A.id; - } - audioStreams.add(new VideoInfo.AudioStream(parser.getText(), - format, currentBandwidth, currentSamplingRate)); - } - //missing break here? - case XmlPullParser.END_TAG: - if(tagName.equals("AdaptationSet")) { - currentMimeType = ""; - } else if(tagName.equals("BaseURL")) { - currentTagIsBaseUrl = false; - }//no break needed here - } - } - } catch(Exception e) { - e.printStackTrace(); - } - return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]); + @Override + public String getPageUrl() { + return pageUrl; } /**Provides information about links to other videos on the video page, such as related videos. @@ -533,7 +448,7 @@ public class YoutubeVideoExtractor extends VideoExtractor { info.webpage_url = li.select("a.content-link").first() .attr("abs:href"); - info.id = matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url); + info.id = RegexHelper.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url); //todo: check NullPointerException causing info.title = li.select("span.title").first().text(); @@ -584,15 +499,20 @@ public class YoutubeVideoExtractor extends VideoExtractor { try { String playerCode = downloader.download(playerUrl); - decryptionFuncName = matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode); + decryptionFuncName = + RegexHelper.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode); - String functionPattern = "(" + decryptionFuncName.replace("$", "\\$") + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})"; - decryptionFunc = "var " + matchGroup1(functionPattern, playerCode) + ";"; + String functionPattern = "(" + + decryptionFuncName.replace("$", "\\$") + + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})"; + decryptionFunc = "var " + RegexHelper.matchGroup1(functionPattern, playerCode) + ";"; - helperObjectName = matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc); + helperObjectName = RegexHelper + .matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc); - String helperPattern = "(var " + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)"; - helperObject = matchGroup1(helperPattern, playerCode); + String helperPattern = "(var " + + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)"; + helperObject = RegexHelper.matchGroup1(helperPattern, playerCode); callerFunc = callerFunc.replace("%%", decryptionFuncName); @@ -624,25 +544,8 @@ public class YoutubeVideoExtractor extends VideoExtractor { return (result == null ? "" : result.toString()); } - private String cleanUrl(String complexUrl) throws ParsingException { - return getVideoUrl(getVideoId(complexUrl)); - } - - private String matchGroup1(String pattern, String input) throws RegexException { - Pattern pat = Pattern.compile(pattern); - Matcher mat = pat.matcher(input); - boolean foundMatch = mat.find(); - if (foundMatch) { - return mat.group(1); - } - else { - //Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\""); - throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\""); - } - } - private String findErrorReason(Document doc) { - errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text(); + String errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text(); if(errorMessage.contains("GEMA")) { // Gema sometimes blocks youtube music content in germany: // https://www.gema.de/en/