restructure parser

2016-02-02 18:43:20 +01:00 · 2016-02-02 18:43:20 +01:00 · d097363b24
commit d097363b24
parent bad576c23d
12 changed files with 365 additions and 274 deletions
--- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java
+++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailActivity.java
@ -72,7 +72,7 @@ public class VideoItemDetailActivity extends AppCompatActivity {
                StreamingService[] serviceList = ServiceList.getServices();
                //VideoExtractor videoExtractor = null;
                for (int i = 0; i < serviceList.length; i++) {
-                    if (serviceList[i].acceptUrl(videoUrl)) {
+                    if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) {
                        arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i);
                        currentStreamingService = i;
                        //videoExtractor = ServiceList.getService(i).getExtractorInstance();
--- a/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java
+++ b/app/src/main/java/org/schabi/newpipe/VideoItemDetailFragment.java
@ -112,7 +112,7 @@ public class VideoItemDetailFragment extends Fragment {
        public void run() {
            try {
                videoExtractor = service.getExtractorInstance(videoUrl, new Downloader());
-                VideoInfo videoInfo = videoExtractor.getVideoInfo();
+                VideoInfo videoInfo = VideoInfo.getVideoInfo(videoExtractor, new Downloader());
                h.post(new VideoResultReturnedRunnable(videoInfo));
                h.post(new SetThumbnailRunnable(
                        //todo: make bitmaps not bypass tor
--- a/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java
@ -0,0 +1,101 @@
+package org.schabi.newpipe.crawler;
+
+import android.util.Xml;
+
+import org.xmlpull.v1.XmlPullParser;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Vector;
+
+/**
+ * Created by Christian Schabesberger on 02.02.16.
+ *
+ * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
+ * DashMpdParser.java is part of NewPipe.
+ *
+ * NewPipe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * NewPipe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+public class DashMpdParser {
+
+    /** Signals that the dash manifest could not be downloaded or could not be parsed. */
+    static class DashMpdParsingException extends ParsingException {
+        DashMpdParsingException(String message, Exception e) {
+            super(message, e);
+        }
+    }
+
+    /**
+     * Downloads the dash manifest and collects the audio streams it announces.
+     * One AudioStream is created for every BaseURL text that is read while the mimeType of
+     * the current AdaptationSet contains "audio"; bandwidth and sampling rate are taken from
+     * the most recently opened Representation of such an AdaptationSet.
+     *
+     * @param dashManifestUrl the url the manifest is downloaded from
+     * @param downloader      used to download the manifest
+     * @return the audio streams found; an empty array if the manifest announces none
+     * @throws DashMpdParsingException if the manifest could not be downloaded or parsed
+     */
+    public static VideoInfo.AudioStream[] getAudioStreams(String dashManifestUrl,
+                                                             Downloader downloader)
+            throws DashMpdParsingException {
+        String dashDoc;
+        try {
+            dashDoc = downloader.download(dashManifestUrl);
+        } catch(IOException ioe) {
+            throw new DashMpdParsingException("Could not get dash mpd: " + dashManifestUrl, ioe);
+        }
+        Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
+        try {
+            XmlPullParser parser = Xml.newPullParser();
+            parser.setInput(new StringReader(dashDoc));
+            String tagName;
+            String currentMimeType = "";
+            int currentBandwidth = -1;
+            int currentSamplingRate = -1;
+            boolean currentTagIsBaseUrl = false;
+            for(int eventType = parser.getEventType();
+                eventType != XmlPullParser.END_DOCUMENT;
+                eventType = parser.next() ) {
+                switch(eventType) {
+                    case XmlPullParser.START_TAG:
+                        tagName = parser.getName();
+                        if(tagName.equals("AdaptationSet")) {
+                            // The attribute may be missing, in which case null is returned.
+                            // Fall back to "" so that the contains() checks below stay safe
+                            // and the set is simply skipped instead of aborting the parsing.
+                            String mimeType =
+                                    parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
+                            currentMimeType = mimeType == null ? "" : mimeType;
+                        } else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
+                            currentBandwidth = Integer.parseInt(
+                                    parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
+                            currentSamplingRate = Integer.parseInt(
+                                    parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
+                        } else if(tagName.equals("BaseURL")) {
+                            currentTagIsBaseUrl = true;
+                        }
+                        break;
+
+                    case XmlPullParser.TEXT:
+                        if(currentTagIsBaseUrl && currentMimeType.contains("audio")) {
+                            // -1 marks an audio mime type that is none of the known formats
+                            int format = -1;
+                            if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
+                                format = MediaFormat.WEBMA.id;
+                            } else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
+                                format = MediaFormat.M4A.id;
+                            }
+                            audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
+                                    format, currentBandwidth, currentSamplingRate));
+                        }
+                        break;
+                    case XmlPullParser.END_TAG:
+                        // Ask the parser which tag is being closed. The name remembered from
+                        // the last START_TAG belongs to a child element by the time the
+                        // enclosing AdaptationSet ends, so the state would never be reset.
+                        tagName = parser.getName();
+                        if(tagName.equals("AdaptationSet")) {
+                            currentMimeType = "";
+                        } else if(tagName.equals("BaseURL")) {
+                            currentTagIsBaseUrl = false;
+                        }//no break needed here
+                }
+            }
+        } catch(Exception e) {
+            throw new DashMpdParsingException("Could not parse Dash mpd", e);
+        }
+        return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
+    }
+}
--- a/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/RegexHelper.java
@ -0,0 +1,47 @@
+package org.schabi.newpipe.crawler;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Created by Christian Schabesberger on 02.02.16.
+ *
+ * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
+ * RegexHelper.java is part of NewPipe.
+ *
+ * NewPipe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * NewPipe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Small helper around java.util.regex for the crawlers.
+ * Avoid using regex where a proper parser is available!
+ */
+public class RegexHelper {
+
+    /** Thrown when a pattern could not be found inside of the given input. */
+    public static class RegexException extends ParsingException {
+        public RegexException(String message) {
+            super(message);
+        }
+    }
+
+    /**
+     * Searches input for the first occurrence of pattern and returns its capture group 1.
+     *
+     * @param pattern the regular expression; it has to declare at least one capture group
+     * @param input   the text to search in
+     * @return the text captured by group 1 of the first match
+     * @throws RegexException if pattern does not occur anywhere inside of input
+     */
+    public static String matchGroup1(String pattern, String input) throws RegexException {
+        Matcher mat = Pattern.compile(pattern).matcher(input);
+        if (!mat.find()) {
+            // pattern and input are each wrapped in their own pair of quotes,
+            // so that the message shows where one ends and the other begins
+            throw new RegexException(
+                    "failed to find pattern \"" + pattern + "\" inside of \"" + input + "\"");
+        }
+        return mat.group(1);
+    }
+}
--- a/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/StreamingService.java
@ -27,11 +27,11 @@ public interface StreamingService {
        public String name = "";
    }
    ServiceInfo getServiceInfo();
-    VideoExtractor getExtractorInstance(String url, Downloader downloader) throws IOException, CrawlingException;
+    VideoExtractor getExtractorInstance(String url, Downloader downloader)
+            throws IOException, CrawlingException;
    SearchEngine getSearchEngineInstance();

-    /**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
-    Intent was meant to be watched with this Service.
-    Return false if this service shall not allow to be called through ACTIONs.*/
-    boolean acceptUrl(String videoUrl);
+    UrlIdHandler getUrlIdHandler();
+
+
 }
--- a/app/src/main/java/org/schabi/newpipe/crawler/UrlIdHandler.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/UrlIdHandler.java
@ -0,0 +1,32 @@
+package org.schabi.newpipe.crawler;
+
+/**
+ * Created by Christian Schabesberger on 02.02.16.
+ *
+ * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
+ * UrlIdHandler.java is part of NewPipe.
+ *
+ * NewPipe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * NewPipe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**Translates between the urls and the video ids of one streaming service.
+ * NOTE(review): apart from acceptUrl the contracts below are inferred from the method
+ * signatures and from YoutubeUrlIdHandler - confirm them before implementing another service.*/
+public interface UrlIdHandler {
+    /**Builds the url of the video page that belongs to the given video id.*/
+    String getVideoUrl(String videoId);
+    /**Extracts the video id from the url of a video page.
+     * @throws ParsingException presumably if no id can be found inside of siteUrl.*/
+    String getVideoId(String siteUrl) throws ParsingException;
+    /**Returns a cleaned up form of siteUrl; YoutubeUrlIdHandler rebuilds the url from the
+     * video id alone.
+     * @throws ParsingException presumably if siteUrl can not be understood.*/
+    String cleanUrl(String siteUrl) throws ParsingException;
+
+    /**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
+     Intent was meant to be watched with this Service.
+     Return false if this service shall not allow to be called through ACTIONs.*/
+    boolean acceptUrl(String videoUrl);
+}
--- a/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/VideoExtractor.java
@ -20,14 +20,14 @@ package org.schabi.newpipe.crawler;
 * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
 */

-import java.util.List;
+import java.net.URL;
 import java.util.Vector;

 /**Scrapes information from a video streaming service (eg, YouTube).*/


@SuppressWarnings("ALL")
-public abstract class VideoExtractor {
+public interface VideoExtractor {

    public class ExctractorInitException extends CrawlingException {
        public ExctractorInitException() {}
@ -42,13 +42,6 @@ public abstract class VideoExtractor {
        }
    }

-    public class RegexException extends ParsingException {
-        public RegexException() {}
-        public RegexException(String message) {
-            super(message);
-        }
-    }
-
    public class ContentNotAvailableException extends ParsingException {
        public ContentNotAvailableException() {}
        public ContentNotAvailableException(String message) {
@ -62,111 +55,6 @@ public abstract class VideoExtractor {
        }
    }

-    protected final String pageUrl;
-    protected VideoInfo videoInfo;
-
-    @SuppressWarnings("WeakerAccess")
-    public VideoExtractor(String url, Downloader dl) {
-        this.pageUrl = url;
-    }
-
-    /**Fills out the video info fields which are common to all services.
-     * Probably needs to be overridden by subclasses*/
-    public VideoInfo getVideoInfo() throws CrawlingException
-    {
-        if(videoInfo == null) {
-            videoInfo = new VideoInfo();
-        }
-
-        if(videoInfo.webpage_url.isEmpty()) {
-            videoInfo.webpage_url = pageUrl;
-        }
-
-
-        if (videoInfo.title.isEmpty()) {
-            videoInfo.title = getTitle();
-        }
-
-        if (videoInfo.duration < 1) {
-            videoInfo.duration = getLength();
-        }
-
-
-        if (videoInfo.uploader.isEmpty()) {
-            videoInfo.uploader = getUploader();
-        }
-
-        if (videoInfo.description.isEmpty()) {
-            videoInfo.description = getDescription();
-        }
-
-        if (videoInfo.view_count == -1) {
-            videoInfo.view_count = getViews();
-        }
-
-        if (videoInfo.upload_date.isEmpty()) {
-            videoInfo.upload_date = getUploadDate();
-        }
-
-        if (videoInfo.thumbnail_url.isEmpty()) {
-            videoInfo.thumbnail_url = getThumbnailUrl();
-        }
-
-        if (videoInfo.id.isEmpty()) {
-            videoInfo.id = getVideoId(pageUrl);
-        }
-
-        /** Load and extract audio*/
-        if (videoInfo.audioStreams == null) {
-            videoInfo.audioStreams = getAudioStreams();
-        }
-        /** Extract video stream url*/
-        if (videoInfo.videoStreams == null) {
-            videoInfo.videoStreams = getVideoStreams();
-        }
-
-        if (videoInfo.uploader_thumbnail_url.isEmpty()) {
-            videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl();
-        }
-
-        if (videoInfo.startPosition < 0) {
-            videoInfo.startPosition = getTimeStamp();
-        }
-
-        if(videoInfo.dashMpdUrl.isEmpty()) {
-            videoInfo.dashMpdUrl = getDashMpdUrl();
-        }
-
-        if(videoInfo.average_rating.isEmpty()) {
-            videoInfo.average_rating = getAverageRating();
-        }
-
-        if(videoInfo.like_count == -1) {
-            videoInfo.like_count = getLikeCount();
-        }
-
-        if(videoInfo.dislike_count == -1) {
-            videoInfo.dislike_count = getDislikeCount();
-        }
-
-        if(videoInfo.nextVideo == null) {
-            videoInfo.nextVideo = getNextVideo();
-        }
-
-        if(videoInfo.relatedVideos == null) {
-            videoInfo.relatedVideos = getRelatedVideos();
-        }
-
-        //Bitmap thumbnail = null;
-        //Bitmap uploader_thumbnail = null;
-        //int videoAvailableStatus = VIDEO_AVAILABLE;
-        return videoInfo;
-    }
-
-    //todo: remove these functions, or make them static, otherwise its useles, to have them here
-    public abstract String getVideoUrl(String videoId);
-    public abstract String getVideoId(String siteUrl) throws ParsingException;
-    ///////////////////////////////////////////////////////////////////////////////////////////
    public abstract int getTimeStamp() throws ParsingException;
    public abstract String getTitle() throws ParsingException;
    public abstract String getDescription() throws ParsingException;
@ -185,4 +73,6 @@ public abstract class VideoExtractor {
    public abstract int getDislikeCount() throws ParsingException;
    public abstract VideoPreviewInfo getNextVideo() throws ParsingException;
    public abstract Vector<VideoPreviewInfo> getRelatedVideos() throws ParsingException;
+    public abstract UrlIdHandler getUrlIdConverter();
+    public abstract String getPageUrl();
 }
--- a/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/VideoInfo.java
@ -1,5 +1,6 @@
 package org.schabi.newpipe.crawler;

+import java.io.IOException;
 import java.util.List;

 /**
@ -26,8 +27,52 @@ import java.util.List;
@SuppressWarnings("ALL")
 public class VideoInfo extends AbstractVideoInfo {

+    /**Creates a new VideoInfo and fills out its fields by querying the given extractor.
+     * The video id is derived from the page url through the extractor's UrlIdHandler.
+     * If the extractor reports a dash manifest url but delivers no audio streams itself,
+     * the audio streams are read from that manifest by the DashMpdParser instead.
+     * The getters are called one after another, so the first one that fails aborts the
+     * whole call.
+     * @param extractor the source of all values
+     * @param downloader only handed on to the DashMpdParser
+     * @return the filled out VideoInfo
+     * @throws CrawlingException if one of the extractor getters or the parsing of the
+     *                           dash manifest fails
+     * @throws IOException NOTE(review): declared, but no call visible here throws it
+     *                     directly - confirm whether it is still needed*/
+    public static VideoInfo getVideoInfo(VideoExtractor extractor, Downloader downloader)
+            throws CrawlingException, IOException {
+        VideoInfo videoInfo = new VideoInfo();
+
+        UrlIdHandler uiconv = extractor.getUrlIdConverter();
+
+        videoInfo.webpage_url = extractor.getPageUrl();
+        videoInfo.title = extractor.getTitle();
+        videoInfo.duration = extractor.getLength();
+        videoInfo.uploader = extractor.getUploader();
+        videoInfo.description = extractor.getDescription();
+        videoInfo.view_count = extractor.getViews();
+        videoInfo.upload_date = extractor.getUploadDate();
+        videoInfo.thumbnail_url = extractor.getThumbnailUrl();
+        videoInfo.id = uiconv.getVideoId(extractor.getPageUrl());
+        videoInfo.dashMpdUrl = extractor.getDashMpdUrl();
+        // Audio: prefer the streams of the extractor, and fall back to the dash manifest
+        // only if there is a manifest url and the extractor delivered nothing.
+        videoInfo.audioStreams = extractor.getAudioStreams();
+        if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) {
+            if(videoInfo.audioStreams == null || videoInfo.audioStreams.length == 0) {
+                videoInfo.audioStreams =
+                        DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader);
+            }
+        }
+        // Video streams and all remaining meta data come from the extractor only.
+        videoInfo.videoStreams = extractor.getVideoStreams();
+        videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl();
+        videoInfo.startPosition = extractor.getTimeStamp();
+        videoInfo.average_rating = extractor.getAverageRating();
+        videoInfo.like_count = extractor.getLikeCount();
+        videoInfo.dislike_count = extractor.getDislikeCount();
+        videoInfo.nextVideo = extractor.getNextVideo();
+        videoInfo.relatedVideos = extractor.getRelatedVideos();
+
+        //Bitmap thumbnail = null;
+        //Bitmap uploader_thumbnail = null;
+        //int videoAvailableStatus = VIDEO_AVAILABLE;
+        return videoInfo;
+    }
+
+
    public String uploader_thumbnail_url = "";
    public String description = "";
+    /*todo: make this lists over vectors*/
    public VideoStream[] videoStreams = null;
    public AudioStream[] audioStreams = null;
    // video streams provided by the dash mpd do not need to be provided as VideoStream.
--- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeSearchEngine.java
@ -52,7 +52,8 @@ public class YoutubeSearchEngine implements SearchEngine {
    private static final String TAG = YoutubeSearchEngine.class.toString();

    @Override
-    public Result search(String query, int page, String languageCode, Downloader downloader) throws IOException, ParsingException {
+    public Result search(String query, int page, String languageCode, Downloader downloader)
+            throws IOException, ParsingException {
        Result result = new Result();
        Uri.Builder builder = new Uri.Builder();
        builder.scheme("https")
@ -171,7 +172,8 @@ public class YoutubeSearchEngine implements SearchEngine {

            try {
                dBuilder = dbFactory.newDocumentBuilder();
-                doc = dBuilder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("utf-8"))));
+                doc = dBuilder.parse(new InputSource(
+                        new ByteArrayInputStream(response.getBytes("utf-8"))));
                doc.getDocumentElement().normalize();
            } catch (ParserConfigurationException | SAXException | IOException e) {
                e.printStackTrace();
--- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeService.java
@ -3,6 +3,7 @@ package org.schabi.newpipe.crawler.services.youtube;
 import org.schabi.newpipe.crawler.CrawlingException;
 import org.schabi.newpipe.crawler.Downloader;
 import org.schabi.newpipe.crawler.StreamingService;
+import org.schabi.newpipe.crawler.UrlIdHandler;
 import org.schabi.newpipe.crawler.VideoExtractor;
 import org.schabi.newpipe.crawler.SearchEngine;

@ -37,8 +38,10 @@ public class YoutubeService implements StreamingService {
        return serviceInfo;
    }
    @Override
-    public VideoExtractor getExtractorInstance(String url, Downloader downloader) throws CrawlingException, IOException {
-        if(acceptUrl(url)) {
+    public VideoExtractor getExtractorInstance(String url, Downloader downloader)
+            throws CrawlingException, IOException {
+        UrlIdHandler urlIdHandler = new YoutubeUrlIdHandler();
+        if(urlIdHandler.acceptUrl(url)) {
            return new YoutubeVideoExtractor(url, downloader) ;
        }
        else {
@ -49,9 +52,9 @@ public class YoutubeService implements StreamingService {
    public SearchEngine getSearchEngineInstance() {
        return new YoutubeSearchEngine();
    }
+
    @Override
-    public boolean acceptUrl(String videoUrl) {
-        return videoUrl.contains("youtube") ||
-                videoUrl.contains("youtu.be");
+    public UrlIdHandler getUrlIdHandler() {
+        return new YoutubeUrlIdHandler();
    }
 }
--- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeUrlIdHandler.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeUrlIdHandler.java
@ -0,0 +1,68 @@
+package org.schabi.newpipe.crawler.services.youtube;
+
+import org.schabi.newpipe.crawler.ParsingException;
+import org.schabi.newpipe.crawler.RegexHelper;
+import org.schabi.newpipe.crawler.UrlIdHandler;
+
+/**
+ * Created by Christian Schabesberger on 02.02.16.
+ *
+ * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
+ * YoutubeUrlIdHandler.java is part of NewPipe.
+ *
+ * NewPipe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * NewPipe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**UrlIdHandler for YouTube, covering the "youtube.com/watch?v=" and the "youtu.be/" url forms.*/
+public class YoutubeUrlIdHandler implements UrlIdHandler {
+    /**Builds the url of the watch page that belongs to the given video id.*/
+    @SuppressWarnings("WeakerAccess")
+    @Override
+    public String getVideoUrl(String videoId) {
+        return "https://www.youtube.com/watch?v=" + videoId;
+    }
+
+    /**Extracts the 11 characters long video id from a youtube.com/watch or a youtu.be url.
+     * @throws ParsingException if url contains neither "youtube" nor "youtu.be", or if no
+     *                          video id is found inside of it (reported as RegexException).*/
+    @SuppressWarnings("WeakerAccess")
+    @Override
+    public String getVideoId(String url) throws ParsingException {
+        String pat;
+
+        if(url.contains("youtube")) {
+            pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
+        }
+        else if(url.contains("youtu.be")) {
+            pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
+        }
+        else {
+            throw new ParsingException("Error no suitable url: " + url);
+        }
+
+        // Both patterns capture exactly 11 characters, so a successful match is never empty,
+        // and a failed match is already reported by matchGroup1 with a RegexException.
+        return RegexHelper.matchGroup1(pat, url);
+    }
+
+    /**Reduces an accepted url to the plain watch url of the video it points to.
+     * @throws ParsingException if no video id can be extracted from complexUrl.*/
+    @Override
+    public String cleanUrl(String complexUrl) throws ParsingException {
+        return getVideoUrl(getVideoId(complexUrl));
+    }
+
+    /**Accepts every url that contains "youtube" or "youtu.be".*/
+    @Override
+    public boolean acceptUrl(String videoUrl) {
+        return videoUrl.contains("youtube") ||
+                videoUrl.contains("youtu.be");
+    }
+}
--- a/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java
+++ b/app/src/main/java/org/schabi/newpipe/crawler/services/youtube/YoutubeVideoExtractor.java
@ -15,6 +15,8 @@ import org.mozilla.javascript.ScriptableObject;
 import org.schabi.newpipe.crawler.CrawlingException;
 import org.schabi.newpipe.crawler.Downloader;
 import org.schabi.newpipe.crawler.ParsingException;
+import org.schabi.newpipe.crawler.RegexHelper;
+import org.schabi.newpipe.crawler.UrlIdHandler;
 import org.schabi.newpipe.crawler.VideoExtractor;
 import org.schabi.newpipe.crawler.MediaFormat;
 import org.schabi.newpipe.crawler.VideoInfo;
@ -25,11 +27,8 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.net.URLDecoder;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import java.util.Vector;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;

 /**
 * Created by Christian Schabesberger on 06.08.15.
@ -51,7 +50,7 @@ import java.util.regex.Pattern;
 * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
 */

-public class YoutubeVideoExtractor extends VideoExtractor {
+public class YoutubeVideoExtractor implements VideoExtractor {

    public class DecryptException extends ParsingException {
        DecryptException(Throwable cause) {
@ -75,7 +74,6 @@ public class YoutubeVideoExtractor extends VideoExtractor {
    private static final String TAG = YoutubeVideoExtractor.class.toString();
    private final Document doc;
    private JSONObject playerArgs;
-    private String errorMessage = "";

    // static values
    private static final String DECRYPTION_FUNC_NAME="decrypt";
@ -83,23 +81,27 @@ public class YoutubeVideoExtractor extends VideoExtractor {
    // cached values
    private static volatile String decryptionCode = "";

+    UrlIdHandler urlidhandler = new YoutubeUrlIdHandler();
+    String pageUrl = "";
+
    private Downloader downloader;

    public YoutubeVideoExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException {
        //most common videoInfo fields are now set in our superclass, for all services
-        super(pageUrl, dl);
        downloader = dl;
-        String pageContent = downloader.download(cleanUrl(pageUrl));
+        this.pageUrl = urlidhandler.cleanUrl(pageUrl);
+        String pageContent = downloader.download(this.pageUrl);
        doc = Jsoup.parse(pageContent, pageUrl);
        String ytPlayerConfigRaw;
        JSONObject ytPlayerConfig;

        //attempt to load the youtube js player JSON arguments
        try {
-            ytPlayerConfigRaw = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
+            ytPlayerConfigRaw =
+                    RegexHelper.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
            ytPlayerConfig = new JSONObject(ytPlayerConfigRaw);
            playerArgs = ytPlayerConfig.getJSONObject("args");
-        } catch (RegexException e) {
+        } catch (RegexHelper.RegexException e) {
            String errorReason = findErrorReason(doc);
            switch(errorReason) {
                case "GEMA":
@ -233,7 +235,16 @@ public class YoutubeVideoExtractor extends VideoExtractor {
    @Override
    public String getDashMpdUrl() throws ParsingException {
        try {
-            return playerArgs.getString("dashmpd");
+            String dashManifest = playerArgs.getString("dashmpd");
+            if(!dashManifest.contains("/signature/")) {
+                String encryptedSig = RegexHelper.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
+                String decryptedSig;
+
+                decryptedSig = decryptSignature(encryptedSig, decryptionCode);
+                dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
+            }
+
+            return dashManifest;
        } catch(NullPointerException e) {
            throw new ParsingException(
                    "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e);
@ -244,15 +255,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {

    @Override
    public VideoInfo.AudioStream[] getAudioStreams() throws ParsingException {
-        try {
-            String dashManifest = playerArgs.getString("dashmpd");
-            return parseDashManifest(dashManifest, decryptionCode);
-        } catch (NullPointerException e) {
-            throw new ParsingException(
-                    "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e);
-        } catch (Exception e) {
-            throw new ParsingException(e);
-        }
+        /* If we provide a valid dash manifest, we don't need to provide audio streams extra */
+        return null;
    }

    @Override
@ -300,37 +304,6 @@ public class YoutubeVideoExtractor extends VideoExtractor {
        return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);
    }

-    @SuppressWarnings("WeakerAccess")
-    @Override
-    public String getVideoId(String url) throws ParsingException {
-        String id;
-        String pat;
-
-        if(url.contains("youtube")) {
-            pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
-        }
-        else if(url.contains("youtu.be")) {
-            pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
-        }
-        else {
-            throw new ParsingException("Error no suitable url: " + url);
-        }
-
-        id = matchGroup1(pat, url);
-        if(!id.isEmpty()){
-            //Log.i(TAG, "string \""+url+"\" matches!");
-            return id;
-        } else {
-            throw new ParsingException("Error could not parse url: " + url);
-        }
-    }
-
-    @SuppressWarnings("WeakerAccess")
-    @Override
-    public String getVideoUrl(String videoId) {
-        return "https://www.youtube.com/watch?v=" + videoId;
-    }
-
    /**Attempts to parse (and return) the offset to start playing the video from.
     * @return the offset (in seconds), or 0 if no timestamp is found.*/
    @Override
@ -338,8 +311,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
        //todo: add unit test for timestamp
        String timeStamp;
        try {
-            timeStamp = matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
-        } catch (RegexException e) {
+            timeStamp = RegexHelper.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
+        } catch (RegexHelper.RegexException e) {
            // catch this instantly since an url does not necessarily have to have a time stamp

            // -2 because well the testing system will then know its the regex that failed :/
@ -354,15 +327,15 @@ public class YoutubeVideoExtractor extends VideoExtractor {
                String minutesString = "";
                String hoursString = "";
                try {
-                    secondsString = matchGroup1("(\\d{1,3})s", timeStamp);
-                    minutesString = matchGroup1("(\\d{1,3})m", timeStamp);
-                    hoursString = matchGroup1("(\\d{1,3})h", timeStamp);
+                    secondsString = RegexHelper.matchGroup1("(\\d{1,3})s", timeStamp);
+                    minutesString = RegexHelper.matchGroup1("(\\d{1,3})m", timeStamp);
+                    hoursString = RegexHelper.matchGroup1("(\\d{1,3})h", timeStamp);
                } catch (Exception e) {
                    //it could be that time is given in another method
                    if (secondsString.isEmpty() //if nothing was got,
                            && minutesString.isEmpty()//treat as unlabelled seconds
                            && hoursString.isEmpty()) {
-                        secondsString = matchGroup1("t=(\\d{1,3})", timeStamp);
+                        secondsString = RegexHelper.matchGroup1("t=(\\d{1,3})", timeStamp);
                    }
                }

@ -455,72 +428,14 @@ public class YoutubeVideoExtractor extends VideoExtractor {
        }
    }

-    private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) throws RegexException, DecryptException {
-        if(!dashManifest.contains("/signature/")) {
-            String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
-            String decryptedSig;
+    @Override
+    public UrlIdHandler getUrlIdConverter() { // supplies the UrlIdHandler associated with this extractor
+        return new YoutubeUrlIdHandler(); // a new handler object is constructed on every call; nothing is cached here
+    }

-            decryptedSig = decryptSignature(encryptedSig, decryptoinCode);
-            dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
-        }
-        String dashDoc;
-        try {
-            dashDoc = downloader.download(dashManifest);
-        } catch(IOException ioe) {
-            throw new DecryptException("Could not get dash mpd", ioe);
-        }
-        Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
-        try {
-            XmlPullParser parser = Xml.newPullParser();
-            parser.setInput(new StringReader(dashDoc));
-            String tagName = "";
-            String currentMimeType = "";
-            int currentBandwidth = -1;
-            int currentSamplingRate = -1;
-            boolean currentTagIsBaseUrl = false;
-            for(int eventType = parser.getEventType();
-                eventType != XmlPullParser.END_DOCUMENT;
-                eventType = parser.next() ) {
-                switch(eventType) {
-                    case XmlPullParser.START_TAG:
-                        tagName = parser.getName();
-                        if(tagName.equals("AdaptationSet")) {
-                            currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
-                        } else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
-                            currentBandwidth = Integer.parseInt(
-                                    parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
-                            currentSamplingRate = Integer.parseInt(
-                                    parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
-                        } else if(tagName.equals("BaseURL")) {
-                            currentTagIsBaseUrl = true;
-                        }
-                        break;
-
-                    case XmlPullParser.TEXT:
-                        if(currentTagIsBaseUrl &&
-                                (currentMimeType.contains("audio"))) {
-                            int format = -1;
-                            if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
-                                format = MediaFormat.WEBMA.id;
-                            } else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
-                                format = MediaFormat.M4A.id;
-                            }
-                            audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
-                                    format, currentBandwidth, currentSamplingRate));
-                        }
-                        //missing break here?
-                    case XmlPullParser.END_TAG:
-                        if(tagName.equals("AdaptationSet")) {
-                            currentMimeType = "";
-                        } else if(tagName.equals("BaseURL")) {
-                            currentTagIsBaseUrl = false;
-                        }//no break needed here
-                }
-            }
-        } catch(Exception e) {
-            e.printStackTrace();
-        }
-        return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
+    @Override
+    public String getPageUrl() { // plain getter: no parsing or downloading happens in this method
+        return pageUrl; // the pageUrl member is handed back as-is, without any modification
    }

    /**Provides information about links to other videos on the video page, such as related videos.
@ -533,7 +448,7 @@ public class YoutubeVideoExtractor extends VideoExtractor {
            info.webpage_url = li.select("a.content-link").first()
                    .attr("abs:href");

-            info.id = matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
+            info.id = RegexHelper.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);

            //todo: check NullPointerException causing
            info.title = li.select("span.title").first().text();
@ -584,15 +499,20 @@ public class YoutubeVideoExtractor extends VideoExtractor {
        try {
            String playerCode = downloader.download(playerUrl);

-            decryptionFuncName = matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
+            decryptionFuncName =
+                    RegexHelper.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);

-            String functionPattern = "(" + decryptionFuncName.replace("$", "\\$") + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
-            decryptionFunc = "var " + matchGroup1(functionPattern, playerCode) + ";";
+            String functionPattern = "("
+                    + decryptionFuncName.replace("$", "\\$")
+                    + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
+            decryptionFunc = "var " + RegexHelper.matchGroup1(functionPattern, playerCode) + ";";

-            helperObjectName = matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
+            helperObjectName = RegexHelper
+                    .matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);

-            String helperPattern = "(var " + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
-            helperObject = matchGroup1(helperPattern, playerCode);
+            String helperPattern = "(var "
+                    + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
+            helperObject = RegexHelper.matchGroup1(helperPattern, playerCode);


            callerFunc = callerFunc.replace("%%", decryptionFuncName);
@ -624,25 +544,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
        return (result == null ? "" : result.toString());
    }

-    private String cleanUrl(String complexUrl) throws ParsingException {
-        return getVideoUrl(getVideoId(complexUrl));
-    }
-
-    private String matchGroup1(String pattern, String input) throws RegexException {
-        Pattern pat = Pattern.compile(pattern);
-        Matcher mat = pat.matcher(input);
-        boolean foundMatch = mat.find();
-        if (foundMatch) {
-            return mat.group(1);
-        }
-        else {
-            //Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
-            throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\"");
-        }
-    }
-
    private String findErrorReason(Document doc) {
-        errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
+        String errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
        if(errorMessage.contains("GEMA")) {
            // Gema sometimes blocks youtube music content in germany:
            // https://www.gema.de/en/