Improve YoutubeStreamUrlIdHandler

* Make it a singleton
 * Accept embed links
 * Accept share links (youtube.com/shared?ci=...)
 * Add tests
 * Accept the host case-insensitively
This commit is contained in:
Coffeemakr 2017-01-10 21:10:41 +01:00
parent b3225bebe6
commit 2ded33110f
5 changed files with 203 additions and 21 deletions

View file

@ -46,7 +46,7 @@ public class YoutubeService extends StreamingService {
@Override @Override
public StreamExtractor getExtractorInstance(String url) public StreamExtractor getExtractorInstance(String url)
throws ExtractionException, IOException { throws ExtractionException, IOException {
UrlIdHandler urlIdHandler = new YoutubeStreamUrlIdHandler(); UrlIdHandler urlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
if(urlIdHandler.acceptUrl(url)) { if(urlIdHandler.acceptUrl(url)) {
return new YoutubeStreamExtractor(urlIdHandler, url, getServiceId()); return new YoutubeStreamExtractor(urlIdHandler, url, getServiceId());
} }
@ -61,7 +61,7 @@ public class YoutubeService extends StreamingService {
@Override @Override
public UrlIdHandler getUrlIdHandlerInstance() { public UrlIdHandler getUrlIdHandlerInstance() {
return new YoutubeStreamUrlIdHandler(); return YoutubeStreamUrlIdHandler.getInstance();
} }
@Override @Override

View file

@ -184,7 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// cached values // cached values
private static volatile String decryptionCode = ""; private static volatile String decryptionCode = "";
UrlIdHandler urlidhandler = new YoutubeStreamUrlIdHandler(); UrlIdHandler urlidhandler = YoutubeStreamUrlIdHandler.getInstance();
String pageUrl = ""; String pageUrl = "";
public YoutubeStreamExtractor(UrlIdHandler urlIdHandler, String pageUrl, int serviceId) public YoutubeStreamExtractor(UrlIdHandler urlIdHandler, String pageUrl, int serviceId)

View file

@ -1,12 +1,21 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import android.support.annotation.NonNull;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Parser; import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.util.regex.Matcher;
/** /**
* Created by Christian Schabesberger on 02.02.16. * Created by Christian Schabesberger on 02.02.16.
@ -29,45 +38,55 @@ import java.net.URLDecoder;
*/ */
public class YoutubeStreamUrlIdHandler implements UrlIdHandler { public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
@SuppressWarnings("WeakerAccess")
private static final YoutubeStreamUrlIdHandler instance = new YoutubeStreamUrlIdHandler();
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
private YoutubeStreamUrlIdHandler() {}
public static YoutubeStreamUrlIdHandler getInstance() {
return instance;
}
@Override @Override
public String getUrl(String videoId) { public String getUrl(String videoId) {
return "https://www.youtube.com/watch?v=" + videoId; return "https://www.youtube.com/watch?v=" + videoId;
} }
@SuppressWarnings("WeakerAccess")
@Override @Override
public String getId(String url) throws ParsingException, IllegalArgumentException { public String getId(String url) throws ParsingException, IllegalArgumentException {
if(url.isEmpty()) if(url.isEmpty()) {
{
throw new IllegalArgumentException("The url parameter should not be empty"); throw new IllegalArgumentException("The url parameter should not be empty");
} }
String id;
if(url.contains("youtube")) { String id;
if(url.contains("attribution_link")) { String lowercaseUrl = url.toLowerCase();
if(lowercaseUrl.contains("youtube")) {
if (url.contains("attribution_link")) {
try { try {
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
String query = URLDecoder.decode(escapedQuery, "UTF-8"); String query = URLDecoder.decode(escapedQuery, "UTF-8");
id = Parser.matchGroup1("v=([\\-a-zA-Z0-9_]{11})", query); id = Parser.matchGroup1("v=" + ID_PATTERN, query);
} catch(UnsupportedEncodingException uee) { } catch (UnsupportedEncodingException uee) {
throw new ParsingException("Could not parse attribution_link", uee); throw new ParsingException("Could not parse attribution_link", uee);
} }
} } else if(lowercaseUrl.contains("youtube.com/shared?ci=")) {
else if(url.contains("vnd.youtube")) return getRealIdFromSharedLink(url);
{ } else if (url.contains("vnd.youtube")) {
id = Parser.matchGroup1("([\\-a-zA-Z0-9_]{11}).*", url); id = Parser.matchGroup1(ID_PATTERN, url);
} else if (url.contains("embed")) {
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
} else if(url.contains("googleads")) { } else if(url.contains("googleads")) {
throw new FoundAdException("Error found add: " + url); throw new FoundAdException("Error found add: " + url);
} else { } else {
id = Parser.matchGroup1("[?&]v=([\\-a-zA-Z0-9_]{11})", url); id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
} }
} }
else if(url.contains("youtu.be")) { else if(lowercaseUrl.contains("youtu.be")) {
if(url.contains("v=")) { if(url.contains("v=")) {
id = Parser.matchGroup1("v=([\\-a-zA-Z0-9_]{11})", url); id = Parser.matchGroup1("v=" + ID_PATTERN, url);
} else { } else {
id = Parser.matchGroup1("youtu\\.be/([a-zA-Z0-9_-]{11})", url); id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
} }
} }
else { else {
@ -82,12 +101,55 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
} }
} }
/**
 * Resolves a shared link to the id of the stream it refers to.
 *
 * Shared URIs look like this:
 * <pre>
 * * https://www.youtube.com/shared?ci=PJICrTByb3E
 * * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
 * </pre>
 * The shared id is read from the link, the canonical
 * {@code https://www.youtube.com/shared?ci=<id>} page is downloaded, and the
 * stream id is taken from the page's {@code rel="shortlink"} youtu.be address.
 *
 * @param url The shared url
 * @return the id of the stream the shared link points to
 * @throws ParsingException if the url is not a valid URI or not a shared link,
 *                          if the page cannot be downloaded, or if the resolved
 *                          id is identical to the shared id
 */
private @NonNull String getRealIdFromSharedLink(String url) throws ParsingException {
    final URI sharedUri;
    try {
        sharedUri = new URI(url);
    } catch (URISyntaxException syntaxError) {
        throw new ParsingException("Invalid shared link", syntaxError);
    }

    final String sharedId = getSharedId(sharedUri);
    final Downloader pageLoader = NewPipe.getDownloader();

    final String page;
    try {
        page = pageLoader.download("https://www.youtube.com/shared?ci=" + sharedId);
    } catch (IOException | ReCaptchaException downloadError) {
        throw new ParsingException("Unable to resolve shared link", downloadError);
    }

    // The stream id is scraped from the shortlink element of the downloaded
    // page; this depends on the page markup staying as it is.
    final String streamId =
            Parser.matchGroup1("rel=\"shortlink\" href=\"https://youtu.be/" + ID_PATTERN, page);

    // Getting the shared id back means the link was not actually resolved.
    if (sharedId.equals(streamId)) {
        throw new ParsingException("Got same id for as shared id: " + sharedId);
    }
    return streamId;
}
/**
 * Extracts the shared id (the value of the {@code ci} query parameter) from
 * a shared link.
 *
 * @param uri the parsed shared link; its path must be exactly {@code /shared}
 * @return the 11-character shared id
 * @throws ParsingException if the path is not {@code /shared}, if the uri has
 *                          no query component, or if the query holds no
 *                          {@code ci} id
 */
private @NonNull String getSharedId(URI uri) throws ParsingException {
    if (!"/shared".equals(uri.getPath())) {
        throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")");
    }
    // URI.getQuery() returns null when the query is undefined (e.g. the
    // "?ci=" text only appears inside the fragment). Report that as a
    // parsing problem instead of handing null to the regex matcher.
    String query = uri.getQuery();
    if (query == null) {
        throw new ParsingException("Shared link has no query: " + uri.toString());
    }
    return Parser.matchGroup1("ci=" + ID_PATTERN, query);
}
public String cleanUrl(String complexUrl) throws ParsingException { public String cleanUrl(String complexUrl) throws ParsingException {
return getUrl(getId(complexUrl)); return getUrl(getId(complexUrl));
} }
@Override @Override
public boolean acceptUrl(String videoUrl) { public boolean acceptUrl(String videoUrl) {
videoUrl = videoUrl.toLowerCase();
return videoUrl.contains("youtube") || return videoUrl.contains("youtube") ||
videoUrl.contains("youtu.be"); videoUrl.contains("youtu.be");
} }

View file

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.stream_info; package org.schabi.newpipe.extractor.stream_info;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -60,7 +61,7 @@ public class StreamPreviewInfoCollector {
if (urlIdHandler == null) { if (urlIdHandler == null) {
throw new ParsingException("Error: UrlIdHandler not set"); throw new ParsingException("Error: UrlIdHandler not set");
} else if(!resultItem.webpage_url.isEmpty()) { } else if(!resultItem.webpage_url.isEmpty()) {
resultItem.id = (new YoutubeStreamUrlIdHandler()).getId(resultItem.webpage_url); resultItem.id = NewPipe.getService(serviceId).getUrlIdHandlerInstance().getId(resultItem.webpage_url);
} }
resultItem.title = extractor.getTitle(); resultItem.title = extractor.getTitle();
resultItem.stream_type = extractor.getStreamType(); resultItem.stream_type = extractor.getStreamType();

View file

@ -0,0 +1,119 @@
package org.schabi.newpipe.extractor.services.youtube.youtube;
import org.junit.Before;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.YoutubeStreamUrlIdHandler;
import java.util.ArrayList;
import java.util.List;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.fail;
import static org.junit.Assert.assertTrue;
/**
 * Test for {@link YoutubeStreamUrlIdHandler}
 *
 * Covers id extraction and url acceptance for watch, short (youtu.be), embed,
 * attribution, {@code vnd.youtube} and shared links, including hosts written
 * in mixed case.
 *
 * NOTE(review): the shared-link assertions in {@link #getId()} presumably need
 * network access, since resolving a shared link downloads a page through the
 * {@link Downloader} registered in {@link #setUp()} - confirm before running offline.
 */
public class YoutubeStreamUrlIdHandlerTest {
    /** An ad redirect url; {@code getId} is expected to reject it with a {@link FoundAdException}. */
    private static final String AD_URL = "https://googleads.g.doubleclick.net/aclk?sa=l&ai=C-2IPgeVTWPf4GcOStgfOnIOADf78n61GvKmmobYDrgIQASDj-5MDKAJg9ZXOgeAEoAGgy_T-A8gBAakC2gkpmquIsT6oAwGqBJMBT9BgD5kVgbN0dX602bFFaDw9vsxq-We-S8VkrXVBi6W_e7brZ36GCz1WO3EPEeklYuJjXLUowwCOKsd-8xr1UlS_tusuFJv9iX35xoBHKTRvs8-0aDbfEIm6in37QDfFuZjqgEMB8-tg0Jn_Pf1RU5OzbuU40B4Gy25NUTnOxhDKthOhKBUSZEksCEerUV8GMu10iAXCxquwApIFBggDEAEYAaAGGsgGlIjthrUDgAfItIsBqAemvhvYBwHSCAUIgGEQAbgT6AE&num=1&sig=AOD64_1DybDd4qAm5O7o9UAbTNRdqXXHFQ&ctype=21&video_id=dMO_IXYPZew&client=ca-pub-6219811747049371&adurl=http://www.youtube.com/watch%3Fv%3DdMO_IXYPZew";

    private YoutubeStreamUrlIdHandler urlIdHandler;

    /** Fetches the handler singleton and initialises NewPipe with the test downloader. */
    @Before
    public void setUp() throws Exception {
        urlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
        NewPipe.init(Downloader.getInstance());
    }

    /** A {@code null} url is expected to surface as a {@link NullPointerException}. */
    @Test(expected = NullPointerException.class)
    public void getIdWithNullAsUrl() throws ParsingException {
        urlIdHandler.getId(null);
    }

    /** Ad urls must be reported with a {@link FoundAdException}. */
    @Test(expected = FoundAdException.class)
    public void getIdForAd() throws ParsingException {
        urlIdHandler.getId(AD_URL);
    }

    /** Urls without a complete 11-character id must be rejected with a {@link ParsingException}. */
    @Test
    public void getIdForInvalidUrls() {
        List<String> invalidUrls = new ArrayList<>();
        invalidUrls.add("https://www.youtube.com/watch?v=jZViOEv90d");
        invalidUrls.add("https://www.youtube.com/watchjZViOEv90d");
        invalidUrls.add("https://www.youtube.com/");
        for (String invalidUrl : invalidUrls) {
            try {
                urlIdHandler.getId(invalidUrl);
                // Only reached when no exception was thrown; the resulting
                // assertion error is not a ParsingException, so it propagates.
                fail("Expected ParsingException for url: " + invalidUrl);
            } catch (ParsingException expected) {
                // This is the outcome the test is looking for.
            }
        }
    }

    /** Checks id extraction for every supported url shape. */
    @Test
    public void getId() throws Exception {
        assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/watch?v=jZViOEv90dI"));
        assertEquals("W-fFHeTX70Q", urlIdHandler.getId("https://www.youtube.com/watch?v=W-fFHeTX70Q"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("https://youtu.be/jZViOEv90dI?t=9s"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("HTTPS://Youtu.be/jZViOEv90dI?t=9s"));
        assertEquals("uEJuoEs1UxY", urlIdHandler.getId("http://www.youtube.com/watch_popup?v=uEJuoEs1UxY"));
        assertEquals("uEJuoEs1UxY", urlIdHandler.getId("http://www.Youtube.com/watch_popup?v=uEJuoEs1UxY"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/embed/jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube-nocookie.com/embed/jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube.com/watch?v=jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("http://youtube.com/watch?v=jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("http://youtu.be/jZViOEv90dI?t=9s"));
        assertEquals("7_WWz2DSnT8", urlIdHandler.getId("https://youtu.be/7_WWz2DSnT8"));
        assertEquals("oy6NvWeVruY", urlIdHandler.getId("https://m.youtube.com/watch?v=oy6NvWeVruY"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube.com/embed/jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.Youtube.com/embed/jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube-nocookie.com/embed/jZViOEv90dI"));
        assertEquals("EhxJLojIE_o", urlIdHandler.getId("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"));
        assertEquals("jZViOEv90dI", urlIdHandler.getId("vnd.youtube:jZViOEv90dI"));

        // Shared links
        String sharedId = "7JIArTByb3E";
        String realId = "Q7JsK50NGaA";
        assertEquals(realId, urlIdHandler.getId("vnd.youtube://www.YouTube.com/shared?ci=" + sharedId + "&feature=twitter-deep-link"));
        assertEquals(realId, urlIdHandler.getId("vnd.youtube://www.youtube.com/shared?ci=" + sharedId));
        assertEquals(realId, urlIdHandler.getId("https://www.youtube.com/shared?ci=" + sharedId));
    }

    /** Checks that every supported url shape is accepted. */
    @Test
    public void testAcceptUrl() {
        assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
        assertTrue(urlIdHandler.acceptUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100"));
        assertTrue(urlIdHandler.acceptUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
        assertTrue(urlIdHandler.acceptUrl("https://youtu.be/jZViOEv90dI?t=9s"));
        //assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch/jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/embed/jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("https://www.youtube-nocookie.com/embed/jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/watch?v=jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("http://youtu.be/jZViOEv90dI?t=9s"));
        assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/embed/jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("http://www.youtube-nocookie.com/embed/jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare"));
        assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"));
        assertTrue(urlIdHandler.acceptUrl("vnd.youtube:jZViOEv90dI"));

        // Shared links
        String sharedId = "7JIArTByb3E";
        assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/shared?ci=" + sharedId + "&feature=twitter-deep-link"));
        assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/shared?ci=" + sharedId));
        assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/shared?ci=" + sharedId));
    }
}