Improve YoutubeStreamUrlIdHandler
* Make it a singleton * Accept embed links * Accept share links (youtube.com/shared?ci=...) * Add tests * Accept host case-insensitively
This commit is contained in:
parent
b3225bebe6
commit
2ded33110f
5 changed files with 203 additions and 21 deletions
|
@ -46,7 +46,7 @@ public class YoutubeService extends StreamingService {
|
|||
@Override
|
||||
public StreamExtractor getExtractorInstance(String url)
|
||||
throws ExtractionException, IOException {
|
||||
UrlIdHandler urlIdHandler = new YoutubeStreamUrlIdHandler();
|
||||
UrlIdHandler urlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
|
||||
if(urlIdHandler.acceptUrl(url)) {
|
||||
return new YoutubeStreamExtractor(urlIdHandler, url, getServiceId());
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ public class YoutubeService extends StreamingService {
|
|||
|
||||
@Override
|
||||
public UrlIdHandler getUrlIdHandlerInstance() {
|
||||
return new YoutubeStreamUrlIdHandler();
|
||||
return YoutubeStreamUrlIdHandler.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -184,7 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
// cached values
|
||||
private static volatile String decryptionCode = "";
|
||||
|
||||
UrlIdHandler urlidhandler = new YoutubeStreamUrlIdHandler();
|
||||
UrlIdHandler urlidhandler = YoutubeStreamUrlIdHandler.getInstance();
|
||||
String pageUrl = "";
|
||||
|
||||
public YoutubeStreamExtractor(UrlIdHandler urlIdHandler, String pageUrl, int serviceId)
|
||||
|
|
|
@ -1,12 +1,21 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube;
|
||||
|
||||
import android.support.annotation.NonNull;
|
||||
|
||||
import org.schabi.newpipe.extractor.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.Parser;
|
||||
import org.schabi.newpipe.extractor.UrlIdHandler;
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
/**
|
||||
* Created by Christian Schabesberger on 02.02.16.
|
||||
|
@ -29,45 +38,55 @@ import java.net.URLDecoder;
|
|||
*/
|
||||
|
||||
public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
|
||||
private static final YoutubeStreamUrlIdHandler instance = new YoutubeStreamUrlIdHandler();
|
||||
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
|
||||
|
||||
private YoutubeStreamUrlIdHandler() {}
|
||||
|
||||
public static YoutubeStreamUrlIdHandler getInstance() {
|
||||
return instance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl(String videoId) {
|
||||
return "https://www.youtube.com/watch?v=" + videoId;
|
||||
}
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
@Override
|
||||
public String getId(String url) throws ParsingException, IllegalArgumentException {
|
||||
if(url.isEmpty())
|
||||
{
|
||||
if(url.isEmpty()) {
|
||||
throw new IllegalArgumentException("The url parameter should not be empty");
|
||||
}
|
||||
String id;
|
||||
|
||||
if(url.contains("youtube")) {
|
||||
if(url.contains("attribution_link")) {
|
||||
String id;
|
||||
String lowercaseUrl = url.toLowerCase();
|
||||
if(lowercaseUrl.contains("youtube")) {
|
||||
if (url.contains("attribution_link")) {
|
||||
try {
|
||||
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
|
||||
String query = URLDecoder.decode(escapedQuery, "UTF-8");
|
||||
id = Parser.matchGroup1("v=([\\-a-zA-Z0-9_]{11})", query);
|
||||
} catch(UnsupportedEncodingException uee) {
|
||||
id = Parser.matchGroup1("v=" + ID_PATTERN, query);
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new ParsingException("Could not parse attribution_link", uee);
|
||||
}
|
||||
}
|
||||
else if(url.contains("vnd.youtube"))
|
||||
{
|
||||
id = Parser.matchGroup1("([\\-a-zA-Z0-9_]{11}).*", url);
|
||||
} else if(lowercaseUrl.contains("youtube.com/shared?ci=")) {
|
||||
return getRealIdFromSharedLink(url);
|
||||
} else if (url.contains("vnd.youtube")) {
|
||||
id = Parser.matchGroup1(ID_PATTERN, url);
|
||||
} else if (url.contains("embed")) {
|
||||
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
|
||||
} else if(url.contains("googleads")) {
|
||||
throw new FoundAdException("Error found add: " + url);
|
||||
} else {
|
||||
id = Parser.matchGroup1("[?&]v=([\\-a-zA-Z0-9_]{11})", url);
|
||||
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
|
||||
}
|
||||
}
|
||||
else if(url.contains("youtu.be")) {
|
||||
else if(lowercaseUrl.contains("youtu.be")) {
|
||||
if(url.contains("v=")) {
|
||||
id = Parser.matchGroup1("v=([\\-a-zA-Z0-9_]{11})", url);
|
||||
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
|
||||
} else {
|
||||
id = Parser.matchGroup1("youtu\\.be/([a-zA-Z0-9_-]{11})", url);
|
||||
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -82,12 +101,55 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the real url from a shared uri.
|
||||
*
|
||||
* Shared URI's look like this:
|
||||
* <pre>
|
||||
* * https://www.youtube.com/shared?ci=PJICrTByb3E
|
||||
* * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
|
||||
* </pre>
|
||||
* @param url The shared url
|
||||
* @return the id of the stream
|
||||
* @throws ParsingException
|
||||
*/
|
||||
private @NonNull String getRealIdFromSharedLink(String url) throws ParsingException {
|
||||
URI uri;
|
||||
try {
|
||||
uri = new URI(url);
|
||||
} catch (URISyntaxException e) {
|
||||
throw new ParsingException("Invalid shared link", e);
|
||||
}
|
||||
String sharedId = getSharedId(uri);
|
||||
Downloader downloader = NewPipe.getDownloader();
|
||||
String content;
|
||||
try {
|
||||
content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId);
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Unable to resolve shared link", e);
|
||||
}
|
||||
// is this bad? is this fragile?:
|
||||
String realId = Parser.matchGroup1("rel=\"shortlink\" href=\"https://youtu.be/" + ID_PATTERN, content);
|
||||
if(sharedId.equals(realId)) {
|
||||
throw new ParsingException("Got same id for as shared id: " + sharedId);
|
||||
}
|
||||
return realId;
|
||||
}
|
||||
|
||||
private @NonNull String getSharedId(URI uri) throws ParsingException {
|
||||
if (!"/shared".equals(uri.getPath())) {
|
||||
throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")");
|
||||
}
|
||||
return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery());
|
||||
}
|
||||
|
||||
public String cleanUrl(String complexUrl) throws ParsingException {
|
||||
return getUrl(getId(complexUrl));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptUrl(String videoUrl) {
|
||||
videoUrl = videoUrl.toLowerCase();
|
||||
return videoUrl.contains("youtube") ||
|
||||
videoUrl.contains("youtu.be");
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.schabi.newpipe.extractor.stream_info;
|
||||
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.UrlIdHandler;
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
@ -60,7 +61,7 @@ public class StreamPreviewInfoCollector {
|
|||
if (urlIdHandler == null) {
|
||||
throw new ParsingException("Error: UrlIdHandler not set");
|
||||
} else if(!resultItem.webpage_url.isEmpty()) {
|
||||
resultItem.id = (new YoutubeStreamUrlIdHandler()).getId(resultItem.webpage_url);
|
||||
resultItem.id = NewPipe.getService(serviceId).getUrlIdHandlerInstance().getId(resultItem.webpage_url);
|
||||
}
|
||||
resultItem.title = extractor.getTitle();
|
||||
resultItem.stream_type = extractor.getStreamType();
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.youtube;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.schabi.newpipe.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.services.youtube.YoutubeStreamUrlIdHandler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.fail;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* Test for {@link YoutubeStreamUrlIdHandler}
|
||||
*/
|
||||
public class YoutubeStreamUrlIdHandlerTest {
|
||||
private static String AD_URL = "https://googleads.g.doubleclick.net/aclk?sa=l&ai=C-2IPgeVTWPf4GcOStgfOnIOADf78n61GvKmmobYDrgIQASDj-5MDKAJg9ZXOgeAEoAGgy_T-A8gBAakC2gkpmquIsT6oAwGqBJMBT9BgD5kVgbN0dX602bFFaDw9vsxq-We-S8VkrXVBi6W_e7brZ36GCz1WO3EPEeklYuJjXLUowwCOKsd-8xr1UlS_tusuFJv9iX35xoBHKTRvs8-0aDbfEIm6in37QDfFuZjqgEMB8-tg0Jn_Pf1RU5OzbuU40B4Gy25NUTnOxhDKthOhKBUSZEksCEerUV8GMu10iAXCxquwApIFBggDEAEYAaAGGsgGlIjthrUDgAfItIsBqAemvhvYBwHSCAUIgGEQAbgT6AE&num=1&sig=AOD64_1DybDd4qAm5O7o9UAbTNRdqXXHFQ&ctype=21&video_id=dMO_IXYPZew&client=ca-pub-6219811747049371&adurl=http://www.youtube.com/watch%3Fv%3DdMO_IXYPZew";
|
||||
private YoutubeStreamUrlIdHandler urlIdHandler;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
urlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
|
||||
NewPipe.init(Downloader.getInstance());
|
||||
}
|
||||
|
||||
@Test(expected = NullPointerException.class)
|
||||
public void getIdWithNullAsUrl() throws ParsingException {
|
||||
urlIdHandler.getId(null);
|
||||
}
|
||||
|
||||
@Test(expected = FoundAdException.class)
|
||||
public void getIdForAd() throws ParsingException {
|
||||
urlIdHandler.getId(AD_URL);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getIdForInvalidUrls() throws ParsingException {
|
||||
List<String> invalidUrls = new ArrayList<>(50);
|
||||
invalidUrls.add("https://www.youtube.com/watch?v=jZViOEv90d");
|
||||
invalidUrls.add("https://www.youtube.com/watchjZViOEv90d");
|
||||
invalidUrls.add("https://www.youtube.com/");
|
||||
for(String invalidUrl: invalidUrls) {
|
||||
Throwable exception = null;
|
||||
try {
|
||||
urlIdHandler.getId(invalidUrl);
|
||||
} catch (ParsingException e) {
|
||||
exception = e;
|
||||
}
|
||||
if(exception == null) {
|
||||
fail("Expected ParsingException for url: " + invalidUrl);
|
||||
}
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void getId() throws Exception {
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertEquals("W-fFHeTX70Q", urlIdHandler.getId("https://www.youtube.com/watch?v=W-fFHeTX70Q"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("https://youtu.be/jZViOEv90dI?t=9s"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("HTTPS://Youtu.be/jZViOEv90dI?t=9s"));
|
||||
assertEquals("uEJuoEs1UxY", urlIdHandler.getId("http://www.youtube.com/watch_popup?v=uEJuoEs1UxY"));
|
||||
assertEquals("uEJuoEs1UxY", urlIdHandler.getId("http://www.Youtube.com/watch_popup?v=uEJuoEs1UxY"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube.com/embed/jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("https://www.youtube-nocookie.com/embed/jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("http://youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("http://youtu.be/jZViOEv90dI?t=9s"));
|
||||
assertEquals("7_WWz2DSnT8", urlIdHandler.getId("https://youtu.be/7_WWz2DSnT8"));
|
||||
assertEquals("oy6NvWeVruY", urlIdHandler.getId("https://m.youtube.com/watch?v=oy6NvWeVruY"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube.com/embed/jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.Youtube.com/embed/jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("http://www.youtube-nocookie.com/embed/jZViOEv90dI"));
|
||||
assertEquals("EhxJLojIE_o", urlIdHandler.getId("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertEquals("jZViOEv90dI", urlIdHandler.getId("vnd.youtube:jZViOEv90dI"));
|
||||
|
||||
// Shared links
|
||||
String sharedId = "7JIArTByb3E";
|
||||
String realId = "Q7JsK50NGaA";
|
||||
assertEquals(realId, urlIdHandler.getId("vnd.youtube://www.YouTube.com/shared?ci=" + sharedId + "&feature=twitter-deep-link"));
|
||||
assertEquals(realId, urlIdHandler.getId("vnd.youtube://www.youtube.com/shared?ci=" + sharedId ));
|
||||
assertEquals(realId, urlIdHandler.getId("https://www.youtube.com/shared?ci=7JIArTByb3E"));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAcceptUrl() {
|
||||
assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
|
||||
assertTrue(urlIdHandler.acceptUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100"));
|
||||
assertTrue(urlIdHandler.acceptUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100"));
|
||||
assertTrue(urlIdHandler.acceptUrl("https://youtu.be/jZViOEv90dI?t=9s"));
|
||||
//assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/watch/jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/embed/jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("https://www.youtube-nocookie.com/embed/jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("http://youtu.be/jZViOEv90dI?t=9s"));
|
||||
assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/embed/jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("http://www.youtube-nocookie.com/embed/jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare"));
|
||||
assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"));
|
||||
assertTrue(urlIdHandler.acceptUrl("vnd.youtube:jZViOEv90dI"));
|
||||
|
||||
assertTrue(urlIdHandler.acceptUrl("vnd.youtube:jZViOEv90dI"));
|
||||
|
||||
String sharedId = "7JIArTByb3E";
|
||||
assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/shared?ci=" + sharedId + "&feature=twitter-deep-link"));
|
||||
assertTrue(urlIdHandler.acceptUrl("vnd.youtube://www.youtube.com/shared?ci=" + sharedId ));
|
||||
assertTrue(urlIdHandler.acceptUrl("https://www.youtube.com/shared?ci=7JIArTByb3E"));
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue