restructure parser
This commit is contained in:
parent
bad576c23d
commit
d097363b24
12 changed files with 365 additions and 274 deletions
|
@ -72,7 +72,7 @@ public class VideoItemDetailActivity extends AppCompatActivity {
|
|||
StreamingService[] serviceList = ServiceList.getServices();
|
||||
//VideoExtractor videoExtractor = null;
|
||||
for (int i = 0; i < serviceList.length; i++) {
|
||||
if (serviceList[i].acceptUrl(videoUrl)) {
|
||||
if (serviceList[i].getUrlIdHandler().acceptUrl(videoUrl)) {
|
||||
arguments.putInt(VideoItemDetailFragment.STREAMING_SERVICE, i);
|
||||
currentStreamingService = i;
|
||||
//videoExtractor = ServiceList.getService(i).getExtractorInstance();
|
||||
|
|
|
@ -112,7 +112,7 @@ public class VideoItemDetailFragment extends Fragment {
|
|||
public void run() {
|
||||
try {
|
||||
videoExtractor = service.getExtractorInstance(videoUrl, new Downloader());
|
||||
VideoInfo videoInfo = videoExtractor.getVideoInfo();
|
||||
VideoInfo videoInfo = VideoInfo.getVideoInfo(videoExtractor, new Downloader());
|
||||
h.post(new VideoResultReturnedRunnable(videoInfo));
|
||||
h.post(new SetThumbnailRunnable(
|
||||
//todo: make bitmaps not bypass tor
|
||||
|
|
101
app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java
Normal file
101
app/src/main/java/org/schabi/newpipe/crawler/DashMpdParser.java
Normal file
|
@ -0,0 +1,101 @@
|
|||
package org.schabi.newpipe.crawler;
|
||||
|
||||
import android.util.Xml;
|
||||
|
||||
import org.xmlpull.v1.XmlPullParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
* Created by Christian Schabesberger on 02.02.16.
|
||||
*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
* DashMpdParser.java is part of NewPipe.
|
||||
*
|
||||
* NewPipe is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* NewPipe is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
public class DashMpdParser {
|
||||
|
||||
static class DashMpdParsingException extends ParsingException {
|
||||
DashMpdParsingException(String message, Exception e) {
|
||||
super(message, e);
|
||||
}
|
||||
}
|
||||
|
||||
public static VideoInfo.AudioStream[] getAudioStreams(String dashManifestUrl,
|
||||
Downloader downloader)
|
||||
throws DashMpdParsingException {
|
||||
String dashDoc;
|
||||
try {
|
||||
dashDoc = downloader.download(dashManifestUrl);
|
||||
} catch(IOException ioe) {
|
||||
throw new DashMpdParsingException("Could not get dash mpd: " + dashManifestUrl, ioe);
|
||||
}
|
||||
Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
|
||||
try {
|
||||
XmlPullParser parser = Xml.newPullParser();
|
||||
parser.setInput(new StringReader(dashDoc));
|
||||
String tagName = "";
|
||||
String currentMimeType = "";
|
||||
int currentBandwidth = -1;
|
||||
int currentSamplingRate = -1;
|
||||
boolean currentTagIsBaseUrl = false;
|
||||
for(int eventType = parser.getEventType();
|
||||
eventType != XmlPullParser.END_DOCUMENT;
|
||||
eventType = parser.next() ) {
|
||||
switch(eventType) {
|
||||
case XmlPullParser.START_TAG:
|
||||
tagName = parser.getName();
|
||||
if(tagName.equals("AdaptationSet")) {
|
||||
currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
|
||||
} else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
|
||||
currentBandwidth = Integer.parseInt(
|
||||
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
|
||||
currentSamplingRate = Integer.parseInt(
|
||||
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
|
||||
} else if(tagName.equals("BaseURL")) {
|
||||
currentTagIsBaseUrl = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case XmlPullParser.TEXT:
|
||||
if(currentTagIsBaseUrl &&
|
||||
(currentMimeType.contains("audio"))) {
|
||||
int format = -1;
|
||||
if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
|
||||
format = MediaFormat.WEBMA.id;
|
||||
} else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
|
||||
format = MediaFormat.M4A.id;
|
||||
}
|
||||
audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
|
||||
format, currentBandwidth, currentSamplingRate));
|
||||
}
|
||||
break;
|
||||
case XmlPullParser.END_TAG:
|
||||
if(tagName.equals("AdaptationSet")) {
|
||||
currentMimeType = "";
|
||||
} else if(tagName.equals("BaseURL")) {
|
||||
currentTagIsBaseUrl = false;
|
||||
}//no break needed here
|
||||
}
|
||||
}
|
||||
} catch(Exception e) {
|
||||
throw new DashMpdParsingException("Could not parse Dash mpd", e);
|
||||
}
|
||||
return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package org.schabi.newpipe.crawler;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Created by Christian Schabesberger on 02.02.16.
|
||||
*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
* RegexHelper.java is part of NewPipe.
|
||||
*
|
||||
* NewPipe is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* NewPipe is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/** avoid using regex !!! */
|
||||
public class RegexHelper {
|
||||
|
||||
public static class RegexException extends ParsingException {
|
||||
public RegexException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
|
||||
public static String matchGroup1(String pattern, String input) throws RegexException {
|
||||
Pattern pat = Pattern.compile(pattern);
|
||||
Matcher mat = pat.matcher(input);
|
||||
boolean foundMatch = mat.find();
|
||||
if (foundMatch) {
|
||||
return mat.group(1);
|
||||
}
|
||||
else {
|
||||
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
|
||||
throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\"");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -27,11 +27,11 @@ public interface StreamingService {
|
|||
public String name = "";
|
||||
}
|
||||
ServiceInfo getServiceInfo();
|
||||
VideoExtractor getExtractorInstance(String url, Downloader downloader) throws IOException, CrawlingException;
|
||||
VideoExtractor getExtractorInstance(String url, Downloader downloader)
|
||||
throws IOException, CrawlingException;
|
||||
SearchEngine getSearchEngineInstance();
|
||||
|
||||
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
|
||||
Intent was meant to be watched with this Service.
|
||||
Return false if this service shall not allow to be called through ACTIONs.*/
|
||||
boolean acceptUrl(String videoUrl);
|
||||
UrlIdHandler getUrlIdHandler();
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
package org.schabi.newpipe.crawler;
|
||||
|
||||
/**
|
||||
* Created by Christian Schabesberger on 02.02.16.
|
||||
*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
* UrlIdHandler.java is part of NewPipe.
|
||||
*
|
||||
* NewPipe is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* NewPipe is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
public interface UrlIdHandler {
|
||||
String getVideoUrl(String videoId);
|
||||
String getVideoId(String siteUrl) throws ParsingException;
|
||||
String cleanUrl(String siteUrl) throws ParsingException;
|
||||
|
||||
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
|
||||
Intent was meant to be watched with this Service.
|
||||
Return false if this service shall not allow to be called through ACTIONs.*/
|
||||
boolean acceptUrl(String videoUrl);
|
||||
}
|
|
@ -20,14 +20,14 @@ package org.schabi.newpipe.crawler;
|
|||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import java.util.List;
|
||||
import java.net.URL;
|
||||
import java.util.Vector;
|
||||
|
||||
/**Scrapes information from a video streaming service (eg, YouTube).*/
|
||||
|
||||
|
||||
@SuppressWarnings("ALL")
|
||||
public abstract class VideoExtractor {
|
||||
public interface VideoExtractor {
|
||||
|
||||
public class ExctractorInitException extends CrawlingException {
|
||||
public ExctractorInitException() {}
|
||||
|
@ -42,13 +42,6 @@ public abstract class VideoExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
public class RegexException extends ParsingException {
|
||||
public RegexException() {}
|
||||
public RegexException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
|
||||
public class ContentNotAvailableException extends ParsingException {
|
||||
public ContentNotAvailableException() {}
|
||||
public ContentNotAvailableException(String message) {
|
||||
|
@ -62,111 +55,6 @@ public abstract class VideoExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
protected final String pageUrl;
|
||||
protected VideoInfo videoInfo;
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public VideoExtractor(String url, Downloader dl) {
|
||||
this.pageUrl = url;
|
||||
}
|
||||
|
||||
/**Fills out the video info fields which are common to all services.
|
||||
* Probably needs to be overridden by subclasses*/
|
||||
public VideoInfo getVideoInfo() throws CrawlingException
|
||||
{
|
||||
if(videoInfo == null) {
|
||||
videoInfo = new VideoInfo();
|
||||
}
|
||||
|
||||
if(videoInfo.webpage_url.isEmpty()) {
|
||||
videoInfo.webpage_url = pageUrl;
|
||||
}
|
||||
|
||||
|
||||
if (videoInfo.title.isEmpty()) {
|
||||
videoInfo.title = getTitle();
|
||||
}
|
||||
|
||||
if (videoInfo.duration < 1) {
|
||||
videoInfo.duration = getLength();
|
||||
}
|
||||
|
||||
|
||||
if (videoInfo.uploader.isEmpty()) {
|
||||
videoInfo.uploader = getUploader();
|
||||
}
|
||||
|
||||
if (videoInfo.description.isEmpty()) {
|
||||
videoInfo.description = getDescription();
|
||||
}
|
||||
|
||||
if (videoInfo.view_count == -1) {
|
||||
videoInfo.view_count = getViews();
|
||||
}
|
||||
|
||||
if (videoInfo.upload_date.isEmpty()) {
|
||||
videoInfo.upload_date = getUploadDate();
|
||||
}
|
||||
|
||||
if (videoInfo.thumbnail_url.isEmpty()) {
|
||||
videoInfo.thumbnail_url = getThumbnailUrl();
|
||||
}
|
||||
|
||||
if (videoInfo.id.isEmpty()) {
|
||||
videoInfo.id = getVideoId(pageUrl);
|
||||
}
|
||||
|
||||
/** Load and extract audio*/
|
||||
if (videoInfo.audioStreams == null) {
|
||||
videoInfo.audioStreams = getAudioStreams();
|
||||
}
|
||||
/** Extract video stream url*/
|
||||
if (videoInfo.videoStreams == null) {
|
||||
videoInfo.videoStreams = getVideoStreams();
|
||||
}
|
||||
|
||||
if (videoInfo.uploader_thumbnail_url.isEmpty()) {
|
||||
videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl();
|
||||
}
|
||||
|
||||
if (videoInfo.startPosition < 0) {
|
||||
videoInfo.startPosition = getTimeStamp();
|
||||
}
|
||||
|
||||
if(videoInfo.dashMpdUrl.isEmpty()) {
|
||||
videoInfo.dashMpdUrl = getDashMpdUrl();
|
||||
}
|
||||
|
||||
if(videoInfo.average_rating.isEmpty()) {
|
||||
videoInfo.average_rating = getAverageRating();
|
||||
}
|
||||
|
||||
if(videoInfo.like_count == -1) {
|
||||
videoInfo.like_count = getLikeCount();
|
||||
}
|
||||
|
||||
if(videoInfo.dislike_count == -1) {
|
||||
videoInfo.dislike_count = getDislikeCount();
|
||||
}
|
||||
|
||||
if(videoInfo.nextVideo == null) {
|
||||
videoInfo.nextVideo = getNextVideo();
|
||||
}
|
||||
|
||||
if(videoInfo.relatedVideos == null) {
|
||||
videoInfo.relatedVideos = getRelatedVideos();
|
||||
}
|
||||
|
||||
//Bitmap thumbnail = null;
|
||||
//Bitmap uploader_thumbnail = null;
|
||||
//int videoAvailableStatus = VIDEO_AVAILABLE;
|
||||
return videoInfo;
|
||||
}
|
||||
|
||||
//todo: remove these functions or make them static; otherwise it is useless to have them here
|
||||
public abstract String getVideoUrl(String videoId);
|
||||
public abstract String getVideoId(String siteUrl) throws ParsingException;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////
|
||||
public abstract int getTimeStamp() throws ParsingException;
|
||||
public abstract String getTitle() throws ParsingException;
|
||||
public abstract String getDescription() throws ParsingException;
|
||||
|
@ -185,4 +73,6 @@ public abstract class VideoExtractor {
|
|||
public abstract int getDislikeCount() throws ParsingException;
|
||||
public abstract VideoPreviewInfo getNextVideo() throws ParsingException;
|
||||
public abstract Vector<VideoPreviewInfo> getRelatedVideos() throws ParsingException;
|
||||
public abstract UrlIdHandler getUrlIdConverter();
|
||||
public abstract String getPageUrl();
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.schabi.newpipe.crawler;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
@ -26,8 +27,52 @@ import java.util.List;
|
|||
@SuppressWarnings("ALL")
|
||||
public class VideoInfo extends AbstractVideoInfo {
|
||||
|
||||
/**Fills out the video info fields which are common to all services.
|
||||
* Probably needs to be overridden by subclasses*/
|
||||
public static VideoInfo getVideoInfo(VideoExtractor extractor, Downloader downloader)
|
||||
throws CrawlingException, IOException {
|
||||
VideoInfo videoInfo = new VideoInfo();
|
||||
|
||||
UrlIdHandler uiconv = extractor.getUrlIdConverter();
|
||||
|
||||
videoInfo.webpage_url = extractor.getPageUrl();
|
||||
videoInfo.title = extractor.getTitle();
|
||||
videoInfo.duration = extractor.getLength();
|
||||
videoInfo.uploader = extractor.getUploader();
|
||||
videoInfo.description = extractor.getDescription();
|
||||
videoInfo.view_count = extractor.getViews();
|
||||
videoInfo.upload_date = extractor.getUploadDate();
|
||||
videoInfo.thumbnail_url = extractor.getThumbnailUrl();
|
||||
videoInfo.id = uiconv.getVideoId(extractor.getPageUrl());
|
||||
videoInfo.dashMpdUrl = extractor.getDashMpdUrl();
|
||||
/** Load and extract audio*/
|
||||
videoInfo.audioStreams = extractor.getAudioStreams();
|
||||
if(videoInfo.dashMpdUrl != null && !videoInfo.dashMpdUrl.isEmpty()) {
|
||||
if(videoInfo.audioStreams == null || videoInfo.audioStreams.length == 0) {
|
||||
videoInfo.audioStreams =
|
||||
DashMpdParser.getAudioStreams(videoInfo.dashMpdUrl, downloader);
|
||||
}
|
||||
}
|
||||
/** Extract video stream url*/
|
||||
videoInfo.videoStreams = extractor.getVideoStreams();
|
||||
videoInfo.uploader_thumbnail_url = extractor.getUploaderThumbnailUrl();
|
||||
videoInfo.startPosition = extractor.getTimeStamp();
|
||||
videoInfo.average_rating = extractor.getAverageRating();
|
||||
videoInfo.like_count = extractor.getLikeCount();
|
||||
videoInfo.dislike_count = extractor.getDislikeCount();
|
||||
videoInfo.nextVideo = extractor.getNextVideo();
|
||||
videoInfo.relatedVideos = extractor.getRelatedVideos();
|
||||
|
||||
//Bitmap thumbnail = null;
|
||||
//Bitmap uploader_thumbnail = null;
|
||||
//int videoAvailableStatus = VIDEO_AVAILABLE;
|
||||
return videoInfo;
|
||||
}
|
||||
|
||||
|
||||
public String uploader_thumbnail_url = "";
|
||||
public String description = "";
|
||||
/*todo: make this lists over vectors*/
|
||||
public VideoStream[] videoStreams = null;
|
||||
public AudioStream[] audioStreams = null;
|
||||
// video streams provided by the dash mpd do not need to be provided as VideoStream.
|
||||
|
|
|
@ -52,7 +52,8 @@ public class YoutubeSearchEngine implements SearchEngine {
|
|||
private static final String TAG = YoutubeSearchEngine.class.toString();
|
||||
|
||||
@Override
|
||||
public Result search(String query, int page, String languageCode, Downloader downloader) throws IOException, ParsingException {
|
||||
public Result search(String query, int page, String languageCode, Downloader downloader)
|
||||
throws IOException, ParsingException {
|
||||
Result result = new Result();
|
||||
Uri.Builder builder = new Uri.Builder();
|
||||
builder.scheme("https")
|
||||
|
@ -171,7 +172,8 @@ public class YoutubeSearchEngine implements SearchEngine {
|
|||
|
||||
try {
|
||||
dBuilder = dbFactory.newDocumentBuilder();
|
||||
doc = dBuilder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("utf-8"))));
|
||||
doc = dBuilder.parse(new InputSource(
|
||||
new ByteArrayInputStream(response.getBytes("utf-8"))));
|
||||
doc.getDocumentElement().normalize();
|
||||
} catch (ParserConfigurationException | SAXException | IOException e) {
|
||||
e.printStackTrace();
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.schabi.newpipe.crawler.services.youtube;
|
|||
import org.schabi.newpipe.crawler.CrawlingException;
|
||||
import org.schabi.newpipe.crawler.Downloader;
|
||||
import org.schabi.newpipe.crawler.StreamingService;
|
||||
import org.schabi.newpipe.crawler.UrlIdHandler;
|
||||
import org.schabi.newpipe.crawler.VideoExtractor;
|
||||
import org.schabi.newpipe.crawler.SearchEngine;
|
||||
|
||||
|
@ -37,8 +38,10 @@ public class YoutubeService implements StreamingService {
|
|||
return serviceInfo;
|
||||
}
|
||||
@Override
|
||||
public VideoExtractor getExtractorInstance(String url, Downloader downloader) throws CrawlingException, IOException {
|
||||
if(acceptUrl(url)) {
|
||||
public VideoExtractor getExtractorInstance(String url, Downloader downloader)
|
||||
throws CrawlingException, IOException {
|
||||
UrlIdHandler urlIdHandler = new YoutubeUrlIdHandler();
|
||||
if(urlIdHandler.acceptUrl(url)) {
|
||||
return new YoutubeVideoExtractor(url, downloader) ;
|
||||
}
|
||||
else {
|
||||
|
@ -49,9 +52,9 @@ public class YoutubeService implements StreamingService {
|
|||
public SearchEngine getSearchEngineInstance() {
|
||||
return new YoutubeSearchEngine();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptUrl(String videoUrl) {
|
||||
return videoUrl.contains("youtube") ||
|
||||
videoUrl.contains("youtu.be");
|
||||
public UrlIdHandler getUrlIdHandler() {
|
||||
return new YoutubeUrlIdHandler();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
package org.schabi.newpipe.crawler.services.youtube;
|
||||
|
||||
import org.schabi.newpipe.crawler.ParsingException;
|
||||
import org.schabi.newpipe.crawler.RegexHelper;
|
||||
import org.schabi.newpipe.crawler.UrlIdHandler;
|
||||
|
||||
/**
|
||||
* Created by Christian Schabesberger on 02.02.16.
|
||||
*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
* YoutubeUrlIdHandler.java is part of NewPipe.
|
||||
*
|
||||
* NewPipe is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* NewPipe is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
public class YoutubeUrlIdHandler implements UrlIdHandler {
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
@Override
|
||||
public String getVideoUrl(String videoId) {
|
||||
return "https://www.youtube.com/watch?v=" + videoId;
|
||||
}
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
@Override
|
||||
public String getVideoId(String url) throws ParsingException {
|
||||
String id;
|
||||
String pat;
|
||||
|
||||
if(url.contains("youtube")) {
|
||||
pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
|
||||
}
|
||||
else if(url.contains("youtu.be")) {
|
||||
pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
|
||||
}
|
||||
else {
|
||||
throw new ParsingException("Error no suitable url: " + url);
|
||||
}
|
||||
|
||||
id = RegexHelper.matchGroup1(pat, url);
|
||||
if(!id.isEmpty()){
|
||||
//Log.i(TAG, "string \""+url+"\" matches!");
|
||||
return id;
|
||||
} else {
|
||||
throw new ParsingException("Error could not parse url: " + url);
|
||||
}
|
||||
}
|
||||
|
||||
public String cleanUrl(String complexUrl) throws ParsingException {
|
||||
return getVideoUrl(getVideoId(complexUrl));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptUrl(String videoUrl) {
|
||||
return videoUrl.contains("youtube") ||
|
||||
videoUrl.contains("youtu.be");
|
||||
}
|
||||
}
|
|
@ -15,6 +15,8 @@ import org.mozilla.javascript.ScriptableObject;
|
|||
import org.schabi.newpipe.crawler.CrawlingException;
|
||||
import org.schabi.newpipe.crawler.Downloader;
|
||||
import org.schabi.newpipe.crawler.ParsingException;
|
||||
import org.schabi.newpipe.crawler.RegexHelper;
|
||||
import org.schabi.newpipe.crawler.UrlIdHandler;
|
||||
import org.schabi.newpipe.crawler.VideoExtractor;
|
||||
import org.schabi.newpipe.crawler.MediaFormat;
|
||||
import org.schabi.newpipe.crawler.VideoInfo;
|
||||
|
@ -25,11 +27,8 @@ import java.io.IOException;
|
|||
import java.io.StringReader;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Vector;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Created by Christian Schabesberger on 06.08.15.
|
||||
|
@ -51,7 +50,7 @@ import java.util.regex.Pattern;
|
|||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
public class YoutubeVideoExtractor extends VideoExtractor {
|
||||
public class YoutubeVideoExtractor implements VideoExtractor {
|
||||
|
||||
public class DecryptException extends ParsingException {
|
||||
DecryptException(Throwable cause) {
|
||||
|
@ -75,7 +74,6 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
private static final String TAG = YoutubeVideoExtractor.class.toString();
|
||||
private final Document doc;
|
||||
private JSONObject playerArgs;
|
||||
private String errorMessage = "";
|
||||
|
||||
// static values
|
||||
private static final String DECRYPTION_FUNC_NAME="decrypt";
|
||||
|
@ -83,23 +81,27 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
// cached values
|
||||
private static volatile String decryptionCode = "";
|
||||
|
||||
UrlIdHandler urlidhandler = new YoutubeUrlIdHandler();
|
||||
String pageUrl = "";
|
||||
|
||||
private Downloader downloader;
|
||||
|
||||
public YoutubeVideoExtractor(String pageUrl, Downloader dl) throws CrawlingException, IOException {
|
||||
//most common videoInfo fields are now set in our superclass, for all services
|
||||
super(pageUrl, dl);
|
||||
downloader = dl;
|
||||
String pageContent = downloader.download(cleanUrl(pageUrl));
|
||||
this.pageUrl = urlidhandler.cleanUrl(pageUrl);
|
||||
String pageContent = downloader.download(this.pageUrl);
|
||||
doc = Jsoup.parse(pageContent, pageUrl);
|
||||
String ytPlayerConfigRaw;
|
||||
JSONObject ytPlayerConfig;
|
||||
|
||||
//attempt to load the youtube js player JSON arguments
|
||||
try {
|
||||
ytPlayerConfigRaw = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
|
||||
ytPlayerConfigRaw =
|
||||
RegexHelper.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
|
||||
ytPlayerConfig = new JSONObject(ytPlayerConfigRaw);
|
||||
playerArgs = ytPlayerConfig.getJSONObject("args");
|
||||
} catch (RegexException e) {
|
||||
} catch (RegexHelper.RegexException e) {
|
||||
String errorReason = findErrorReason(doc);
|
||||
switch(errorReason) {
|
||||
case "GEMA":
|
||||
|
@ -233,7 +235,16 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
@Override
|
||||
public String getDashMpdUrl() throws ParsingException {
|
||||
try {
|
||||
return playerArgs.getString("dashmpd");
|
||||
String dashManifest = playerArgs.getString("dashmpd");
|
||||
if(!dashManifest.contains("/signature/")) {
|
||||
String encryptedSig = RegexHelper.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
|
||||
String decryptedSig;
|
||||
|
||||
decryptedSig = decryptSignature(encryptedSig, decryptionCode);
|
||||
dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
|
||||
}
|
||||
|
||||
return dashManifest;
|
||||
} catch(NullPointerException e) {
|
||||
throw new ParsingException(
|
||||
"Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e);
|
||||
|
@ -244,15 +255,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
|
||||
@Override
|
||||
public VideoInfo.AudioStream[] getAudioStreams() throws ParsingException {
|
||||
try {
|
||||
String dashManifest = playerArgs.getString("dashmpd");
|
||||
return parseDashManifest(dashManifest, decryptionCode);
|
||||
} catch (NullPointerException e) {
|
||||
throw new ParsingException(
|
||||
"Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).", e);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException(e);
|
||||
}
|
||||
/* If we provide a valid dash manifest, we don't need to provide audio streams extra */
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -300,37 +304,6 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);
|
||||
}
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
@Override
|
||||
public String getVideoId(String url) throws ParsingException {
|
||||
String id;
|
||||
String pat;
|
||||
|
||||
if(url.contains("youtube")) {
|
||||
pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
|
||||
}
|
||||
else if(url.contains("youtu.be")) {
|
||||
pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
|
||||
}
|
||||
else {
|
||||
throw new ParsingException("Error no suitable url: " + url);
|
||||
}
|
||||
|
||||
id = matchGroup1(pat, url);
|
||||
if(!id.isEmpty()){
|
||||
//Log.i(TAG, "string \""+url+"\" matches!");
|
||||
return id;
|
||||
} else {
|
||||
throw new ParsingException("Error could not parse url: " + url);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
@Override
|
||||
public String getVideoUrl(String videoId) {
|
||||
return "https://www.youtube.com/watch?v=" + videoId;
|
||||
}
|
||||
|
||||
/**Attempts to parse (and return) the offset to start playing the video from.
|
||||
* @return the offset (in seconds), or 0 if no timestamp is found.*/
|
||||
@Override
|
||||
|
@ -338,8 +311,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
//todo: add unit test for timestamp
|
||||
String timeStamp;
|
||||
try {
|
||||
timeStamp = matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
|
||||
} catch (RegexException e) {
|
||||
timeStamp = RegexHelper.matchGroup1("((#|&|\\?)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
|
||||
} catch (RegexHelper.RegexException e) {
|
||||
// catch this instantly since an url does not necessarily have to have a time stamp
|
||||
|
||||
// -2 because the testing system will then know it's the regex that failed :/
|
||||
|
@ -354,15 +327,15 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
String minutesString = "";
|
||||
String hoursString = "";
|
||||
try {
|
||||
secondsString = matchGroup1("(\\d{1,3})s", timeStamp);
|
||||
minutesString = matchGroup1("(\\d{1,3})m", timeStamp);
|
||||
hoursString = matchGroup1("(\\d{1,3})h", timeStamp);
|
||||
secondsString = RegexHelper.matchGroup1("(\\d{1,3})s", timeStamp);
|
||||
minutesString = RegexHelper.matchGroup1("(\\d{1,3})m", timeStamp);
|
||||
hoursString = RegexHelper.matchGroup1("(\\d{1,3})h", timeStamp);
|
||||
} catch (Exception e) {
|
||||
//it could be that time is given in another method
|
||||
if (secondsString.isEmpty() //if nothing was got,
|
||||
&& minutesString.isEmpty()//treat as unlabelled seconds
|
||||
&& hoursString.isEmpty()) {
|
||||
secondsString = matchGroup1("t=(\\d{1,3})", timeStamp);
|
||||
secondsString = RegexHelper.matchGroup1("t=(\\d{1,3})", timeStamp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -455,72 +428,14 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) throws RegexException, DecryptException {
|
||||
if(!dashManifest.contains("/signature/")) {
|
||||
String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
|
||||
String decryptedSig;
|
||||
@Override
|
||||
public UrlIdHandler getUrlIdConverter() {
|
||||
return new YoutubeUrlIdHandler();
|
||||
}
|
||||
|
||||
decryptedSig = decryptSignature(encryptedSig, decryptoinCode);
|
||||
dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
|
||||
}
|
||||
String dashDoc;
|
||||
try {
|
||||
dashDoc = downloader.download(dashManifest);
|
||||
} catch(IOException ioe) {
|
||||
throw new DecryptException("Could not get dash mpd", ioe);
|
||||
}
|
||||
Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
|
||||
try {
|
||||
XmlPullParser parser = Xml.newPullParser();
|
||||
parser.setInput(new StringReader(dashDoc));
|
||||
String tagName = "";
|
||||
String currentMimeType = "";
|
||||
int currentBandwidth = -1;
|
||||
int currentSamplingRate = -1;
|
||||
boolean currentTagIsBaseUrl = false;
|
||||
for(int eventType = parser.getEventType();
|
||||
eventType != XmlPullParser.END_DOCUMENT;
|
||||
eventType = parser.next() ) {
|
||||
switch(eventType) {
|
||||
case XmlPullParser.START_TAG:
|
||||
tagName = parser.getName();
|
||||
if(tagName.equals("AdaptationSet")) {
|
||||
currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
|
||||
} else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
|
||||
currentBandwidth = Integer.parseInt(
|
||||
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
|
||||
currentSamplingRate = Integer.parseInt(
|
||||
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
|
||||
} else if(tagName.equals("BaseURL")) {
|
||||
currentTagIsBaseUrl = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case XmlPullParser.TEXT:
|
||||
if(currentTagIsBaseUrl &&
|
||||
(currentMimeType.contains("audio"))) {
|
||||
int format = -1;
|
||||
if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
|
||||
format = MediaFormat.WEBMA.id;
|
||||
} else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
|
||||
format = MediaFormat.M4A.id;
|
||||
}
|
||||
audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
|
||||
format, currentBandwidth, currentSamplingRate));
|
||||
}
|
||||
//missing break here?
|
||||
case XmlPullParser.END_TAG:
|
||||
if(tagName.equals("AdaptationSet")) {
|
||||
currentMimeType = "";
|
||||
} else if(tagName.equals("BaseURL")) {
|
||||
currentTagIsBaseUrl = false;
|
||||
}//no break needed here
|
||||
}
|
||||
}
|
||||
} catch(Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
|
||||
@Override
|
||||
public String getPageUrl() {
|
||||
return pageUrl;
|
||||
}
|
||||
|
||||
/**Provides information about links to other videos on the video page, such as related videos.
|
||||
|
@ -533,7 +448,7 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
info.webpage_url = li.select("a.content-link").first()
|
||||
.attr("abs:href");
|
||||
|
||||
info.id = matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
|
||||
info.id = RegexHelper.matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
|
||||
|
||||
//todo: check NullPointerException causing
|
||||
info.title = li.select("span.title").first().text();
|
||||
|
@ -584,15 +499,20 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
try {
|
||||
String playerCode = downloader.download(playerUrl);
|
||||
|
||||
decryptionFuncName = matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
|
||||
decryptionFuncName =
|
||||
RegexHelper.matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
|
||||
|
||||
String functionPattern = "(" + decryptionFuncName.replace("$", "\\$") + "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
|
||||
decryptionFunc = "var " + matchGroup1(functionPattern, playerCode) + ";";
|
||||
String functionPattern = "("
|
||||
+ decryptionFuncName.replace("$", "\\$")
|
||||
+ "=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
|
||||
decryptionFunc = "var " + RegexHelper.matchGroup1(functionPattern, playerCode) + ";";
|
||||
|
||||
helperObjectName = matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
|
||||
helperObjectName = RegexHelper
|
||||
.matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
|
||||
|
||||
String helperPattern = "(var " + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
|
||||
helperObject = matchGroup1(helperPattern, playerCode);
|
||||
String helperPattern = "(var "
|
||||
+ helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
|
||||
helperObject = RegexHelper.matchGroup1(helperPattern, playerCode);
|
||||
|
||||
|
||||
callerFunc = callerFunc.replace("%%", decryptionFuncName);
|
||||
|
@ -624,25 +544,8 @@ public class YoutubeVideoExtractor extends VideoExtractor {
|
|||
return (result == null ? "" : result.toString());
|
||||
}
|
||||
|
||||
private String cleanUrl(String complexUrl) throws ParsingException {
|
||||
return getVideoUrl(getVideoId(complexUrl));
|
||||
}
|
||||
|
||||
private String matchGroup1(String pattern, String input) throws RegexException {
|
||||
Pattern pat = Pattern.compile(pattern);
|
||||
Matcher mat = pat.matcher(input);
|
||||
boolean foundMatch = mat.find();
|
||||
if (foundMatch) {
|
||||
return mat.group(1);
|
||||
}
|
||||
else {
|
||||
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
|
||||
throw new RegexException("failed to find pattern \""+pattern+" inside of "+input+"\"");
|
||||
}
|
||||
}
|
||||
|
||||
private String findErrorReason(Document doc) {
|
||||
errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
|
||||
String errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
|
||||
if(errorMessage.contains("GEMA")) {
|
||||
// Gema sometimes blocks youtube music content in germany:
|
||||
// https://www.gema.de/en/
|
||||
|
|
Loading…
Add table
Reference in a new issue