-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #53 from dotnettools/blackwidow
Blackwidow
- Loading branch information
Showing
145 changed files
with
5,648 additions
and
184 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<img src="../assets/blackwidow-logo-text.png" alt="SharpGrabber" height="128" /> | ||
|
||
# BlackWidow | ||
|
||
BlackWidow is a .NET library based on SharpGrabber. Rather than relying on .NET assemblies, BlackWidow executes scripts written specifically for grabbing. | ||
|
||
## Why use BlackWidow? | ||
BlackWidow gives you the following advantages over the traditional NuGet package approach: | ||
|
||
- **Always Up-to-date:** The scripts are kept up-to-date at runtime, so the host application keeps working as the sources change - or at least it won't stay broken for long! | ||
- **ECMAScript Support:** Supports JavaScript/ECMAScript out of the box. | ||
- **Easy Maintenance:** *JavaScript* is darn easy to write and understand! This helps contributors to quickly write new grabbers or fix the existing ones. | ||
- **Secure:** The scripts are executed in a sandbox environment, and they only have access to what the BlackWidow API exposes to them. | ||
- **Highly Customizable:** Almost everything is open for extension or replacement. Make new script interpreters, custom grabber repositories, or roll out your own interpreter APIs. | ||
|
||
## How does it work? | ||
|
||
BlackWidow keeps a collection of scripts locally - called the local repository. | ||
Each script gets interpreted as an object implementing `IGrabber`. | ||
To keep the scripts up-to-date, a remote repository is constantly monitored as the single source of truth. | ||
|
||
*TODO:* <a href="https://github.com/dotnettools/SharpGrabber">Read the Documentation</a> | ||
|
||
# Installation | ||
*WIP* | ||
|
||
<a href="https://github.com/dotnettools/SharpGrabber">&larr; Back to Home Page</a> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"scripts": [ | ||
{ | ||
"id": "vimeo.com", | ||
"name": "Vimeo", | ||
"version": "1.0", | ||
"type": "JavaScript", | ||
"apiVersion": 1, | ||
"supportedRegularExpressions": [ "^https?://(www\\.|player\\.)?vimeo\\.com/(video/)?([0-9]+)" ], | ||
"file": "scripts/vimeo.js" | ||
}, | ||
{ | ||
"id": "pornhub.com", | ||
"name": "PornHub", | ||
"version": "1.0", | ||
"type": "JavaScript", | ||
"apiVersion": 1, | ||
"supportedRegularExpressions": [ "^(https?:\\/\\/)?(www\\.)?pornhub\\.com\\/([^\\/]+)viewkey=(\\w+).*$" ], | ||
"file": "scripts/pornhub.js" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
// Matches a video page URL; the scheme and the "www." prefix are both optional.
// Capture group 4 is the value of the `viewkey` parameter.
const urlMatcher = /^(https?:\/\/)?(www\.)?pornhub\.com\/([^\/]+)viewkey=(\w+).*$/i
// Matches a line that declares (with `var` or `let`) a variable whose name
// starts with "flashvars". Capture group 2 is the variable name.
const flashVarsFinder = /^\s*(var|let)\s+(flashvars[\w_]+)\s+=/mi
|
||
/**
 * Extracts the viewkey from a video page URL.
 *
 * @param {string} uri - URL to inspect; matched against `urlMatcher`.
 * @returns {string|undefined} The viewkey (capture group 4 of `urlMatcher`),
 *   or `undefined` when the URL does not match.
 */
const getViewId = uri => {
  // The former `new URL(uri)` call was dropped: its result was never used and
  // it throws on scheme-less URLs, which `urlMatcher` deliberately accepts.
  const match = urlMatcher.exec(uri)
  return match ? match[4] : undefined
}
|
||
/**
 * Builds the canonical video page URL for a viewkey.
 *
 * @param {string} url - Despite its name, this is the viewkey to embed in the
 *   query string, not a full URL. It is inserted as-is, without escaping.
 * @returns {string} The `view_video.php` page URL for that viewkey.
 */
const getStdUrl = url => `https://www.pornhub.com/view_video.php?viewkey=${url}`
|
||
/**
 * Locates the inline script that declares the `flashvars…` variable and
 * evaluates it to obtain that variable's value.
 *
 * @param {object} doc - Parsed document; must provide `selectAll(selector)`
 *   returning elements that expose `innerText`.
 * @returns {object} The value of the `flashvars…` variable.
 * @throws {GrabException} When no script declares such a variable, or when the
 *   variable evaluates to a falsy value.
 */
const parseFlashVarsScript = doc => {
  let source
  let varName
  // Every script is inspected; when several match, the last one wins.
  doc.selectAll('script').forEach(elem => {
    const text = elem.innerText
    const match = flashVarsFinder.exec(text)
    if (match) {
      source = text
      varName = match[2]
    }
  })

  // Fail explicitly instead of evaluating the literal text "undefined".
  if (source === undefined)
    throw new GrabException('Could not extract flashVars.')

  // NOTE(security): this executes script text taken from the downloaded page.
  // It presumably relies on the host restricting what script code can reach -
  // TODO confirm before running this outside such an environment.
  // `playerObjList` is pre-declared so the evaluated script can reference it.
  const flashVars = new Function('let playerObjList = {};' + source + ';return ' + varName + ';')()
  if (!flashVars)
    throw new GrabException('Could not extract flashVars.')
  return flashVars
}
|
||
/**
 * Copies the data found in the page's flashvars object into the grab result.
 *
 * Emits one `info` grab, one `image` grab, and then one grab per usable media
 * definition: `hlsStreamReference` for HLS entries, `media` for everything else.
 *
 * @param {object} result - Grab result; receives `title` and `grab(type, data)` calls.
 * @param {object} vars - The flashvars object read from the page.
 * @throws {GrabException} When the flashvars mark the video as unavailable
 *   (in general, or in the current country).
 */
const updateResult = (result, vars) => {
  // Flags may arrive as real booleans or as source text such as "false".
  // NOTE(security): the text form is evaluated with the Function constructor,
  // i.e. page-supplied content is executed as code. Kept for compatibility.
  const parseBool = str => typeof str === 'boolean' ? str : new Function('return ' + str)()

  if (parseBool(vars.video_unavailable))
    throw new GrabException('This video is unavailable.')
  if (parseBool(vars.video_unavailable_country))
    throw new GrabException('This video is unavailable in your country.')

  const duration = vars.video_duration * 1000 // milliseconds

  result.title = vars.video_title

  result.grab('info', {
    length: duration
  })

  result.grab('image', {
    resourceUri: vars.image_url,
    type: 'primary'
  })

  // A page without media definitions yields no media grabs instead of a TypeError.
  const definitions = vars.mediaDefinitions || []
  definitions.forEach(def => {
    // Skip entries without a quality or a URL, and entries flagged `remote`.
    if (!def.quality || def.remote || !def.videoUrl)
      return

    if (def.format === 'hls') {
      // An array of qualities is reported as a master playlist covering all of
      // them; a single quality is reported as a plain stream playlist.
      const isMaster = Array.isArray(def.quality)
      result.grab('hlsStreamReference', {
        resourceUri: def.videoUrl,
        playlistType: isMaster ? 'master' : 'stream',
        resolution: isMaster ? def.quality.join(',') : def.quality
      })
      return
    }

    // Any non-HLS definition is treated as an MP4 video.
    result.grab('media', {
      resourceUri: def.videoUrl,
      format: {
        mime: 'video/mp4',
        extension: 'mp4',
        channels: 'both',
        length: duration,
        container: 'mp4',
        resolution: def.quality,
        formatTitle: 'MP4 ' + def.quality,
      }
    })
  })
}
|
||
/**
 * Reports whether this grabber can handle the given URL.
 *
 * @param {string} uri - URL to check.
 * @returns {boolean} `true` when `getViewId` finds a viewkey in the URL.
 */
grabber.supports = uri => getViewId(uri) !== undefined
|
||
/**
 * Grabs a video: resolves the viewkey, downloads the canonical video page,
 * reads its flashvars and fills in the result.
 *
 * @param {object} request - Grab request; `request.url` is the URL to grab.
 * @param {object} result - Grab result to populate (passed to `updateResult`).
 * @returns {boolean} `false` when the URL carries no viewkey, `true` once the
 *   result has been populated.
 */
grabber.grab = (request, result) => {
  // init
  const viewId = getViewId(request.url)
  if (!viewId)
    return false

  // download the canonical page for this viewkey
  const response = http.client.get({ url: getStdUrl(viewId) })
  response.assertSuccess()

  // parse the response HTML and extract the player variables
  const doc = html.parse(response.bodyText)
  updateResult(result, parseFlashVarsScript(doc))

  return true
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Matches vimeo.com video URLs, optionally on the "www." or "player." host and
// optionally under "/video/". Capture group 3 is the numeric video id.
const urlRegex = /^https?:\/\/(www\.|player\.)?vimeo\.com\/(video\/)?([0-9]+)/
|
||
/**
 * Extracts the numeric video id from a Vimeo URL.
 *
 * @param {string} url - URL to inspect; matched against `urlRegex`.
 * @returns {string|undefined} The id (capture group 3 of `urlRegex`), or
 *   `undefined` when the URL does not match.
 */
function getVideoId(url) {
  const match = urlRegex.exec(url)
  if (!match)
    return undefined
  return match[3]
}
|
||
/**
 * Builds the URL of the player configuration document for a video.
 *
 * @param {string} videoId - Video id to embed in the URL.
 * @returns {string} The `player.vimeo.com` config URL for that id.
 */
function getConfigUrl(videoId) {
  // A template literal inserts the id verbatim; String.replace would treat
  // `$` sequences in the id as replacement patterns.
  return `https://player.vimeo.com/video/${videoId}/config`
}
|
||
/**
 * Downloads and parses the player configuration of a video.
 *
 * @param {string} videoId - Id of the video whose configuration is fetched.
 * @returns {*} The response body parsed as JSON.
 */
function fetchConfig(videoId) {
  const response = http.client.get({
    url: getConfigUrl(videoId),
    expectText: true
  })
  // Checked before the body is parsed.
  response.assertSuccess()
  return JSON.parse(response.bodyText)
}
|
||
/**
 * Copies the data of a player configuration into the grab result.
 *
 * Emits one `info` grab, one `image` grab per thumbnail entry, and one `media`
 * grab per progressive file.
 *
 * @param {object} result - Grab result; receives `title` and `grab(type, data)` calls.
 * @param {object} config - Parsed player configuration (see `fetchConfig`).
 * @throws {GrabException} When the configuration lists no files.
 */
function setGrabResult(result, config) {
  // `?.` so a configuration without a `request` section is reported as an
  // unavailable video rather than crashing with a TypeError.
  if (!config.request?.files)
    throw new GrabException('Video is unavailable.')

  const video = config.video

  // add info
  result.title = video.title
  result.grab('info', {
    author: video.owner?.name,
    length: video.duration * 1000,
  })

  // add images
  if (video.thumbs) {
    for (const [key, resourceUri] of Object.entries(video.thumbs)) {
      // Numeric keys are taken to be the thumbnail width; any other key marks
      // the primary image, for which no size is reported.
      const width = Number(key)
      const isBase = Number.isNaN(width)
      const size = isBase ? undefined : {
        width,
        height: width * 0.5625 // 9 / 16, i.e. a 16:9 frame is assumed
      }
      result.grab('image', {
        resourceUri,
        type: isBase ? 'primary' : 'thumbnail',
        size
      })
    }
  }

  // add media; a configuration without progressive files yields no media grabs
  const progressiveFiles = config.request.files.progressive || []
  progressiveFiles.forEach(file => {
    const fileMime = file.mime || 'video/mp4'
    const fileExt = mime.getExtension(fileMime)
    const containerName = fileExt.toUpperCase()
    // NOTE(review): channels/container/resolution/formatTitle sit next to
    // `format` here, not inside it - confirm this is the shape the host expects.
    result.grab('media', {
      resourceUri: file.url,
      channels: 'both',
      container: containerName,
      resolution: file.quality,
      formatTitle: containerName + ' ' + file.quality,
      pixelWidth: file.width,
      pixelHeight: file.height,
      format: {
        mime: fileMime,
        extension: fileExt
      }
    })
  })
}
|
||
/**
 * Reports whether this grabber can handle the given URL.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when `getVideoId` finds a video id in the URL.
 */
grabber.supports = url => Boolean(getVideoId(url))
|
||
/**
 * Grabs a video: resolves the video id, fetches the player configuration and
 * fills in the result from it.
 *
 * @param {object} request - Grab request; `request.url` is the URL to grab.
 * @param {object} result - Grab result to populate (passed to `setGrabResult`).
 * @returns {boolean} `false` when the URL carries no video id, `true` once the
 *   result has been populated.
 * @throws {GrabException} When the fetched configuration is empty.
 */
grabber.grab = (request, result) => {
  const videoId = getVideoId(request.url)
  if (!videoId)
    return false

  const config = fetchConfig(videoId)
  // The body may legitimately parse to a falsy value such as `null`.
  if (!config)
    throw new GrabException('Failed to fetch video config.')

  setGrabResult(result, config)
  return true
}
Oops, something went wrong.