diff --git a/README.md b/README.md index d4591fb..9db1574 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,10 @@ [![NuGet download count](https://img.shields.io/nuget/dt/SharpGrabber)](https://www.nuget.org/packages/SharpGrabber) This repository contains multiple related projects: -- `SharpGrabber` is a *.NET Standard* library for scraping top media providers and grabbing high quality video, audio and information. -- `SharpGrabber.Converter` is a *.NET Standard* library based on `ffmpeg` shared libraries to join audio and video streams. This is particularly useful when grabbing high quality *YouTube* media that might be separated into audio and video files. It is also used for merging HLS stream segments. -- `SharpGrabber.Desktop` A cross-platform desktop application which utilizes both mentioned libraries to expose their functionality to desktop end-users. +- `SharpGrabber` is a *.NET Standard* library for scraping top media providers and grabbing high quality video, audio and information. +- `SharpGrabber.Converter` is a *.NET Standard* library based on `ffmpeg` shared libraries to join audio and video streams. This is particularly useful when grabbing high quality *YouTube* media that might be separated into audio and video files. It is also used for merging HLS stream segments. +- `SharpGrabber.BlackWidow` is a *.NET Standard* library for grabbing with JavaScript, which has many advantages over using scattered NuGet packages. +- `SharpGrabber.Desktop` A cross-platform desktop application which utilizes all three libraries mentioned above to expose their functionality to desktop end-users. # How to Use **⭐ Please give a star if you find this project useful!** @@ -24,7 +25,7 @@ This repository contains multiple related projects: The `SharpGrabber` package defines abstractions only. The actual grabbers have their own packages and should be installed separately. 
### SharpGrabber - Core Package - Install-Package SharpGrabber -Version 2.0.2 + Install-Package SharpGrabber -Version 2.1 ### SharpGrabber.Converter It's an optional package to work with media files. Using this package, you can easily concatenate video segments, or mux audio and video channels. @@ -95,9 +96,10 @@ The good news is no functionality has been removed, so with a minor refactoring, I strongly recommend that you upgrade, v2 has a much cleaner structure and code. - -## SharpGrabber.Desktop 3.3 -- It uses every package mentioned above and supports all of the mentioned providers! + +## SharpGrabber.Desktop +### Version 3.3 +- Grabs from every source supported by official grabbers. - Displays information and downloads videos, audios, images etc. - Merges YouTube separated audio and video streams into complete media files. It can join HLS segments as well! @@ -111,12 +113,29 @@ Requirements of the cross-platform desktop application to run and operate correc SharpGrabber.Desktop Application +# Introducing BlackWidow +SharpGrabber + +BlackWidow executes scripts written specifically for grabbing, rather than relying on .NET assemblies. +- **Always Up-to-date:** The scripts are always kept up-to-date at runtime; so the functionality of the host application won't break as the sources change - at least not for long! +- **ECMAScript Support:** Supports JavaScript/ECMAScript out of the box. +- **Easy Maintenance:** *JavaScript* is darn easy to write and understand! This helps contributors to quickly write new grabbers or fix the existing ones. +- **Secure**: The scripts are executed in a sandbox environment, and they only have access to what the BlackWidow API exposes to them. +- **Highly Customizable:** Almost everything is open for extension or replacement. Make new script interpreters, custom grabber repositories, or roll out your own interpreter APIs +Read more + Documentation + ## Contribution You are most welcome to contribute! 
-- Support for more media providers such as *DailyMotion*, *Instagram*, *Facebook*, *Twitch* etc. +- Authentication mechanisms for grabbers, e.g. Instagram login +- Support for more media providers such as *DailyMotion*, *Facebook*, *Twitch* etc. - Accelerate downloads in the desktop app (like a download manager) +## Disclaimer +The SharpGrabber library, BlackWidow, and the other projects and libraries provided in this repository are developed for educational purposes. +Since it's illegal to extract copyrighted data, you should make sure your usage of the tools provided here complies with copyright laws. +Contributors to these tools are not responsible for any copyright infringement that may result from their use. + ## License Copyright © 2021 Javid Shoaei and other contributors
diff --git a/SharpGrabber.sln b/SharpGrabber.sln index 56a9024..f3cf591 100644 --- a/SharpGrabber.sln +++ b/SharpGrabber.sln @@ -24,6 +24,12 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SharpGrabber.Hls", "src\Sha EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SharpGrabber.Instagram", "src\SharpGrabber.Instagram\SharpGrabber.Instagram.csproj", "{094B729B-9871-4A2C-9228-9AAEE66F135D}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SharpGrabber.BlackWidow", "src\SharpGrabber.BlackWidow\SharpGrabber.BlackWidow.csproj", "{9F3A8C86-8F28-4F54-B8A6-DBB49DDB5171}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SharpGrabber.BlackWidow.Tests", "tests\SharpGrabber.BlackWidow.Tests\SharpGrabber.BlackWidow.Tests.csproj", "{4CB41014-D036-4090-B6FA-4CFB01D82C3A}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{ADFEEE61-D79B-4F91-A192-F6A2E949673C}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -62,10 +68,21 @@ Global {094B729B-9871-4A2C-9228-9AAEE66F135D}.Debug|Any CPU.Build.0 = Debug|Any CPU {094B729B-9871-4A2C-9228-9AAEE66F135D}.Release|Any CPU.ActiveCfg = Release|Any CPU {094B729B-9871-4A2C-9228-9AAEE66F135D}.Release|Any CPU.Build.0 = Release|Any CPU + {9F3A8C86-8F28-4F54-B8A6-DBB49DDB5171}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9F3A8C86-8F28-4F54-B8A6-DBB49DDB5171}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9F3A8C86-8F28-4F54-B8A6-DBB49DDB5171}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9F3A8C86-8F28-4F54-B8A6-DBB49DDB5171}.Release|Any CPU.Build.0 = Release|Any CPU + {4CB41014-D036-4090-B6FA-4CFB01D82C3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {4CB41014-D036-4090-B6FA-4CFB01D82C3A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {4CB41014-D036-4090-B6FA-4CFB01D82C3A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {4CB41014-D036-4090-B6FA-4CFB01D82C3A}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection 
GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {4CB41014-D036-4090-B6FA-4CFB01D82C3A} = {ADFEEE61-D79B-4F91-A192-F6A2E949673C} + EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {0003E70E-C9A2-459C-A6A0-540449AC7A87} EndGlobalSection diff --git a/assets/blackwidow-logo-text-sm.png b/assets/blackwidow-logo-text-sm.png new file mode 100644 index 0000000..36c3c63 Binary files /dev/null and b/assets/blackwidow-logo-text-sm.png differ diff --git a/assets/blackwidow-logo-text.png b/assets/blackwidow-logo-text.png new file mode 100644 index 0000000..6be1754 Binary files /dev/null and b/assets/blackwidow-logo-text.png differ diff --git a/assets/blackwidow-logo-text.psd b/assets/blackwidow-logo-text.psd new file mode 100644 index 0000000..39ef85e Binary files /dev/null and b/assets/blackwidow-logo-text.psd differ diff --git a/assets/blackwidow-logo.png b/assets/blackwidow-logo.png new file mode 100644 index 0000000..b93e412 Binary files /dev/null and b/assets/blackwidow-logo.png differ diff --git a/assets/blackwidow-logo.psd b/assets/blackwidow-logo.psd new file mode 100644 index 0000000..69ef5da Binary files /dev/null and b/assets/blackwidow-logo.psd differ diff --git a/blackwidow/README.md b/blackwidow/README.md new file mode 100644 index 0000000..ebbfd9a --- /dev/null +++ b/blackwidow/README.md @@ -0,0 +1,27 @@ +SharpGrabber + +# BlackWidow + +BlackWidow is a .NET library based on SharpGrabber. Rather than relying on .NET assemblies, BlackWidow executes scripts written specifically for grabbing. + +## Why use BlackWidow? +BlackWidow gives you the following advantages over the traditional NuGet package approach: + +- **Always Up-to-date:** The scripts are always kept up-to-date at runtime; so the functionality of the host application won't break as the sources change - at least not for long! 
+- **ECMAScript Support:** Supports JavaScript/ECMAScript out of the box. +- **Easy Maintenance:** *JavaScript* is darn easy to write and understand! This helps contributors to quickly write new grabbers or fix the existing ones. +- **Secure**: The scripts are executed in a sandbox environment, and they only have access to what the BlackWidow API exposes to them. +- **Highly Customizable:** Almost everything is open for extension or replacement. Make new script interpreters, custom grabber repositories, or roll out your own interpreter APIs + +## How does it work? + +BlackWidow keeps a collection of scripts locally - called the local repository. +Each script gets interpreted as an object implementing `IGrabber`. +To keep the scripts up-to-date, a remote repository is constantly monitored as the single source of truth. + +*TODO:* Read the Documentation + +# Installation +*WIP* + +<- Back to Home Page diff --git a/blackwidow/repo/feed.json b/blackwidow/repo/feed.json new file mode 100644 index 0000000..4fdf90b --- /dev/null +++ b/blackwidow/repo/feed.json @@ -0,0 +1,22 @@ +{ + "scripts": [ + { + "id": "vimeo.com", + "name": "Vimeo", + "version": "1.0", + "type": "JavaScript", + "apiVersion": 1, + "supportedRegularExpressions": [ "^https?://(www\\.|player\\.)?vimeo\\.com/(video/)?([0-9]+)" ], + "file": "scripts/vimeo.js" + }, + { + "id": "pornhub.com", + "name": "PornHub", + "version": "1.0", + "type": "JavaScript", + "apiVersion": 1, + "supportedRegularExpressions": [ "^(https?:\\/\\/)?(www\\.)?pornhub\\.com\\/([^\\/]+)viewkey=(\\w+).*$" ], + "file": "scripts/pornhub.js" + } + ] +} \ No newline at end of file diff --git a/blackwidow/repo/scripts/pornhub.js b/blackwidow/repo/scripts/pornhub.js new file mode 100644 index 0000000..26b7ef1 --- /dev/null +++ b/blackwidow/repo/scripts/pornhub.js @@ -0,0 +1,115 @@ +const urlMatcher = /^(https?:\/\/)?(www\.)?pornhub\.com\/([^\/]+)viewkey=(\w+).*$/i +const flashVarsFinder = /^\s*(var|let)\s+(flashvars[\w_]+)\s+=/mi + +const 
getViewId = uri => { + const url = new URL(uri) + const match = urlMatcher.exec(uri) + if (!match) + return undefined + return match[4] +} + +const getStdUrl = url => { + return `https://www.pornhub.com/view_video.php?viewkey=${url}` +} + +const parseFlashVarsScript = doc => { + let source + let varName + doc.selectAll('script').forEach(elem => { + const match = flashVarsFinder.exec(elem.innerText) + if (match) { + source = elem.innerText + varName = match[2] + } + }) + + const flashVars = new Function('let playerObjList = {};'+source + ';return '+varName+';')() + if (!flashVars) + throw new GrabException('Could not extract flashVars.') + return flashVars +} + +const updateResult = (result, vars) => { + const parseBool = str => typeof str === 'boolean' ? str : new Function('return ' + str)(); + + if (parseBool(vars.video_unavailable)) + throw new GrabException('This video is unavailable.') + if (parseBool(vars.video_unavailable_country)) + throw new GrabException('This video is unavailable in your country.') + + const duration = vars.video_duration * 1000 // milliseconds + + result.title = vars.video_title + + result.grab('info', { + length: duration + }) + + result.grab('image', { + resourceUri: vars.image_url, + type: 'primary' + }) + + vars.mediaDefinitions.forEach(def => { + if (!def.quality || def.remote || !def.videoUrl) + return + + if (def.format === 'hls') { + // grab HLS stream + if (Array.isArray(def.quality)) { + result.grab('hlsStreamReference', { + resourceUri: def.videoUrl, + playlistType: 'master', + resolution: def.quality.join(',') + }) + } else { + result.grab('hlsStreamReference', { + resourceUri: def.videoUrl, + playlistType: 'stream', + resolution: def.quality + }) + } + } else { + // grab mp4 video + result.grab('media', { + resourceUri: def.videoUrl, + format: { + mime: 'video/mp4', + extension: 'mp4', + channels: 'both', + length: duration, + container: 'mp4,' + resolution: def.quality, + formatTitle: 'MP4 ' + def.quality, + } + }) + } + }) 
+} + +grabber.supports = uri => { + return getViewId(uri) !== undefined +} + +grabber.grab = (request, result) => { + + // init + const viewId = getViewId(request.url) + if (!viewId) + return false + + // download page + const url = getStdUrl(viewId) + const response = http.client.get({ + url + }) + response.assertSuccess() + + // parse response HTML + const doc = html.parse(response.bodyText) + const flashVars = parseFlashVarsScript(doc) + updateResult(result, flashVars) + + return true +} diff --git a/blackwidow/repo/scripts/vimeo.js b/blackwidow/repo/scripts/vimeo.js new file mode 100644 index 0000000..58664cb --- /dev/null +++ b/blackwidow/repo/scripts/vimeo.js @@ -0,0 +1,83 @@ +const urlRegex = /^https?:\/\/(www\.|player\.)?vimeo\.com\/(video\/)?([0-9]+)/ + +function getVideoId(url) { + const match = urlRegex.exec(url) + return match ? match[3] : undefined +} + +function getConfigUrl(videoId) { + return 'https://player.vimeo.com/video/{0}/config'.replace('{0}', videoId) +} + +function fetchConfig(videoId) { + const url = getConfigUrl(videoId) + const response = http.client.get({ + url, + expectText: true + }) + response.assertSuccess() + return JSON.parse(response.bodyText) +} + +function setGrabResult(result, config) { + if (!config.request.files) + throw new GrabException('Video is unavailable.') + + // add info + result.title = config.video.title + result.grab('info', { + author: config.video.owner?.name, + length: config.video.duration * 1000, + }) + + // add images + if (config.video.thumbs) { + for (var key in config.video.thumbs) { + const isBase = Number.isNaN(Number(key)) + const size = isBase ? undefined : { + width: key, + height: key * 0.5625 + }; + result.grab('image', { + resourceUri: config.video.thumbs[key], + type: isBase ? 
'primary' : 'thumbnail', + size + }) + } + } + + // add media + config.request.files.progressive.forEach(file => { + const fileMime = file.mime || 'video/mp4' + const fileExt = mime.getExtension(fileMime) + const containerName = fileExt.toUpperCase() + result.grab('media', { + resourceUri: file.url, + channels: 'both', + container: containerName, + resolution: file.quality, + formatTitle: containerName + ' ' + file.quality, + pixelWidth: file.width, + pixelHeight: file.height, + format: { + mime: fileMime, + extension: fileExt + } + }) + }) +} + +grabber.supports = url => Boolean(getVideoId(url)) + +grabber.grab = (request, result) => { + const videoId = getVideoId(request.url) + if (!videoId) + return false + + const config = fetchConfig(videoId) + if (!config) + throw new GrabException('Failed to fetch video config.') + + setGrabResult(result, config) + return true +} \ No newline at end of file diff --git a/blackwidow/schema/feed.json b/blackwidow/schema/feed.json new file mode 100644 index 0000000..d1baca6 --- /dev/null +++ b/blackwidow/schema/feed.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft-04/schema", + "$id": "https://raw.githubusercontent.com/dotnettools/SharpGrabber/blackwidow/blackwidow/schema/feed.json", + "title": "Feed", + "description": "BlackWidow Grabber Repository Feed", + "type": "object", + "properties": { + "scripts": { + "description": "Array of scripts defined in this feed", + "type": "array", + "items": { + "description": "BlackWidow Grabber Repository Script", + "type": "object", + "properties": { + "id": { + "description": "The unique identifier for the script", + "type": "string" + }, + "name": { + "description": "A friendly name for the script", + "type": "string" + }, + "version": { + "description": "The semantic version of the script", + "type": "string", + "minLength": 5, + "maxLength": 14, + "pattern": "^(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)$" + }, + "type": { + "description": "Type of the 
script", + "type": "string", + "enum": [ "JavaScript" ] + }, + "apiVersion": { + "description": "BlackWidow API version", + "type": "integer", + "minimum": 1 + }, + "supportedRegularExpressions": { + "description": "Array of regular expressions the script can potentially support", + "type": "array", + "items": { + "type": "string" + } + }, + "file": { + "description": "Virtual path to the script file, relative to the feed", + "type": "string" + } + }, + "required": [ "id", "name", "version", "type", "apiVersion", "supportedRegularExpressions", "file" ] + } + } + }, + "required": [ "scripts" ] +} \ No newline at end of file diff --git a/src/SharpGrabber.Adult/PornHubGrabber.cs b/src/SharpGrabber.Adult/PornHubGrabber.cs index 68ba74c..455b19d 100644 --- a/src/SharpGrabber.Adult/PornHubGrabber.cs +++ b/src/SharpGrabber.Adult/PornHubGrabber.cs @@ -123,7 +123,7 @@ protected virtual void Grab(GrabResult result, List resources, JObject if (options.Flags.HasFlag(GrabOptionFlags.GrabImages)) { var image_url = new Uri(result.OriginalUri, flashVars.SelectToken("$.image_url").Value()); - resources.Add(new GrabbedImage(GrabbedImageType.Primary, null, image_url)); + resources.Add(new GrabbedImage(GrabbedImageType.Primary, image_url)); } result.Title = flashVars.SelectToken("$.video_title").Value(); @@ -140,7 +140,7 @@ protected virtual void Grab(GrabResult result, List resources, JObject var url = quality.Value("url"); if (string.IsNullOrEmpty(url)) continue; - var vid = new GrabbedMedia(new Uri(result.OriginalUri, url), result.OriginalUri, DefaultMediaFormat, MediaChannels.Both); + var vid = new GrabbedMedia(new Uri(result.OriginalUri, url), DefaultMediaFormat, MediaChannels.Both); vid.Resolution = quality.Value("text"); var qint = StringHelper.ForceParseInt(vid.Resolution); grabbed.Add(qint, vid); @@ -166,7 +166,7 @@ protected virtual void Grab(GrabResult result, List resources, JObject switch (format.ToLowerInvariant()) { case "mp4": - var m = new GrabbedMedia(uri,
result.OriginalUri, DefaultMediaFormat, MediaChannels.Both) + var m = new GrabbedMedia(uri, DefaultMediaFormat, MediaChannels.Both) { Resolution = resol, FormatTitle = $"MP4 {resol}", @@ -174,7 +174,7 @@ protected virtual void Grab(GrabResult result, List resources, JObject grabbed.Add(quality, m); break; case "hls": - var sr = new GrabbedStreamReference(uri, result.OriginalUri) + var sr = new GrabbedHlsStreamReference(uri) { Resolution = resol, PlaylistType = playlistType, diff --git a/src/SharpGrabber.Adult/XnxxGrabber.cs b/src/SharpGrabber.Adult/XnxxGrabber.cs index 5fd3f59..323e5bc 100644 --- a/src/SharpGrabber.Adult/XnxxGrabber.cs +++ b/src/SharpGrabber.Adult/XnxxGrabber.cs @@ -49,10 +49,10 @@ protected override async Task InternalGrabAsync(Uri uri, Cancellatio // grab images var img = (paramMap.GetOrDefault("image") ?? paramMap.GetOrDefault("ThumbUrl169") ?? paramMap.GetOrDefault("ThumbUrl")) as string; if (Uri.TryCreate(img, UriKind.Absolute, out var imgUri)) - resources.Add(new GrabbedImage(GrabbedImageType.Thumbnail, uri, imgUri)); + resources.Add(new GrabbedImage(GrabbedImageType.Thumbnail, imgUri)); img = (paramMap.GetOrDefault("ThumbSlideBig") ?? paramMap.GetOrDefault("ThumbSlide")) as string; if (Uri.TryCreate(img, UriKind.Absolute, out imgUri)) - resources.Add(new GrabbedImage(GrabbedImageType.Preview, uri, imgUri)); + resources.Add(new GrabbedImage(GrabbedImageType.Preview, imgUri)); // grab resources var hls = paramMap["VideoHLS"] as string; diff --git a/src/SharpGrabber.BlackWidow/BlackWidowConstants.cs b/src/SharpGrabber.BlackWidow/BlackWidowConstants.cs new file mode 100644 index 0000000..e6843dd --- /dev/null +++ b/src/SharpGrabber.BlackWidow/BlackWidowConstants.cs @@ -0,0 +1,38 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Defines BlackWidow-related constants. 
+ /// + public static class BlackWidowConstants + { + public static class GitHub + { + public static class OfficialRepository + { + /// <summary> + /// The official repository name, in "owner/name" form + /// </summary> + public const string RepositoryAddress = "dotnettools/SharpGrabber"; + + /// <summary> + /// Name of the main branch + /// </summary> + public const string MasterBranch = "master"; + + /// <summary> + /// Path to the directory that contains the feed file and the scripts + /// </summary> + public const string RootPath = "blackwidow/repo"; + + /// <summary> + /// Name of the feed JSON file + /// </summary> + public const string FeedFileName = "feed.json"; + } + } + } +} diff --git a/src/SharpGrabber.BlackWidow/BlackWidowInitializer.cs b/src/SharpGrabber.BlackWidow/BlackWidowInitializer.cs new file mode 100644 index 0000000..aaa85e3 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/BlackWidowInitializer.cs @@ -0,0 +1,32 @@ +using System; +using System.Collections.Generic; +using System.Reflection; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + internal static class BlackWidowInitializer + { + static BlackWidowInitializer() + { + EnsureLoaded(Hls.HlsGrabber.Initializer); + } + + public static void Test() + { + // Intentionally empty: calling this method only serves to trigger + // the static constructor, which runs once.
+ } + + private static void EnsureLoaded(params Type[] types) + { + foreach (var type in types) + { + // create a dummy instance just to ensure the type is loaded + var o = Activator.CreateInstance(type); + if (o is IDisposable disposable) + disposable.Dispose(); + } + } + } +} diff --git a/src/SharpGrabber.BlackWidow/BlackWidowService.cs b/src/SharpGrabber.BlackWidow/BlackWidowService.cs new file mode 100644 index 0000000..758f418 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/BlackWidowService.cs @@ -0,0 +1,289 @@ +using DotNetTools.SharpGrabber.BlackWidow.Exceptions; +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter; +using DotNetTools.SharpGrabber.BlackWidow.Repository; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; +using DotNetTools.SharpGrabber.BlackWidow.Internal; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Default implementation for + /// + public class BlackWidowService : IBlackWidowService + { + private readonly ConcurrentDictionary _grabbers = + new(StringComparer.InvariantCultureIgnoreCase); + private readonly BlackWidowGrabber _grabber; + + private readonly IGrabberRepositoryChangeDetector _changeDetector; + private readonly ConcurrentHashSet _scriptsUsed = new(); + private readonly ConcurrentHashSet _scriptsUpdating = new(); + private IGrabberRepositoryFeed _localFeed; + private IGrabberRepositoryFeed _remoteFeed; + + protected BlackWidowService(IGrabberRepository localRepository, IGrabberRepository remoteRepository, + IGrabberServices grabberServices, + IScriptHost scriptHost, IGrabberScriptInterpreterService interpreterService, IGrabberRepositoryChangeDetector changeDetector) + { + _changeDetector = changeDetector; + Interpreters = 
interpreterService ?? throw new ArgumentNullException(nameof(interpreterService)); + LocalRepository = localRepository ?? throw new ArgumentNullException(nameof(localRepository)); + RemoteRepository = remoteRepository ?? throw new ArgumentNullException(nameof(remoteRepository)); + ScriptHost = scriptHost; + changeDetector.RepositoryChanged += ChangeDetector_RepositoryChanged; + _grabber = new BlackWidowGrabber(this, grabberServices ?? throw new ArgumentNullException(nameof(grabberServices))); + } + + public IScriptHost ScriptHost { get; } + + /// + /// Gets the interpreter service. + /// + public IGrabberScriptInterpreterService Interpreters { get; } + + public IGrabberRepository LocalRepository { get; } + + public IGrabberRepository RemoteRepository { get; } + + public IBlackWidowGrabber Grabber => _grabber; + + /// + /// Creates a new instance of . + /// + public static async Task CreateAsync(IGrabberRepository localRepository, + IGrabberRepository remoteRepository, + IGrabberServices grabberServices, + IScriptHost scriptHost, IGrabberScriptInterpreterService interpreterService = null, + IGrabberRepositoryChangeDetector changeDetector = null) + { + interpreterService ??= new GrabberScriptInterpreterService(); + changeDetector ??= new GrabberRepositoryChangeDetector(new[] { localRepository, remoteRepository }); + var service = new BlackWidowService(localRepository, remoteRepository, grabberServices, scriptHost, interpreterService, changeDetector); + await service.LoadLocalFeedAsync().ConfigureAwait(false); + return service; + } + + public IEnumerable GetLocalCandidates(Uri uri) + { + return _grabbers.Values.Where(g => g.Supports(uri)); + } + + public IGrabber GetLocalScript(string scriptId) + => _grabbers.GetOrDefault(scriptId); + + public IEnumerable GetRemoteCandidates(Uri uri) + { + if (_remoteFeed == null) + return Enumerable.Empty(); + + return _remoteFeed + .GetScripts() + .Where(s => s.IsMatch(uri)); + } + + public async Task GetScriptAsync(string scriptId) 
+ { + // init + var localInfo = _localFeed.GetScript(scriptId); + var remoteInfo = _remoteFeed?.GetScript(scriptId); + if (localInfo == null) + { + await LoadLocalFeedAsync().ConfigureAwait(false); + localInfo = _localFeed.GetScript(scriptId); + } + + var updateNeeded = localInfo == null || + (remoteInfo != null && remoteInfo.GetVersion() > localInfo.GetVersion()); + + if (localInfo == null && remoteInfo == null) + return null; + + // fetch the script + if (updateNeeded) + { + var source = await RemoteRepository.FetchSourceAsync(remoteInfo).ConfigureAwait(false); + await LocalRepository.PutAsync(remoteInfo, source).ConfigureAwait(false); + _grabbers.TryRemove(scriptId, out _); + await LoadLocalFeedAsync().ConfigureAwait(false); + } + + // get local grabber + return _grabbers.GetOrDefault(scriptId); + } + + public async Task UpdateFeedAsync() + { + _remoteFeed = await RemoteRepository.GetFeedAsync().ConfigureAwait(false); + } + + public void Dispose() + { + _changeDetector?.Dispose(); + } + + private async Task LoadLocalFeedAsync() + { + _localFeed = await LocalRepository.GetFeedAsync().ConfigureAwait(false); + await LoadLocalGrabbers().ConfigureAwait(false); + } + + private async Task LoadLocalGrabbers() + { + foreach (var scriptInfo in _localFeed.GetScripts()) + { + if (_grabbers.ContainsKey(scriptInfo.Id)) + continue; + var scriptSource = await LocalRepository.FetchSourceAsync(scriptInfo).ConfigureAwait(false); + await LoadGrabberAsync(scriptInfo, scriptSource).ConfigureAwait(false); + } + } + + private async Task LoadGrabberAsync(IGrabberRepositoryScript scriptInfo, + IGrabberScriptSource scriptSource) + { + var interpreter = Interpreters.GetInterpreter(scriptInfo.Type); + if (interpreter == null) + throw new ScriptInterpretException($"No interpreter is registered for {scriptInfo.Type}."); + + var grabber = await interpreter.InterpretAsync(scriptInfo, scriptSource, scriptInfo.ApiVersion) + .ConfigureAwait(false); + _grabbers.TryAdd(scriptInfo.Id, grabber); + 
return grabber; + } + /// <summary>Reacts to a feed change in the local or remote repository: stores the new feed and starts a background update of the scripts used so far.</summary> + private void ChangeDetector_RepositoryChanged(IGrabberRepository repository, IGrabberRepositoryFeed feed, IGrabberRepositoryFeed prevFeed) + { + if (repository != LocalRepository && repository != RemoteRepository) + return; + + var isLocal = LocalRepository == repository; + if (isLocal) + _localFeed = feed; + else + _remoteFeed = feed; + _ = UpdateGrabbersAsync(_scriptsUsed); // fire-and-forget: the event handler cannot await + } + /// <summary>Updates the given scripts when the remote feed has a newer version or the local feed lacks them; returns true if any update ran.</summary> + private async Task<bool> UpdateGrabbersAsync(IEnumerable<string> ids) + { + if (_remoteFeed == null) + await UpdateFeedAsync().ConfigureAwait(false); + + var localFeed = _localFeed; + var remoteFeed = _remoteFeed; + if (localFeed == null || remoteFeed == null) + return false; + + var idSet = new HashSet<string>(ids, StringComparer.InvariantCultureIgnoreCase); + + var localScripts = localFeed.GetScripts() + .Where(s => idSet.Contains(s.Id)) + .ToDictionary(s => s.Id); + var remoteScripts = remoteFeed.GetScripts() + .Where(s => idSet.Contains(s.Id)); + + // compare scripts + var updateTasks = new List<Task<bool>>(); + foreach (var remoteScript in remoteScripts) + { + localScripts.TryGetValue(remoteScript.Id, out var localScript); // stays null when the script is not in the local feed; the indexer would throw + if (localScript != null && remoteScript.GetVersion() <= localScript.GetVersion()) + continue; + var task = UpdateGrabberAsync(remoteScript.Id); + updateTasks.Add(task); + } + await Task.WhenAll(updateTasks).ConfigureAwait(false); + var anyUpdates = updateTasks.Any(t => t.Result); + + if (anyUpdates) + { + await LoadLocalGrabbers().ConfigureAwait(false); + } + return anyUpdates; + } + /// <summary>Updates one script through GetScriptAsync unless the remote feed lacks it, the local version is not older, or its id is already in _scriptsUpdating; returns true if the update ran.</summary> + private async Task<bool> UpdateGrabberAsync(string id) + { + // get current records + var localScript = _localFeed?.GetScript(id); + var remoteScript = _remoteFeed?.GetScript(id); + if (remoteScript == null) + return false; + if (localScript != null && localScript.GetVersion() >= remoteScript.GetVersion()) + return false; + + if (!_scriptsUpdating.Add(id)) + return false; + + try + { + // update script + await GetScriptAsync(id).ConfigureAwait(false); + } + finally + { + _scriptsUpdating.Remove(id); + } + return true; + } + +
private sealed class BlackWidowGrabber : GrabberBase, IBlackWidowGrabber + { + private readonly BlackWidowService _service; + + public BlackWidowGrabber(BlackWidowService service, IGrabberServices services) : base(services) + { + _service = service; + } + + public override string StringId => "BlackWidow"; + + public override string Name => "BlackWidow"; + + public override GrabOptions DefaultGrabOptions { get; } = new GrabOptions(GrabOptionFlags.All); + + public IEnumerable GetScriptGrabbers() + { + return _service._grabbers.Values.AsEnumerable(); + } + + public override bool Supports(Uri uri) + { + return new[] { _service._localFeed, _service._remoteFeed } + .Any(feed => feed?.GetScripts().Any(s => s.IsMatch(uri)) ?? false); + } + + protected override async Task InternalGrabAsync(Uri uri, CancellationToken cancellationToken, GrabOptions options, IProgress progress) + { + Dictionary GetGrabbers() + => _service._grabbers + .Where(g => g.Value.Supports(uri)) + .ToDictionary(g => g.Key, g => g.Value); + + var grabbers = GetGrabbers(); + if (await _service.UpdateGrabbersAsync(grabbers.Keys).ConfigureAwait(false)) + { + grabbers = GetGrabbers(); + } + + foreach (var grabber in grabbers) + { + _service._scriptsUsed.Add(grabber.Key); + var result = await grabber.Value.GrabAsync(uri, cancellationToken, options, progress).ConfigureAwait(false); + if (result != null) + return result; + } + return null; + } + } + } +} \ No newline at end of file diff --git a/src/SharpGrabber.BlackWidow/Builder/BlackWidowBuilder.cs b/src/SharpGrabber.BlackWidow/Builder/BlackWidowBuilder.cs new file mode 100644 index 0000000..3df3f2c --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/BlackWidowBuilder.cs @@ -0,0 +1,103 @@ +using System; +using System.Threading.Tasks; +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter; +using DotNetTools.SharpGrabber.BlackWidow.Repository; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// 
Build a BlackWidow service. + /// + public sealed class BlackWidowBuilder : IBlackWidowBuilder + { + private IGrabberRepository _localRepository; + private IGrabberRepository _remoteRepository; + private IGrabberServices _grabberServices; + private IGrabberScriptInterpreterService _interpreterService; + private IScriptHost _scriptHost; + private IGrabberRepositoryChangeDetector _changeDetector; + + private BlackWidowBuilder() { } + + /// + /// Creates a new . + /// + public static BlackWidowBuilder New() + => new(); + + public async Task BuildAsync() + { + if (_localRepository == null) + throw new InvalidOperationException("Local repository is unspecified."); + if (_remoteRepository == null) + throw new InvalidOperationException("Remote repository is unspecified."); + var changeDetector = _changeDetector ?? new GrabberRepositoryChangeDetector(new[] { _localRepository, _remoteRepository }); + if (_interpreterService == null) + SetDefaultInterpreterService(); + var grabberServices = _grabberServices ?? GrabberServices.Default; + + var service = await BlackWidowService.CreateAsync(_localRepository, _remoteRepository, + grabberServices ?? throw new InvalidOperationException("Grabber services instance is unspecified."), + _scriptHost ?? new ScriptHost(), + _interpreterService, changeDetector).ConfigureAwait(false); + return service; + } + + public IBlackWidowBuilder ConfigureLocalRepository(Action configurator) + { + var cfg = new BlackWidowRepositoryConfigurator(); + configurator(cfg); + _localRepository = cfg.Repository ?? throw new InvalidOperationException("No"); + return this; + } + + public IBlackWidowBuilder ConfigureRemoteRepository(Action configurator) + { + var cfg = new BlackWidowRepositoryConfigurator(); + configurator(cfg); + _remoteRepository = cfg.Repository ?? 
throw new InvalidOperationException("No"); + return this; + } + + public IBlackWidowBuilder SetChangeDetector(IGrabberRepositoryChangeDetector changeDetector) + { + _changeDetector = changeDetector; + return this; + } + + public IBlackWidowBuilder SetScriptHost(IScriptHost scriptHost) + { + _scriptHost = scriptHost; + return this; + } + + public IBlackWidowBuilder UseInterpreterService(IGrabberScriptInterpreterService interpreterService) + { + _interpreterService = interpreterService; + return this; + } + + public IBlackWidowBuilder SetGrabberServices(IGrabberServices grabberServices) + { + _grabberServices = grabberServices; + return this; + } + + public IBlackWidowBuilder ConfigureInterpreterService(Action configure) + { + var configurator = new GrabberScriptInterpreterServiceConfigurator() + .UseScriptHost(_scriptHost); + configure(configurator); + + var interpreterService = configurator.Build(); + return UseInterpreterService(interpreterService); + } + + private void SetDefaultInterpreterService() + { + var service = new GrabberScriptInterpreterService(); + ConfigureInterpreterService(cfg => cfg.AddJint()); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Builder/BlackWidowRepositoryConfigurator.cs b/src/SharpGrabber.BlackWidow/Builder/BlackWidowRepositoryConfigurator.cs new file mode 100644 index 0000000..17a7cda --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/BlackWidowRepositoryConfigurator.cs @@ -0,0 +1,19 @@ +using DotNetTools.SharpGrabber.BlackWidow.Repository; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Builds BlackWidow repositories. 
+ /// + internal class BlackWidowRepositoryConfigurator : IBlackWidowRepositoryConfigurator + { + public IGrabberRepository Repository { get; private set; } + + public IBlackWidowRepositoryConfigurator Use(IGrabberRepository repository) + { + Repository?.Dispose(); + Repository = repository; + return this; + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Builder/BuilderExtensions.cs b/src/SharpGrabber.BlackWidow/Builder/BuilderExtensions.cs new file mode 100644 index 0000000..70d66e1 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/BuilderExtensions.cs @@ -0,0 +1,31 @@ +using DotNetTools.SharpGrabber.BlackWidow.Definitions; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter.Api; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter.JavaScript; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Defines extension methods for builder and configurator interfaces to work with built-in implementations. + /// + public static class BuilderExtensions + { + /// + /// Registers Jint as the JavaScript interpreter. + /// + public static IGrabberScriptInterpreterServiceConfigurator AddJint(this IGrabberScriptInterpreterServiceConfigurator interpreterService) + { + return interpreterService.AddInterpreter(GrabberScriptType.JavaScript, context => + { + return new JintJavaScriptInterpreter(context.ApiService, context.GrabberServices, context.ScriptHost); + }); + } + + /// + /// Configures to use the official API service. 
+ /// + public static IGrabberScriptInterpreterServiceConfigurator SetDefaultApiService(this IGrabberScriptInterpreterServiceConfigurator interpreterService) + { + return interpreterService.SetApiService(context => new DefaultInterpreterApiService(context.GrabberServices, context.GrabbedTypes, context.TypeConverter)); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterActivationContext.cs b/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterActivationContext.cs new file mode 100644 index 0000000..5551b15 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterActivationContext.cs @@ -0,0 +1,34 @@ +using System; +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter.Api; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Provides references to services used when activating an interpreter. + /// + public class GrabberScriptInterpreterActivationContext + { + public GrabberScriptInterpreterActivationContext(IInterpreterApiService apiService, IGrabberServices grabberServices, IScriptHost scripHost) + { + ApiService = apiService ?? throw new ArgumentNullException(nameof(apiService)); + GrabberServices = grabberServices ?? throw new ArgumentNullException(nameof(grabberServices)); + ScriptHost = scripHost ?? throw new ArgumentNullException(nameof(scripHost)); + } + + /// + /// Gets the interpreter API service. + /// + public IInterpreterApiService ApiService { get; } + + /// + /// Gets the grabber services. + /// + public IGrabberServices GrabberServices { get; } + + /// + /// Gets the script host. 
+ /// + public IScriptHost ScriptHost { get; } + } +} diff --git a/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterApiServiceActivationContext.cs b/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterApiServiceActivationContext.cs new file mode 100644 index 0000000..aa2956e --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterApiServiceActivationContext.cs @@ -0,0 +1,41 @@ +using System; +using DotNetTools.ConvertEx; +using DotNetTools.SharpGrabber.BlackWidow.Host; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Provides references to services used when activating an interpreter API service. + /// + public class GrabberScriptInterpreterApiServiceActivationContext + { + public GrabberScriptInterpreterApiServiceActivationContext(IGrabberServices grabberServices, IScriptHost scriptHost, + IGrabbedTypeCollection grabbedTypes, ITypeConverter typeConverter) + { + GrabberServices = grabberServices ?? throw new ArgumentNullException(nameof(grabberServices)); + ScriptHost = scriptHost ?? throw new ArgumentNullException(nameof(scriptHost)); + GrabbedTypes = grabbedTypes ?? throw new ArgumentNullException(nameof(grabbedTypes)); + TypeConverter = typeConverter ?? throw new ArgumentNullException(nameof(typeConverter)); + } + + /// + /// Gets the grabber services. + /// + public IGrabberServices GrabberServices { get; } + + /// + /// Gets the script host. + /// + public IScriptHost ScriptHost { get; } + + /// + /// Gets the collection of grabbed types. + /// + public IGrabbedTypeCollection GrabbedTypes { get; } + + /// + /// Gets the type converter. 
+ /// + public ITypeConverter TypeConverter { get; } + } +} diff --git a/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterServiceConfigurator.cs b/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterServiceConfigurator.cs new file mode 100644 index 0000000..4585ef1 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/GrabberScriptInterpreterServiceConfigurator.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using DotNetTools.ConvertEx; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter.Api; +using DotNetTools.SharpGrabber.BlackWidow.TypeConversion; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Builds . + /// + internal class GrabberScriptInterpreterServiceConfigurator : IGrabberScriptInterpreterServiceConfigurator + { + private readonly Dictionary> _interpreterFactories = new(); + private Func _apiServiceFactory; + private IGrabberServices _grabberServices; + private IScriptHost _scriptHost; + private IGrabbedTypeCollection _grabbedTypeCollection; + private ITypeConverter _typeConverter; + + public IGrabberScriptInterpreterServiceConfigurator UseGrabberServices(IGrabberServices grabberServices) + { + _grabberServices = grabberServices; + return this; + } + + public IGrabberScriptInterpreterServiceConfigurator UseScriptHost(IScriptHost scriptHost) + { + _scriptHost = scriptHost; + return this; + } + + public IGrabberScriptInterpreterServiceConfigurator UseGrabbedTypeCollection(IGrabbedTypeCollection grabbedTypeCollection) + { + _grabbedTypeCollection = grabbedTypeCollection; + return this; + } + + public IGrabberScriptInterpreterServiceConfigurator UseTypeConverter(ITypeConverter typeConverter) + { + _typeConverter = typeConverter; + return this; + } + + public IGrabberScriptInterpreterServiceConfigurator SetApiService(Func apiServiceFactory) 
+ { + _apiServiceFactory = apiServiceFactory; + return this; + } + + public IGrabberScriptInterpreterServiceConfigurator AddInterpreter(GrabberScriptType scriptType, + Func interpreterFactory) + { + _interpreterFactories[scriptType] = interpreterFactory; + return this; + } + + public IGrabberScriptInterpreterService Build() + { + if (_apiServiceFactory == null) + this.SetDefaultApiService(); + if (_apiServiceFactory == null) + throw new InvalidOperationException("Interpreter API service is unspecified."); + + var grabberServies = _grabberServices ?? GrabberServices.Default; + var scriptHost = _scriptHost ?? new ScriptHost(); + var grabbedTypeCollection = _grabbedTypeCollection ?? new GrabbedTypeCollection(); + var typeConverter = _typeConverter ?? TypeConverters.Default; + var apiServiceContext = new GrabberScriptInterpreterApiServiceActivationContext(grabberServies, scriptHost, grabbedTypeCollection, typeConverter); + var apiService = _apiServiceFactory.Invoke(apiServiceContext); + var interpreterContext = new GrabberScriptInterpreterActivationContext(apiService, grabberServies, scriptHost); + + var service = new GrabberScriptInterpreterService(); + foreach (var pair in _interpreterFactories) + { + var interpreter = pair.Value(interpreterContext); + service.Register(pair.Key, interpreter); + } + return service; + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Builder/IBlackWidowBuilder.cs b/src/SharpGrabber.BlackWidow/Builder/IBlackWidowBuilder.cs new file mode 100644 index 0000000..e8dbb4e --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Builder/IBlackWidowBuilder.cs @@ -0,0 +1,55 @@ +using System; +using System.Threading.Tasks; +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter; +using DotNetTools.SharpGrabber.BlackWidow.Repository; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Builds a . + /// + public interface IBlackWidowBuilder + { + /// + /// Configures the local repository. 
    /// <summary>
    /// Configures a repository on a builder.
    /// </summary>
    public interface IBlackWidowRepositoryConfigurator
    {
        /// <summary>
        /// Gets the configured repository.
        /// </summary>
        IGrabberRepository Repository { get; }

        /// <summary>
        /// Uses a repository instance.
        /// </summary>
        /// <param name="repository">The repository instance to use.</param>
        /// <returns>A configurator, so that calls can be chained.</returns>
        IBlackWidowRepositoryConfigurator Use(IGrabberRepository repository);
    }
+ IGrabberScriptInterpreterService Build(); + } +} diff --git a/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptSource.cs b/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptSource.cs new file mode 100644 index 0000000..5312d29 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptSource.cs @@ -0,0 +1,38 @@ +using System.IO; +using System.Threading.Tasks; + +namespace DotNetTools.SharpGrabber.BlackWidow.Definitions +{ + /// + /// Default implementation for + /// + public class GrabberScriptSource : IGrabberScriptSource + { + /// + /// Refers to a static empty source. + /// + public static readonly GrabberScriptSource Empty = new(string.Empty); + + private readonly string _source; + + public GrabberScriptSource(string source) + { + _source = source; + } + + /// + /// Creates a by reading all the source code from a file. + /// + public static GrabberScriptSource FromFile(string fileName) + { + var src = File.ReadAllText(fileName); + return new GrabberScriptSource(src); + } + + public string GetSource() + => _source; + + public Task GetSourceAsync() + => Task.FromResult(_source); + } +} diff --git a/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptType.cs b/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptType.cs new file mode 100644 index 0000000..3f2e4e4 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptType.cs @@ -0,0 +1,33 @@ +using System.Linq; +using System.Reflection; + +namespace DotNetTools.SharpGrabber.BlackWidow.Definitions +{ + /// + /// Defines all possible script types. + /// + public enum GrabberScriptType + { + /// + /// ECMAScript + /// + [GrabberScriptType(FileExtension = "js")] + JavaScript = 1, + } + + public static class GrabberScriptTypeExtensions + { + /// + /// Gets the associated with the value. + /// + public static GrabberScriptTypeAttribute GetScriptTypeAttribute(this GrabberScriptType value, bool orDefault = true) + { + GrabberScriptTypeAttribute GetDefault() + => orDefault ? 
GrabberScriptTypeAttribute.Default : null; + + var enumType = typeof(GrabberScriptType); + var member = enumType.GetMember(value.ToString()).FirstOrDefault(m => m.DeclaringType == enumType); + return member.GetCustomAttribute() ?? GetDefault(); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptTypeAttribute.cs b/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptTypeAttribute.cs new file mode 100644 index 0000000..b71c7a9 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Definitions/GrabberScriptTypeAttribute.cs @@ -0,0 +1,18 @@ +using System; + +namespace DotNetTools.SharpGrabber.BlackWidow.Definitions +{ + [AttributeUsage(AttributeTargets.Field)] + public class GrabberScriptTypeAttribute : Attribute + { + /// + /// Gets the default value. + /// + public static GrabberScriptTypeAttribute Default => new(); + + /// + /// Gets or sets the file extension associated with this script type. + /// + public string FileExtension { get; set; } + } +} diff --git a/src/SharpGrabber.BlackWidow/Definitions/IGrabberScriptSource.cs b/src/SharpGrabber.BlackWidow/Definitions/IGrabberScriptSource.cs new file mode 100644 index 0000000..033f676 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Definitions/IGrabberScriptSource.cs @@ -0,0 +1,20 @@ +using System.Threading.Tasks; + +namespace DotNetTools.SharpGrabber.BlackWidow.Definitions +{ + /// + /// Provides access to the source of a grabber script. + /// + public interface IGrabberScriptSource + { + /// + /// Gets the source code of the grabber script. + /// + string GetSource(); + + /// + /// Gets the source code of the grabber script. 
+ /// + Task GetSourceAsync(); + } +} diff --git a/src/SharpGrabber.BlackWidow/Exceptions/BlackWidowException.cs b/src/SharpGrabber.BlackWidow/Exceptions/BlackWidowException.cs new file mode 100644 index 0000000..e9f46c6 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Exceptions/BlackWidowException.cs @@ -0,0 +1,26 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Exceptions +{ + public class BlackWidowException : Exception + { + public BlackWidowException() + { + } + + public BlackWidowException(string message) : base(message) + { + } + + public BlackWidowException(string message, Exception innerException) : base(message, innerException) + { + } + + protected BlackWidowException(SerializationInfo info, StreamingContext context) : base(info, context) + { + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Exceptions/ScriptApiVersionMismatchException.cs b/src/SharpGrabber.BlackWidow/Exceptions/ScriptApiVersionMismatchException.cs new file mode 100644 index 0000000..a113468 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Exceptions/ScriptApiVersionMismatchException.cs @@ -0,0 +1,26 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Exceptions +{ + public class ScriptApiVersionMismatchException : BlackWidowException + { + public ScriptApiVersionMismatchException() + { + } + + public ScriptApiVersionMismatchException(string message) : base(message) + { + } + + public ScriptApiVersionMismatchException(string message, Exception innerException) : base(message, innerException) + { + } + + protected ScriptApiVersionMismatchException(SerializationInfo info, StreamingContext context) : base(info, context) + { + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Exceptions/ScriptInterpretException.cs 
b/src/SharpGrabber.BlackWidow/Exceptions/ScriptInterpretException.cs new file mode 100644 index 0000000..e5f74f6 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Exceptions/ScriptInterpretException.cs @@ -0,0 +1,26 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Exceptions +{ + public class ScriptInterpretException : BlackWidowException + { + public ScriptInterpretException() : this("Script interpret error.") + { + } + + public ScriptInterpretException(string message) : base(message) + { + } + + public ScriptInterpretException(string message, Exception innerException) : base(message, innerException) + { + } + + protected ScriptInterpretException(SerializationInfo info, StreamingContext context) : base(info, context) + { + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Host/ConsoleLog.cs b/src/SharpGrabber.BlackWidow/Host/ConsoleLog.cs new file mode 100644 index 0000000..53ae0c1 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Host/ConsoleLog.cs @@ -0,0 +1,28 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Host +{ + /// + /// Describes a log entry. + /// + public class ConsoleLog + { + public ConsoleLog(ConsoleLogLevel level, params object[] objects) + { + Level = level; + Objects = objects; + } + + /// + /// Gets the level. + /// + public ConsoleLogLevel Level { get; } + + /// + /// Gets the logged objects. 
    /// <summary>
    /// Defines handlers for various operations on the script host.
    /// </summary>
    public interface IScriptHost
    {
        /// <summary>
        /// Handles an alert carrying the given input.
        /// </summary>
        /// <param name="input">
        /// The alerted value. NOTE(review): presumably whatever a script passes to its alert
        /// function - confirm against the interpreter implementation.
        /// </param>
        void Alert(object input);

        /// <summary>
        /// Handles a console log entry.
        /// </summary>
        /// <param name="log">The log entry, which carries a level and the logged objects.</param>
        void Log(ConsoleLog log);
    }
+ /// + public class ScriptHost : IScriptHost + { + public ScriptHost() + { + } + + public event Action OnAlert; + public event Action OnLog; + + public void Alert(object input) + { + OnAlert?.Invoke(input); + } + + public void Log(ConsoleLog log) + { + OnLog?.Invoke(log); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/IBlackWidowGrabber.cs b/src/SharpGrabber.BlackWidow/IBlackWidowGrabber.cs new file mode 100644 index 0000000..1d3167b --- /dev/null +++ b/src/SharpGrabber.BlackWidow/IBlackWidowGrabber.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Represents a BlackWidow grabber. + /// + public interface IBlackWidowGrabber : IGrabber + { + /// + /// Enumerates internal grabbers, each representing a single grabber script. + /// + IEnumerable GetScriptGrabbers(); + } +} diff --git a/src/SharpGrabber.BlackWidow/IBlackWidowService.cs b/src/SharpGrabber.BlackWidow/IBlackWidowService.cs new file mode 100644 index 0000000..c545087 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/IBlackWidowService.cs @@ -0,0 +1,62 @@ +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Repository; +using System; +using System.Collections.Generic; +using System.Text; +using System.Threading.Tasks; + +namespace DotNetTools.SharpGrabber.BlackWidow +{ + /// + /// Manages a local grabber repository and keeps it constantly up-to-date. + /// + public interface IBlackWidowService : IDisposable + { + /// + /// Gets the dynamic grabber that wraps the internal grabbers of the BlackWidow service. + /// + IBlackWidowGrabber Grabber { get; } + + /// + /// Gets the script host. + /// + IScriptHost ScriptHost { get; } + + /// + /// Gets the local grabber repository. + /// + IGrabberRepository LocalRepository { get; } + + /// + /// Gets the remote grabber repository. 
+ /// + IGrabberRepository RemoteRepository { get; } + + /// + /// Updates feed from the remote repository. + /// + Task UpdateFeedAsync(); + + /// + /// Enumerates local grabbers that might support grabbing from . + /// + IEnumerable GetLocalCandidates(Uri uri); + + /// + /// Tries to find a local grabber with . + /// + IGrabber GetLocalScript(string scriptId); + + /// + /// Gets a list of candidate grabber scripts for on the remote repository. + /// + IEnumerable GetRemoteCandidates(Uri uri); + + /// + /// Gets the grabber associated with the script if the latest version of the script with the specified + /// is available locally. + /// Otherwise, it updates the local repository to contain the latest version of the script. + /// + Task GetScriptAsync(string scriptId); + } +} diff --git a/src/SharpGrabber.BlackWidow/Internal/ConcurrentHashSet.cs b/src/SharpGrabber.BlackWidow/Internal/ConcurrentHashSet.cs new file mode 100644 index 0000000..ff2798f --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Internal/ConcurrentHashSet.cs @@ -0,0 +1,99 @@ +using System; +using System.Collections; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Security.Cryptography; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Internal +{ + internal class ConcurrentHashSet : ISet + { + private readonly ConcurrentDictionary _dic = new(); + + public int Count => _dic.Count; + + public bool IsReadOnly => (_dic as IDictionary).IsReadOnly; + + public bool Add(T item) + => _dic.TryAdd(item, false); + + public void Clear() + => _dic.Clear(); + + public bool Contains(T item) + => _dic.ContainsKey(item); + + public void CopyTo(T[] array, int arrayIndex) + => _dic.Keys.CopyTo(array, arrayIndex); + + public void ExceptWith(IEnumerable other) + { + foreach (var item in other) + _dic.TryRemove(item, out _); + } + + public IEnumerator GetEnumerator() + => _dic.Keys.GetEnumerator(); + + public void IntersectWith(IEnumerable 
/// <summary>
/// Helper for writing member-wise equality checks.
/// </summary>
internal static class EqualityUtils
{
    /// <summary>
    /// Compares two objects by reference first, then by the values produced by each selector.
    /// </summary>
    /// <typeparam name="T">Type both objects are expected to be.</typeparam>
    /// <param name="object1">First object; may be null.</param>
    /// <param name="object2">Second object; may be null.</param>
    /// <param name="getValues">Selectors whose results are compared pair-wise with <see cref="object.Equals(object)"/>.</param>
    /// <returns>
    /// True when both references are the same, or when both are <typeparamref name="T"/> and every
    /// selector yields equal values; false when exactly one is null or exactly one is a <typeparamref name="T"/>.
    /// </returns>
    /// <exception cref="ArgumentException">Neither non-null object is a <typeparamref name="T"/>.</exception>
    public static bool Equals<T>(object object1, object object2, params Func<T, object>[] getValues)
        where T : class
    {
        // same instance (this also covers null vs. null)
        if (ReferenceEquals(object1, object2))
            return true;

        // exactly one side is null
        if (object1 is null || object2 is null)
            return false;

        var left = object1 as T;
        var right = object2 as T;

        // neither side is a T: the caller picked the wrong type argument
        if (left is null && right is null)
            throw new ArgumentException($"Invalid type argument: {typeof(T)}", nameof(T));

        // only one side is a T
        if (left is null || right is null)
            return false;

        foreach (var selector in getValues)
        {
            var leftValue = selector(left);
            var rightValue = selector(right);

            if (leftValue is null)
            {
                if (!(rightValue is null))
                    return false;
            }
            else if (!leftValue.Equals(rightValue))
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Runtime type helpers.
/// </summary>
internal static class TypeExtensions
{
    /// <summary>
    /// Tells whether the runtime type of <paramref name="o"/> maps to one of the numeric
    /// <see cref="TypeCode"/> values (integral types, <see cref="decimal"/>, <see cref="double"/>, <see cref="float"/>).
    /// </summary>
    /// <param name="o">Value to inspect; may be null.</param>
    /// <returns>True for numeric values; false otherwise, including for null.</returns>
    public static bool IsNumericType(this object o)
    {
        // An extension method can be invoked on a null reference; null is simply not numeric.
        if (o is null)
            return false;

        switch (Type.GetTypeCode(o.GetType()))
        {
            case TypeCode.Byte:
            case TypeCode.SByte:
            case TypeCode.UInt16:
            case TypeCode.UInt32:
            case TypeCode.UInt64:
            case TypeCode.Int16:
            case TypeCode.Int32:
            case TypeCode.Int64:
            case TypeCode.Decimal:
            case TypeCode.Double:
            case TypeCode.Single:
                return true;
            default:
                return false;
        }
    }
}
/// <summary>
/// Builds the <see cref="ProcessedGrabScript"/> for a version 1 host object after the script has run.
/// </summary>
/// <param name="hostObject">Host object whose <c>Grabber</c> members were assigned by the script.</param>
/// <returns>The supports/grab delegate pair that forwards to the script's functions.</returns>
/// <exception cref="ScriptInterpretException">
/// The script did not set <c>Supports</c>, or set neither <c>Grab</c> nor <c>GrabAsync</c>.
/// </exception>
private ProcessedGrabScript ProcessV1(v1.ApiHostObject hostObject)
{
    var grabber = hostObject.Grabber;

    if (grabber.Supports == null)
        throw new ScriptInterpretException($"The {nameof(hostObject.Grabber.Supports)} function is not set.");

    // GrabAsync takes precedence below and Grab is only the fallback, so a script that
    // provides GrabAsync alone is valid; only reject when neither function is set.
    if (grabber.Grab == null && grabber.GrabAsync == null)
        throw new ScriptInterpretException($"The {nameof(hostObject.Grabber.Grab)} function is not set.");

    bool supports(Uri uri)
    {
        return grabber.Supports(uri?.ToString());
    }

    async Task<GrabResult> grab(Uri uri, CancellationToken cancellationToken, GrabOptions options,
        IProgress<double> progress)
    {
        var grabbedList = new List<IGrabbed>();
        var result = new GrabResult(uri, grabbedList);

        var request = new v1.ApiGrabRequest(uri, cancellationToken, options, progress);
        var response = new v1.ApiGrabResponse(result, grabbedList, _grabberServices, _grabbedTypeCollection,
            _typeConverter);

        if (grabber.GrabAsync != null)
        {
            // invoke GrabAsync
            await grabber.GrabAsync(request, response);
        }
        else
        {
            // invoke the synchronous Grab on a worker thread; anything but true means "nothing grabbed"
            var success = await Task.Run(() => grabber.Grab(request, response), cancellationToken);
            if (success != true)
                return null;
        }

        return result;
    }

    return new ProcessedGrabScript(supports, grab);
}
/// <summary>
/// Pairs the two delegates produced by processing an interpreted grab script.
/// </summary>
public class ProcessedGrabScript
{
    /// <summary>
    /// Stores the given delegates as-is.
    /// </summary>
    /// <param name="supports">Value exposed through <see cref="Supports"/>.</param>
    /// <param name="grabAsync">Value exposed through <see cref="GrabAsync"/>.</param>
    public ProcessedGrabScript(SupportsDelegate supports, GrabDelegate grabAsync)
        => (Supports, GrabAsync) = (supports, grabAsync);

    /// <summary>
    /// Gets the delegate passed as the <c>supports</c> constructor argument.
    /// </summary>
    public SupportsDelegate Supports { get; }

    /// <summary>
    /// Gets the delegate passed as the <c>grabAsync</c> constructor argument.
    /// </summary>
    public GrabDelegate GrabAsync { get; }
}
/// <summary>
/// Request object handed to a version 1 script's grab function.
/// </summary>
public class ApiGrabRequest
{
    private readonly Uri _uri;
    private readonly CancellationToken _cancellationToken;
    private readonly GrabOptions _options;
    private readonly IProgress<double> _progress;
    private double _progressValue;

    /// <summary>
    /// Creates the request.
    /// </summary>
    /// <param name="uri">Address to grab from.</param>
    /// <param name="cancellationToken">Token observed by <see cref="IsCanceled"/> and <see cref="TestCanceled"/>.</param>
    /// <param name="options">Grab options exposed through <see cref="Options"/>.</param>
    /// <param name="progress">Receiver of progress updates; may be null when the caller does not track progress.</param>
    public ApiGrabRequest(Uri uri, CancellationToken cancellationToken, GrabOptions options, IProgress<double> progress)
    {
        _uri = uri;
        _cancellationToken = cancellationToken;
        _options = options;
        _progress = progress;
    }

    /// <summary>
    /// Gets the string form of the requested address.
    /// </summary>
    public string Url => _uri.ToString();

    /// <summary>
    /// Gets the options supplied with the request.
    /// </summary>
    public GrabOptions Options => _options;

    /// <summary>
    /// Gets whether cancellation has been requested.
    /// </summary>
    public bool IsCanceled => _cancellationToken.IsCancellationRequested;

    /// <summary>
    /// Gets the last progress value set, or sets it and forwards it to the progress receiver, if any.
    /// </summary>
    public double Progress
    {
        get => _progressValue;
        set
        {
            _progressValue = value;
            // The progress receiver is optional; without this guard a script that reports
            // progress would fail with a NullReferenceException when none was supplied.
            _progress?.Report(value);
        }
    }

    /// <summary>
    /// Throws <see cref="OperationCanceledException"/> if cancellation has been requested.
    /// </summary>
    public void TestCanceled()
        => _cancellationToken.ThrowIfCancellationRequested();
}
/// <summary>
/// Copies <paramref name="values"/> onto the public instance properties of <paramref name="obj"/>,
/// matching property names case-insensitively.
/// </summary>
/// <param name="obj">Target object; nothing happens when null.</param>
/// <param name="values">
/// Property name to value map. Null values and names without a matching property are ignored.
/// A nested map is applied recursively to the object held by the matching property, which is
/// created with its parameterless constructor when the property is currently null.
/// </param>
private void SetProperties(object obj, IDictionary<string, object> values)
{
    if (obj == null)
        return;

    var type = obj.GetType();
    foreach (var pair in values)
    {
        if (pair.Value == null)
            continue;

        var prop = type.GetProperty(pair.Key,
            BindingFlags.Public | BindingFlags.Instance | BindingFlags.IgnoreCase);
        if (prop == null)
            continue;

        if (pair.Value is IDictionary<string, object> map)
        {
            // Move on to the next entry rather than leaving the method: returning here
            // would silently drop every property that follows a nested (or empty) map.
            if (map.Count == 0)
                continue;

            var innerObject = prop.GetValue(obj);
            if (innerObject == null)
            {
                innerObject = Activator.CreateInstance(prop.PropertyType);
                prop.SetValue(obj, innerObject);
            }

            SetProperties(innerObject, map);
            continue;
        }

        // plain value: convert to the property's type before assigning
        var value = _typeConverter.Convert(pair.Value, prop.PropertyType);
        prop.SetValue(obj, value);
    }
}
/// <summary>
/// Root object of API version 1; groups the grabber, HTTP, HTML and MIME contexts.
/// </summary>
public class ApiHostObject
{
    /// <summary>
    /// Creates all four contexts; the HTTP and MIME contexts are built from <paramref name="grabberServices"/>.
    /// </summary>
    /// <param name="grabberServices">Services passed to the HTTP context; its <c>Mime</c> member is passed to the MIME context.</param>
    public ApiHostObject(IGrabberServices grabberServices)
    {
        Grabber = new ApiGrabberContext();
        Html = new ApiHtmlContext();
        Http = new ApiHttpContext(grabberServices);
        Mime = new ApiMimeContext(grabberServices.Mime);
    }

    /// <summary>
    /// Gets the context holding the grabber functions.
    /// </summary>
    public ApiGrabberContext Grabber { get; }

    /// <summary>
    /// Gets the HTTP context.
    /// </summary>
    public ApiHttpContext Http { get; }

    /// <summary>
    /// Gets the HTML context.
    /// </summary>
    public ApiHtmlContext Html { get; }

    /// <summary>
    /// Gets the MIME context.
    /// </summary>
    public ApiMimeContext Mime { get; }
}
/// <summary>
/// HTML entry point of API version 1.
/// </summary>
public class ApiHtmlContext
{
    /// <summary>
    /// Parses <paramref name="source"/> with a new <see cref="HtmlParser"/> and wraps the
    /// resulting document element.
    /// </summary>
    /// <param name="source">HTML markup to parse.</param>
    /// <returns>An <see cref="ApiHtmlElement"/> around the parsed document's root element.</returns>
    public ApiHtmlElement Parse(string source)
        => new ApiHtmlElement(new HtmlParser().ParseDocument(source).DocumentElement);
}
/// <summary>
/// HTTP client of API version 1; wraps an <see cref="HttpClient"/> and offers both
/// asynchronous and blocking request methods.
/// </summary>
public class ApiHttpClient
{
    private readonly HttpClient _client;

    /// <summary>
    /// Wraps the given client; every request is sent through it.
    /// </summary>
    public ApiHttpClient(HttpClient client)
    {
        _client = client;
    }

    /// <summary>
    /// Sends <paramref name="request"/> using the method it specifies and returns the processed response.
    /// </summary>
    public async Task<ApiHttpResponse> SendAsync(ApiHttpRequest request)
    {
        using var message = await CreateRequestMessageAsync(request).ConfigureAwait(false);
        using var response = await _client.SendAsync(message).ConfigureAwait(false);
        return await ProcessResponseAsync(response, request).ConfigureAwait(false);
    }

    /// <summary>
    /// Sets the request method to GET and sends the request.
    /// </summary>
    public Task<ApiHttpResponse> GetAsync(ApiHttpRequest request)
    {
        request.Method = HttpMethod.Get.Method;
        return SendAsync(request);
    }

    /// <summary>
    /// Sets the request method to HEAD and sends the request.
    /// </summary>
    public Task<ApiHttpResponse> HeadAsync(ApiHttpRequest request)
    {
        request.Method = HttpMethod.Head.Method;
        return SendAsync(request);
    }

    /// <summary>
    /// Sets the request method to POST and sends the request.
    /// </summary>
    public Task<ApiHttpResponse> PostAsync(ApiHttpRequest request)
    {
        request.Method = HttpMethod.Post.Method;
        return SendAsync(request);
    }

    /// <summary>
    /// Blocking counterpart of <see cref="SendAsync"/>.
    /// </summary>
    public ApiHttpResponse Send(ApiHttpRequest request)
        => SendAsync(request).GetAwaiter().GetResult();

    /// <summary>
    /// Blocking counterpart of <see cref="GetAsync"/>.
    /// </summary>
    public ApiHttpResponse Get(ApiHttpRequest request)
        => GetAsync(request).GetAwaiter().GetResult();

    /// <summary>
    /// Blocking counterpart of <see cref="HeadAsync"/>.
    /// </summary>
    public ApiHttpResponse Head(ApiHttpRequest request)
        => HeadAsync(request).GetAwaiter().GetResult();

    /// <summary>
    /// Blocking counterpart of <see cref="PostAsync"/>.
    /// </summary>
    public ApiHttpResponse Post(ApiHttpRequest request)
        => PostAsync(request).GetAwaiter().GetResult();

    /// <summary>
    /// Builds the <see cref="HttpRequestMessage"/> for <paramref name="request"/>: method, URL,
    /// headers and, when <c>BodyText</c> is not empty, a text body.
    /// </summary>
    private Task<HttpRequestMessage> CreateRequestMessageAsync(ApiHttpRequest request)
    {
        var message = new HttpRequestMessage(new HttpMethod(request.Method), request.Url);
        try
        {
            foreach (var header in request.Headers)
                message.Headers.Add(header.Key, header.Value);

            // A new request message has no content, so the body has to be attached as
            // content; reading a stream from the (null) Content would throw.
            if (!string.IsNullOrEmpty(request.BodyText))
                message.Content = new StringContent(request.BodyText);

            return Task.FromResult(message);
        }
        catch
        {
            // do not leak the message when a header is rejected
            message.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Converts <paramref name="response"/> into an <see cref="ApiHttpResponse"/>. The body is read as
    /// text only for successful responses, and only when the request expects text or the response
    /// declares a <c>text/*</c> content type.
    /// </summary>
    private async Task<ApiHttpResponse> ProcessResponseAsync(HttpResponseMessage response,
        ApiHttpRequest request)
    {
        // GetValues throws when the header is missing; a response without Content-Type
        // must simply be treated as having no declared type.
        string contentType = null;
        if (response.Content != null &&
            response.Content.Headers.TryGetValues("Content-Type", out var contentTypeValues))
        {
            contentType = contentTypeValues.FirstOrDefault();
        }

        string bodyText = null;
        if (response.IsSuccessStatusCode)
        {
            var expectText = request.ExpectText;
            if (!expectText)
                expectText = !string.IsNullOrEmpty(contentType) &&
                             contentType.StartsWith("text/", StringComparison.InvariantCultureIgnoreCase);
            if (expectText && response.Content != null)
            {
                bodyText = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
            }
        }

        return new ApiHttpResponse(response, bodyText);
    }
}
/// <summary>
/// Snapshot of an HTTP response exposed to version 1 scripts.
/// </summary>
public class ApiHttpResponse
{
    /// <summary>
    /// Copies status and headers from <paramref name="response"/> and stores the given body text.
    /// </summary>
    /// <param name="response">Response to copy from; it is not kept after construction.</param>
    /// <param name="bodyText">Body text already read by the caller; may be null.</param>
    public ApiHttpResponse(HttpResponseMessage response, string bodyText)
    {
        Success = response.IsSuccessStatusCode;
        StatusCode = (int)response.StatusCode;
        StatusText = response.ReasonPhrase;

        // HTTP header names are case-insensitive, so look-ups must not depend on casing.
        var headers = new Dictionary<string, string[]>(StringComparer.OrdinalIgnoreCase);
        foreach (var header in response.Headers)
            headers[header.Key] = header.Value.ToArray();

        // Content-Type, Content-Length and similar headers are kept on the content object,
        // not on the response itself; include them so scripts can see them.
        if (response.Content != null)
        {
            foreach (var header in response.Content.Headers)
                headers[header.Key] = header.Value.ToArray();
        }

        Headers = headers;
        BodyText = bodyText;
    }

    /// <summary>
    /// Gets the numeric HTTP status code.
    /// </summary>
    public int StatusCode { get; }

    /// <summary>
    /// Gets the reason phrase sent with the status code.
    /// </summary>
    public string StatusText { get; }

    /// <summary>
    /// Gets the response and content headers, keyed by header name (case-insensitive).
    /// </summary>
    public IDictionary<string, string[]> Headers { get; }

    /// <summary>
    /// Gets the body text supplied at construction; may be null.
    /// </summary>
    public string BodyText { get; }

    /// <summary>
    /// Gets whether the status code indicates success.
    /// </summary>
    public bool Success { get; }

    /// <summary>
    /// Throws when the response was not successful.
    /// </summary>
    /// <exception cref="HttpRequestException"><see cref="Success"/> is false.</exception>
    public void AssertSuccess()
    {
        if (!Success)
            throw new HttpRequestException($"The status code {StatusCode} does not indicate success.");
    }
}
/// <summary>
/// Dictionary-backed implementation of <see cref="IGrabberScriptInterpreterService"/>.
/// </summary>
public class GrabberScriptInterpreterService : IGrabberScriptInterpreterService
{
    private readonly Dictionary<GrabberScriptType, IGrabberScriptInterpreter> _interpreters = new();

    /// <summary>
    /// Associates <paramref name="interpreter"/> with <paramref name="scriptType"/>,
    /// replacing any interpreter registered for that type before.
    /// </summary>
    public void Register(GrabberScriptType scriptType, IGrabberScriptInterpreter interpreter)
        => _interpreters[scriptType] = interpreter;

    /// <summary>
    /// Looks up the interpreter registered for <paramref name="scriptType"/>.
    /// </summary>
    /// <returns>The registered interpreter, or null when none is registered.</returns>
    public IGrabberScriptInterpreter GetInterpreter(GrabberScriptType scriptType)
    {
        if (_interpreters.TryGetValue(scriptType, out var registered))
            return registered;
        return null;
    }
}
+ /// + public interface IGrabberScriptInterpreter + { + /// + /// Loads the specified script source and interprets it as a grabber. + /// + Task InterpretAsync(IGrabberRepositoryScript script, IGrabberScriptSource source, int apiVersion, + GrabberScriptInterpretOptions options, CancellationToken cancellationToken); + } + + public static class GrabberScriptInterpreterExtensions + { + /// + /// Loads the specified script source and interprets it as a grabber. + /// + public static Task InterpretAsync(this IGrabberScriptInterpreter interpreter, IGrabberRepositoryScript script, + IGrabberScriptSource source, int apiVersion, GrabberScriptInterpretOptions options) + { + return interpreter.InterpretAsync(script, source, apiVersion, options, CancellationToken.None); + } + + /// + /// Loads the specified script source and interprets it as a grabber. + /// + public static Task InterpretAsync(this IGrabberScriptInterpreter interpreter, IGrabberRepositoryScript script, + IGrabberScriptSource source, int apiVersion) + { + return interpreter.InterpretAsync(script, source, apiVersion, GrabberScriptInterpretOptions.Default); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Interpreter/IGrabberScriptInterpreterService.cs b/src/SharpGrabber.BlackWidow/Interpreter/IGrabberScriptInterpreterService.cs new file mode 100644 index 0000000..86f2374 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Interpreter/IGrabberScriptInterpreterService.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; + +namespace DotNetTools.SharpGrabber.BlackWidow.Interpreter +{ + /// + /// Manages grabber script interpreters. + /// + public interface IGrabberScriptInterpreterService + { + /// + /// Registers for . + /// + void Register(GrabberScriptType scriptType, IGrabberScriptInterpreter interpreter); + + /// + /// Gets the proper interpreter for , if available. 
/// <summary>
/// Adapts a ConvertEx <see cref="ITypeConverter"/> to Jint's type converter interface by
/// forwarding every call unchanged.
/// </summary>
internal class JintConvertExTypeConverter : Jint.Runtime.Interop.ITypeConverter
{
    private readonly ITypeConverter _inner;

    /// <summary>
    /// Wraps <paramref name="converter"/>.
    /// </summary>
    public JintConvertExTypeConverter(ITypeConverter converter)
        => _inner = converter;

    /// <summary>
    /// Forwards to the wrapped converter's <c>Convert</c>.
    /// </summary>
    public object Convert(object value, Type type, IFormatProvider formatProvider)
        => _inner.Convert(value, type, formatProvider);

    /// <summary>
    /// Forwards to the wrapped converter's <c>TryConvert</c>.
    /// </summary>
    public bool TryConvert(object value, Type type, IFormatProvider formatProvider, out object converted)
        => _inner.TryConvert(value, type, formatProvider, out converted);
}
/// <summary>
/// Converts <paramref name="value"/> to <paramref name="type"/> through <c>TryConvert</c>.
/// </summary>
/// <param name="value">Value to convert; may be null.</param>
/// <param name="type">Target type.</param>
/// <param name="formatProvider">Format provider handed on to <c>TryConvert</c>.</param>
/// <returns>The converted value.</returns>
/// <exception cref="InvalidCastException"><c>TryConvert</c> reported failure.</exception>
public object Convert(object value, Type type, IFormatProvider formatProvider)
{
    var succeeded = TryConvert(value, type, formatProvider, out var converted);
    if (!succeeded)
        throw new InvalidCastException($"Could not convert value of type {value?.GetType()} to {type}");

    return converted;
}
null; + return false; + } + + /// + /// Wraps a delegate so it can be invoked through another delegate signature, converting arguments and the result with the owning converter. + /// + private sealed class DelegateTarget + { + private readonly JintMultiTypeConverter _self; + private readonly Delegate _delegate; + private readonly Type[] _targetTypes; + private readonly Type _returnType; + + public DelegateTarget(JintMultiTypeConverter self, Delegate @delegate, MethodInfo targetMethod) + { + _self = self; + _delegate = @delegate; + _returnType = targetMethod.ReturnType; + _targetTypes = @delegate.Method.GetParameters().Select(p => p.ParameterType).ToArray(); + } + + /// + /// Converts the incoming arguments to the wrapped delegate's parameter types, invokes it, and converts the result to the target return type. + /// Returns null when the target return type is void. + /// + public object InvokeDelegate(params object[] args) + { + // DynamicInvoke needs exactly one slot per parameter of the wrapped delegate, + // so the array is sized by the wrapped signature, not by the caller's argument count. + var convertedArgs = new object[_targetTypes.Length]; + // Surplus caller arguments are ignored; missing ones stay null. + var available = Math.Min(args?.Length ?? 0, _targetTypes.Length); + for (var iparam = 0; iparam < available; iparam++) + { + var type = _targetTypes[iparam]; + var value = args[iparam]; + var convertedArg = _self.Convert(value, type, null); + convertedArgs[iparam] = convertedArg; + } + + var result = _delegate.DynamicInvoke(convertedArgs); + // A void target signature has no result to convert. + if (_returnType == typeof(void)) + return null; + var convertedResult = _self.Convert(result, _returnType, null); + return convertedResult; + } + } + + private sealed class BuiltInTypeConverter : ConvertEx.TypeConverterBase, ConvertEx.ITypeDigester + { + private readonly JintMultiTypeConverter _self; + + public BuiltInTypeConverter(JintMultiTypeConverter self) + { + _self = self; + } + + private Engine _engine => _self._engine; + + public IEnumerable Offer(Type valueType, Type targetType) + { + if (valueType.IsSubclassOf(typeof(JsValue)) && !targetType.IsSubclassOf(typeof(JsValue))) + { + return new[] {typeof(object)}; + } + + return Array.Empty(); + } + + public override bool TryConvert(object value, Type type, IFormatProvider formatProvider, + out object converted) + { + // A null value has no runtime type to inspect; report failure instead of throwing. + if (value == null) + { + converted = null; + return false; + } + + var valueType = value.GetType(); + if (type.IsSubclassOf(typeof(Delegate))) + { + // Only a delegate can be re-bound to another delegate type. + if (!(value is Delegate sourceDelegate)) + { + converted = null; + return false; + } + + converted = ConvertDelegate(sourceDelegate, type); + return true; + } + + if (type == typeof(JsValue)) + { + converted = JsValue.FromObject(_engine, value); + return true; + } + + if (type == typeof(JsValue[])) + { + converted = new[] {JsValue.FromObject(_engine, value)}; 
+ return true; + } + + if (valueType.IsSubclassOf(typeof(JsValue)) && !type.IsSubclassOf(typeof(JsValue))) + { + converted = (value as JsValue).ToObject(); + return true; + } + + converted = null; + return false; + } + + /// + /// Builds a delegate of targetType whose body packs its arguments into an object array and forwards them to DelegateTarget.InvokeDelegate. + /// + private Delegate ConvertDelegate(Delegate value, Type targetType) + { + var targetMethod = targetType.GetMethod("Invoke"); + var parameters = targetMethod.GetParameters(); + var paramTypes = new Type[] {typeof(DelegateTarget)} + .Concat(parameters.Select(p => p.ParameterType)) + .ToArray(); + + var newMethod = new DynamicMethod("InvokeConverted", targetMethod.ReturnType, paramTypes, + typeof(DelegateTarget)); + var dynamicInvoker = typeof(DelegateTarget).GetMethod(nameof(DelegateTarget.InvokeDelegate), + BindingFlags.Instance | BindingFlags.Public) ?? + throw new Exception("Could not find the invocation middleware method."); + + var il = newMethod.GetILGenerator(); + il.Emit(OpCodes.Ldarg_0); + il.Emit(OpCodes.Ldc_I4, parameters.Length); + il.Emit(OpCodes.Newarr, typeof(object)); + var paramIndex = 0; + foreach (var paramInfo in parameters) + { + il.Emit(OpCodes.Dup); + il.Emit(OpCodes.Ldc_I4, paramIndex); + // ldarg takes a 16-bit operand; the int overload of Emit would write four bytes. + il.Emit(OpCodes.Ldarg, (short) (paramIndex + 1)); + // Value-type arguments must be boxed before they can be stored in an object[]. + if (paramInfo.ParameterType.IsValueType) + il.Emit(OpCodes.Box, paramInfo.ParameterType); + il.Emit(OpCodes.Stelem_Ref); + paramIndex++; + } + + il.EmitCall(OpCodes.Callvirt, dynamicInvoker, null); + // InvokeDelegate returns object: discard it for void signatures, otherwise unbox/cast to the declared return type. + if (targetMethod.ReturnType == typeof(void)) + il.Emit(OpCodes.Pop); + else + il.Emit(OpCodes.Unbox_Any, targetMethod.ReturnType); + il.Emit(OpCodes.Ret); + + var target = new DelegateTarget(_self, value, targetMethod); + return newMethod.CreateDelegate(targetType, target); + } + } + } +} \ No newline at end of file diff --git a/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintInterpreterServiceExtensions.cs b/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintInterpreterServiceExtensions.cs new file mode 100644 index 0000000..125d92b --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintInterpreterServiceExtensions.cs @@ -0,0 +1,24 @@ +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter.Api; +using System; +using System.Collections.Generic; 
+using System.Text; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; + +namespace DotNetTools.SharpGrabber.BlackWidow.Interpreter.JavaScript +{ + /// + /// Provides extension methods for . + /// + public static class JintInterpreterServiceExtensions + { + /// + /// Registers Jint as the JavaScript interpreter. + /// + public static void RegisterJint(this IGrabberScriptInterpreterService interpreterService, IInterpreterApiService apiService, + IGrabberServices grabberServices, IScriptHost scriptHost) + { + interpreterService.Register(GrabberScriptType.JavaScript, new JintJavaScriptInterpreter(apiService, grabberServices, scriptHost)); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintJavaScriptHost.cs b/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintJavaScriptHost.cs new file mode 100644 index 0000000..f4597a2 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintJavaScriptHost.cs @@ -0,0 +1,117 @@ +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.Exceptions; +using Jint; +using Jint.Native; +using System; +using System.Collections.Generic; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Interpreter.JavaScript +{ + [System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "")] + public class JintJavaScriptHost + { + private readonly Engine _engine; + private readonly IScriptHost _host; + + public JintJavaScriptHost(Engine engine, Host.IScriptHost scriptHost) + { + _engine = engine; + _host = scriptHost; + } + + public void Apply(Engine engine) + { + engine.SetValue(new JsString("GrabException"), typeof(GrabException)); + + engine.SetValue(new JsString("alert"), (Action)_host.Alert); + engine.SetValue(new JsString("console"), new ConsoleContext(this)); + engine.SetValue(new JsString("URL"), typeof(URL)); + } + + #region console + private class ConsoleContext + { + private IScriptHost _host; + + public 
ConsoleContext(JintJavaScriptHost self) + { + _host = self._host; + } + + public void log(params object[] args) + { + _host.Log(new ConsoleLog(ConsoleLogLevel.Log, args)); + } + + public void debug(params object[] args) + { + _host.Log(new ConsoleLog(ConsoleLogLevel.Debug, args)); + } + + public void error(params object[] args) + { + _host.Log(new ConsoleLog(ConsoleLogLevel.Error, args)); + } + + public void info(params object[] args) + { + _host.Log(new ConsoleLog(ConsoleLogLevel.Info, args)); + } + + public void trace(params object[] args) + { + _host.Log(new ConsoleLog(ConsoleLogLevel.Trace, args)); + } + + public void warning(params object[] args) + { + _host.Log(new ConsoleLog(ConsoleLogLevel.Warning, args)); + } + } + #endregion + + #region URL + /// + /// Script-facing URL object whose properties are computed once from a System.Uri. + /// + private class URL + { + /// + /// Parses url, resolving it against @base when a base is supplied. + /// + public URL(string url, string @base) + { + Uri uri; + if (string.IsNullOrEmpty(@base)) + uri = new(url); + else + uri = new(new Uri(@base), url); + hash = uri.Fragment; + host = uri.IsDefaultPort ? uri.Host : $"{uri.Host}:{uri.Port}"; + hostname = uri.Host; + href = uri.ToString(); + origin = $"{uri.Scheme}://{host}"; + pathname = uri.LocalPath; + port = uri.Port.ToString(); + protocol = uri.Scheme + ':'; + search = uri.Query; + if (!string.IsNullOrEmpty(uri.UserInfo)) + { + var userPass = uri.UserInfo.Split(new[] { ':' }, 2); + username = userPass[0]; + // UserInfo may carry a user name only (no ':'), in which case there is no second part to read. + password = userPass.Length > 1 ? userPass[1] : string.Empty; + } + } + + public URL(string url) : this(url, null) { } + + public string hash { get; } + public string host { get; } + public string hostname { get; } + public string href { get; } + public string origin { get; } + public string username { get; } + public string password { get; } + public string pathname { get; } + public string port { get; } + public string protocol { get; } + public string search { get; } + } + #endregion + } +} diff --git a/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintJavaScriptInterpreter.cs b/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintJavaScriptInterpreter.cs new file mode
100644 index 0000000..e533b53 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Interpreter/JavaScript/JintJavaScriptInterpreter.cs @@ -0,0 +1,172 @@ +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; +using DotNetTools.SharpGrabber.BlackWidow.Host; +using DotNetTools.SharpGrabber.BlackWidow.Internal; +using DotNetTools.SharpGrabber.BlackWidow.Interpreter.Api; +using DotNetTools.SharpGrabber.BlackWidow.Repository; +using Jint; +using Jint.Native; +using Jint.Runtime.Interop; + +namespace DotNetTools.SharpGrabber.BlackWidow.Interpreter.JavaScript +{ + /// + /// Defines a grabber script interpreter that internally uses Jint. + /// + public class JintJavaScriptInterpreter : IGrabberScriptInterpreter + { + private readonly IInterpreterApiService _interpreterApiService; + private readonly IGrabberServices _grabberServices; + private readonly IScriptHost _scriptHost; + + public JintJavaScriptInterpreter(IInterpreterApiService interpreterApiService, IGrabberServices grabberServices, + IScriptHost host) + { + BlackWidowInitializer.Test(); + _interpreterApiService = interpreterApiService; + _grabberServices = grabberServices; + _scriptHost = host; + } + + /// + /// Gets or sets the name of the main function. + /// + public string MainFunctionName { get; set; } = "main"; + + /// + /// Gets or sets the memory limit for script. A value of 0 represents no limit. Default value is 100 MiB. + /// + public long MemoryLimit { get; set; } = 104_857_600; + + /// + /// Gets or sets the maximum allowed time for the script to execute. represents no limit. + /// Default value is 30 seconds. + /// + public TimeSpan ExecutionTimeout { get; set; } = TimeSpan.FromSeconds(30); + + /// + /// Gets or sets the call recursion limit. A value of 0 represents no limit. Default value is 256. + /// + public int RecursionLimit { get; set; } = 256; + + /// + /// Clears execution limits. 
+ /// + public void SetNoLimits() + { + MemoryLimit = 0; + ExecutionTimeout = TimeSpan.Zero; + RecursionLimit = 0; + } + + public async Task InterpretAsync(IGrabberRepositoryScript script, IGrabberScriptSource source, + int apiVersion, + GrabberScriptInterpretOptions options, CancellationToken cancellationToken) + { + if (script == null) + throw new ArgumentNullException(nameof(script)); + if (source == null) + throw new ArgumentNullException(nameof(source)); + + var engine = CreateEngine(apiVersion, cancellationToken); + var scriptSource = await source.GetSourceAsync().ConfigureAwait(false); + + var hostObject = _interpreterApiService.GetHostObject(apiVersion, _grabberServices); + DefineHostObjectOnScript(engine, hostObject); + DefineAdditionalExposedData(engine, options.ExposedData); + engine.Execute(scriptSource); + + var processedScript = _interpreterApiService.ProcessResult(apiVersion, hostObject); + + return new JintGrabber(engine, processedScript, script.Name, _grabberServices); + } + + /// + /// Configures Jint engine options. 
+ /// + protected virtual void ConfigureEngine(Options options, int apiVersion, CancellationToken cancellationToken) + { + options.SetTypeConverter(engine => ConfigureTypeConverter(engine, apiVersion)); + options.CancellationToken(cancellationToken); + if (MemoryLimit > 0) + options.LimitMemory(MemoryLimit); + if (ExecutionTimeout > TimeSpan.Zero) + options.TimeoutInterval(ExecutionTimeout); + if (RecursionLimit > 0) + options.LimitRecursion(RecursionLimit); + } + + protected virtual Jint.Runtime.Interop.ITypeConverter ConfigureTypeConverter(Engine engine, int apiVersion) + { + // var converter = _interpreterApiService.GetTypeConverter(apiVersion); + // var multiTypeConverter = + // JintMultiTypeConverter.CreateDefault(engine, new ConvertEx.ITypeConverter[] {converter}); + // return multiTypeConverter; + return new DefaultTypeConverter(engine); + } + + private static void DefineAdditionalExposedData(Engine engine, + IEnumerable> exposedData) + { + if (exposedData == null) + return; + + foreach (var exposedPair in exposedData) + { + engine.SetValue(exposedPair.Key, exposedPair.Value); + } + } + + private Jint.Engine CreateEngine(int apiVersion, CancellationToken cancellationToken) + { + var engine = new Engine((engine, options) => ConfigureEngine(options, apiVersion, cancellationToken)); + var host = new JintJavaScriptHost(engine, _scriptHost); + host.Apply(engine); + return engine; + } + + private void DefineHostObjectOnScript(Jint.Engine engine, object hostObject) + { + foreach (var property in hostObject.GetType().GetProperties()) + { + var val = property.GetValue(hostObject); + engine.SetValue(new JsString(property.Name.ToCamelCase()), val); + } + } + + private class JintGrabber : GrabberBase + { + private readonly Engine _engine; + private readonly ProcessedGrabScript _processedGrabScript; + + public JintGrabber(Engine engine, ProcessedGrabScript processedGrabScript, string name, + IGrabberServices grabberServices) : base( + grabberServices) + { + _engine = 
engine; + _processedGrabScript = processedGrabScript; + Name = name; + } + + public override string StringId => null; + + public override string Name { get; } + + public override bool Supports(Uri uri) + { + _engine.ResetConstraints(); + return _processedGrabScript.Supports(uri); + } + + protected override Task InternalGrabAsync(Uri uri, CancellationToken cancellationToken, + GrabOptions options, IProgress progress) + { + _engine.ResetConstraints(); + return _processedGrabScript.GrabAsync(uri, cancellationToken, options, progress); + } + } + } +} \ No newline at end of file diff --git a/src/SharpGrabber.BlackWidow/Repository/GitHub/GitHubGrabberRepository.cs b/src/SharpGrabber.BlackWidow/Repository/GitHub/GitHubGrabberRepository.cs new file mode 100644 index 0000000..6bc1bb8 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GitHub/GitHubGrabberRepository.cs @@ -0,0 +1,106 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository.GitHub +{ + /// + /// Defines a grabber repository that fetches the feed file and the scripts from a GitHub repository. + /// + public class GitHubGrabberRepository : GrabberRepositoryBase + { + private readonly HttpClient _client; + private readonly bool _ownClient; + + public GitHubGrabberRepository(HttpClient httpClient, bool ownClient = true) + { + _ownClient = ownClient; + _client = httpClient; + _client.DefaultRequestHeaders.CacheControl = new System.Net.Http.Headers.CacheControlHeaderValue + { + NoCache = true, + MaxAge = TimeSpan.Zero, + }; + } + + public GitHubGrabberRepository() : this(new HttpClient()) { } + + public override bool CanPut => false; + + /// + /// Gets or sets the name of the repository e.g. 
'dotnettools/SharpGrabber' + /// + public string Repository { get; set; } + + /// + /// Gets or sets the branch name. + /// + public string BranchName { get; set; } = "master"; + + /// + /// Gets or sets the path to the directory that contains the feed file and the scripts. + /// + public string RepoRootPath { get; set; } = "blackwidow"; + + /// + /// Gets or sets the name of the feed file. + /// + public string FeedFileName { get; set; } = "feed.json"; + + public override async Task GetFeedAsync(CancellationToken cancellationToken) + { + var url = GetFeedUrl(); + using var response = await _client.GetAsync(url, cancellationToken).ConfigureAwait(false); + response.EnsureSuccessStatusCode(); + var content = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + var feedModel = JsonConvert.DeserializeObject(content); + return new GrabberRepositoryFeed(feedModel.Scripts); + } + + /// + /// Downloads the file referenced by a feed entry of this repository and returns its text as the script source. + /// Throws InvalidOperationException when the script was not produced by this repository, and HttpRequestException when the server answers with a non-success status. + /// + public override async Task FetchSourceAsync(IGrabberRepositoryScript _script, CancellationToken cancellationToken) + { + if (_script is not FeedScriptModel script) + throw new InvalidOperationException($"The provided script does not belong to this repository."); + + var url = GetScriptUrl(script.File); + using var response = await _client.GetAsync(url, cancellationToken).ConfigureAwait(false); + // Fail on non-success responses so an HTTP error body is never handed back as script source. + response.EnsureSuccessStatusCode(); + var src = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + return new GrabberScriptSource(src); + } + + public override Task PutAsync(IGrabberRepositoryScript script, IGrabberScriptSource source, CancellationToken cancellationToken) + { + throw new NotSupportedException("Putting is not supported for GitHub repositories."); + } + + protected override void Dispose(bool disposing) + { + if (_ownClient) + _client.Dispose(); + } + + protected virtual string GetFeedUrl() + => GetRawUrl(BranchName, RepoRootPath, FeedFileName); + + protected virtual string GetScriptUrl(string fileName) + => GetRawUrl(BranchName, RepoRootPath, fileName); + + protected virtual string
GetRawUrl(params string[] parts) + => ($"https://raw.githubusercontent.com/{Repository}" + '/' + string.Join("/", parts ?? Array.Empty()).Trim('/')).Trim('/'); + + private sealed class FeedFileModel + { + public FeedScriptModel[] Scripts { get; set; } + } + + private sealed class FeedScriptModel : GrabberRepositoryScript + { + public string File { get; set; } + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryBase.cs b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryBase.cs new file mode 100644 index 0000000..4801166 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryBase.cs @@ -0,0 +1,130 @@ +using DotNetTools.SharpGrabber.BlackWidow.Internal; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using DotNetTools.SharpGrabber.BlackWidow.Definitions; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository +{ + /// + /// Base class for implementing + /// + public abstract class GrabberRepositoryBase : IGrabberRepository + { + private readonly object _monitoringLock = new(); + private readonly ConcurrentHashSet _subscriptions = new(); + private bool _disposed; + private bool _monitoring; + + public virtual bool CanPut => false; + + public virtual bool CanNotifyChanges => false; + + protected bool AnySubscribers => _subscriptions.Count > 0; + + public abstract Task FetchSourceAsync(IGrabberRepositoryScript script, CancellationToken cancellationToken); + + public abstract Task GetFeedAsync(CancellationToken cancellationToken); + + public virtual Task PutAsync(IGrabberRepositoryScript script, IGrabberScriptSource source, CancellationToken cancellationToken) + { + throw new NotSupportedException($"Putting is not supported by {GetType()}."); + } + + public Task SubscribeAsync() + { + IGrabberRepositorySubscription result = null; + if (CanNotifyChanges) + { + result = new Subscription(this); + 
// Register the subscription first; otherwise AnySubscribers stays false and NotifyChanged never reaches it. + _subscriptions.Add(result); + StartOrStopMonitoring(); + } + return Task.FromResult(result); + } + + public void Dispose() + { + if (_disposed) + return; + _disposed = true; + Dispose(true); + _subscriptions.Clear(); + StartOrStopMonitoring(); + } + + protected void NotifyChanged(IGrabberRepositoryFeed feed) + { + foreach (var subscription in _subscriptions.OfType()) + subscription.Notify(feed); + } + + /// + /// Starts monitoring for changes to the repository. Called when the first subscriber appears; the base implementation throws NotImplementedException. + /// + protected virtual Task StartMonitoringAsync() + { + throw new NotImplementedException(); + } + + /// + /// Stops monitoring for changes to the repository. Called when no subscribers remain; the base implementation throws NotImplementedException. + /// + protected virtual Task StopMonitoringAsync() + { + throw new NotImplementedException(); + } + + protected virtual void Dispose(bool disposing) + { + } + + private void Unsubscribe(IGrabberRepositorySubscription subscription) + { + _subscriptions.Remove(subscription); + StartOrStopMonitoring(); + } + + private void StartOrStopMonitoring() + { + if (_monitoring == AnySubscribers) + return; + + lock (_monitoringLock) + { + if (_monitoring == AnySubscribers) + return; + _monitoring = AnySubscribers; + + if (_monitoring) + _ = StartMonitoringAsync(); + else + _ = StopMonitoringAsync(); + } + } + + private sealed class Subscription : IGrabberRepositorySubscription + { + private readonly GrabberRepositoryBase _base; + + public event Action FeedUpdated; + + public Subscription(GrabberRepositoryBase @base) + { + _base = @base; + } + + public void Notify(IGrabberRepositoryFeed feed) + { + FeedUpdated?.Invoke(feed, _base); + } + + public void Dispose() + { + _base.Unsubscribe(this); + } + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryChangeDetector.cs b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryChangeDetector.cs new file mode 100644 index 0000000..9aeb0e6 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryChangeDetector.cs @@ -0,0 +1,180 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; 
+using System.Threading; +using System.Threading.Tasks; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository +{ + /// + /// Default implementation for + /// + public class GrabberRepositoryChangeDetector : IGrabberRepositoryChangeDetector + { + private readonly Dictionary _repositories; + private readonly List _disposables = new(); + private readonly AutoResetEvent _pollingSync = new(false); + private IGrabberRepository[] _pollingRepositories; + private CancellationTokenSource _cancellationTokenSource; + private CancellationTokenSource _pollingIntervalCancellation; + private TimeSpan _pollingInterval = TimeSpan.FromHours(1); + + public GrabberRepositoryChangeDetector(IEnumerable repositories) + { + _repositories = repositories.ToDictionary(r => r, r => (IGrabberRepositoryFeed)null); + _ = ProcessRepositoriesAsync(); + } + + public event GrabberRepositoryChangeEventHandler RepositoryChanged; + + /// + /// Gets or sets the polling interval, which is the minimum time to wait before fetching the feed of a manually trackable repository. + /// The default value is 1h. 
+ /// + public TimeSpan PollingInterval + { + get => _pollingInterval; + set + { + _pollingInterval = value; + _ = TriggerDelayedPollingAsync(); + } + } + + public async Task ForceUpdateFeedAsync(bool pollableOnly = true) + { + if (pollableOnly) + { + _pollingIntervalCancellation?.Cancel(); + await TriggerPollingAsync(); + } + else + { + await PollAsync(_repositories.Keys, _cancellationTokenSource.Token).ConfigureAwait(false); + } + } + + public void Dispose() + { + if (_cancellationTokenSource == null || _cancellationTokenSource.IsCancellationRequested) + return; + _cancellationTokenSource.Cancel(); + _cancellationTokenSource = null; + _pollingIntervalCancellation?.Cancel(); + _pollingIntervalCancellation = null; + _repositories.Clear(); + RepositoryChanged = null; + foreach (var disposable in _disposables) + disposable.Dispose(); + _disposables.Clear(); + _pollingSync.Dispose(); + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + } + + private async Task ProcessRepositoriesAsync() + { + var pollingRepos = new List(); + foreach (var repository in _repositories.Keys) + { + if (repository.CanNotifyChanges) + { + var subscription = await repository.SubscribeAsync().ConfigureAwait(false); + subscription.FeedUpdated += Subscription_FeedUpdated; + _disposables.Add(subscription); + continue; + } + + pollingRepos.Add(repository); + } + _pollingRepositories = pollingRepos.ToArray(); + _ = TriggerPollingAsync(); + } + + private async Task TriggerPollingAsync() + { + if (!_pollingSync.Set()) + return; + + _cancellationTokenSource?.Cancel(); + _cancellationTokenSource = new CancellationTokenSource(); + try + { + // test if there are any repos to poll + if (_pollingRepositories == null || _pollingRepositories.Length == 0) + return; + + await PollAsync(_pollingRepositories, _cancellationTokenSource.Token).ConfigureAwait(false); + } + finally + { + _pollingSync.Reset(); + } + + // trigger delayed polling + 
_cancellationTokenSource.Token.ThrowIfCancellationRequested(); + _ = TriggerDelayedPollingAsync(); + } + + private async Task TriggerDelayedPollingAsync() + { + _pollingIntervalCancellation?.Cancel(); + _pollingIntervalCancellation = new(); + await Task.Delay(_pollingInterval, _pollingIntervalCancellation.Token).ConfigureAwait(false); + await TriggerPollingAsync().ConfigureAwait(false); + } + + private async Task PollAsync(IEnumerable repositories, CancellationToken cancellationToken) + { + async Task> GetFeedAsync(IGrabberRepository repo) + { + var feed = await repo.GetFeedAsync(cancellationToken).ConfigureAwait(false); + return new Tuple(repo, feed); + }; + + var tasks = new HashSet>>(); + foreach (var repo in repositories) + { + var task = GetFeedAsync(repo); + tasks.Add(task); + } + + while (tasks.Count > 0) + { + var task = await Task.WhenAny(tasks); + cancellationToken.ThrowIfCancellationRequested(); + tasks.Remove(task); + + var tuple = task.Result; + TestChanged(tuple.Item1, tuple.Item2); + cancellationToken.ThrowIfCancellationRequested(); + } + } + + private async void Subscription_FeedUpdated(IGrabberRepositoryFeed feed, IGrabberRepository repository) + { + if (RepositoryChanged == null) + return; + feed ??= await repository.GetFeedAsync().ConfigureAwait(false); + TestChanged(repository, feed); + } + + private void TestChanged(IGrabberRepository repository, IGrabberRepositoryFeed feed) + { + if (!_repositories.ContainsKey(repository)) + throw new InvalidOperationException("The repository is not registered."); + + var prevFeed = _repositories[repository]; + if (prevFeed != null && feed.Equals(prevFeed)) + // not changed + return; + + _repositories[repository] = feed; + RepositoryChanged?.Invoke(repository, feed, prevFeed); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryChangeEventHandler.cs b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryChangeEventHandler.cs new file mode 100644 index 0000000..160486e --- 
/dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryChangeEventHandler.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository +{ + /// + /// Represents a method that handles changes of a grabber repository. + /// + /// The updated repository + /// The new feed + /// The previous feed + public delegate void GrabberRepositoryChangeEventHandler(IGrabberRepository repository, IGrabberRepositoryFeed feed, IGrabberRepositoryFeed previousFeed); +} diff --git a/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryConfigurationExtensions.cs b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryConfigurationExtensions.cs new file mode 100644 index 0000000..c37fbf6 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryConfigurationExtensions.cs @@ -0,0 +1,52 @@ +using DotNetTools.SharpGrabber.BlackWidow.Repository.GitHub; +using DotNetTools.SharpGrabber.BlackWidow.Repository.Memory; +using System; +using System.Collections.Generic; +using System.Text; +using DotNetTools.SharpGrabber.BlackWidow.Repository.Local; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository +{ + /// + /// Defines extension methods for IBlackWidowRepositoryConfigurator. + /// + public static class GrabberRepositoryConfigurationExtensions + { + /// + /// Configures the configurator to use an in-memory repository. + /// + public static IBlackWidowRepositoryConfigurator UseMemory(this IBlackWidowRepositoryConfigurator configurator, bool readOnly = false) + { + var repo = new InMemoryRepository(readOnly); + return configurator.Use(repo); + } + + /// + /// Configures the configurator to use a physical repository rooted at rootPath. + /// + public static IBlackWidowRepositoryConfigurator UsePhysical(this IBlackWidowRepositoryConfigurator configurator, string rootPath, bool readOnly = false) + { + var repo = new PhysicalGrabberRepository(rootPath, readOnly); + return configurator.Use(repo); + } + + /// + /// Configures to use a GitHub repository.
+ /// + public static IBlackWidowRepositoryConfigurator UseGitHub(this IBlackWidowRepositoryConfigurator configurator, Action configure) + { + var repo = new GitHubGrabberRepository(); + configure.Invoke(repo); + return configurator.Use(repo); + } + + /// + /// Configures to use the official GitHub repository. + /// + public static IBlackWidowRepositoryConfigurator UseOfficial(this IBlackWidowRepositoryConfigurator configurator) + { + var repo = new OfficialGrabberRepository(); + return configurator.Use(repo); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryFeed.cs b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryFeed.cs new file mode 100644 index 0000000..552b120 --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryFeed.cs @@ -0,0 +1,69 @@ +using DotNetTools.SharpGrabber.BlackWidow.Internal; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository +{ + /// + /// Default in-memory implementation for + /// + public class GrabberRepositoryFeed : IGrabberRepositoryFeed + { + private readonly ConcurrentDictionary _scripts = new(); + + public GrabberRepositoryFeed() { } + + public GrabberRepositoryFeed(IEnumerable scripts) + { + if (scripts == null) + throw new ArgumentNullException(nameof(scripts)); + foreach (var script in scripts) + _scripts.TryAdd(script.Id, script); + } + + public IGrabberRepositoryScript GetScript(string scriptId) + { + return _scripts.GetOrDefault(scriptId); + } + + public IEnumerable GetScripts() + { + return _scripts.Values.AsEnumerable(); + } + + public void Add(IGrabberRepositoryScript script) + { + _scripts.TryAdd(script.Id, script); + } + + public void Remove(string id) + { + _scripts.TryRemove(id, out _); + } + + public override int GetHashCode() + { + return 0; + } + + public override bool Equals(object obj) + { + if (obj is GrabberRepositoryFeed 
feed) + { + if (_scripts.Count != feed._scripts.Count) + return false; + foreach (var ownScript in _scripts.Values) + { + var otherScript = feed.GetScript(ownScript.Id); + if (otherScript == null || !otherScript.Equals(ownScript)) + return false; + } + return true; + } + return base.Equals(obj); + } + } +} diff --git a/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryFeedDifference.cs b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryFeedDifference.cs new file mode 100644 index 0000000..9c3c03d --- /dev/null +++ b/src/SharpGrabber.BlackWidow/Repository/GrabberRepositoryFeedDifference.cs @@ -0,0 +1,57 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace DotNetTools.SharpGrabber.BlackWidow.Repository +{ + /// + /// Defines all possible types of difference between two objects. + /// + public enum GrabberRepositoryFeedDifferenceType + { + /// + /// Indicates that a script has been added. + /// + ScriptAdded, + + /// + /// Indicates that a script has been removed. + /// + ScriptRemoved, + + /// + /// Indicates that a script has been changed. + /// + ScriptChanged + } + + /// + /// Describes a difference between two objects. + /// + public class GrabberRepositoryFeedDifference + { + public GrabberRepositoryFeedDifference(GrabberRepositoryFeedDifferenceType type, IGrabberRepositoryScript ownScript, IGrabberRepositoryScript otherScript) + { + Type = type; + OwnScript = ownScript; + OtherScript = otherScript; + } + + /// + /// Gets the type of this difference. + /// + public GrabberRepositoryFeedDifferenceType Type { get; } + + /// + /// Gets the descriptor for the own script. + /// This value is NULL if is . + /// + public IGrabberRepositoryScript OwnScript { get; } + + /// + /// Gets the descriptor for the other script. + /// This value is NULL if is . 
/// <summary>
/// Default implementation for <see cref="IGrabberRepositoryScript"/>.
/// </summary>
/// <remarks>
/// The property declaration order is kept as-is on purpose: serializers that emit
/// properties in declaration order would otherwise produce a different descriptor layout.
/// </remarks>
public class GrabberRepositoryScript : IGrabberRepositoryScript
{
    // Raw patterns exactly as assigned, and their compiled counterparts (same order).
    private string[] _regularExpressionStrings;
    private Regex[] _regularExpressions;

    /// <summary>Gets or sets the identifier of the script.</summary>
    public string Id { get; set; }

    /// <summary>Gets or sets the friendly name of the script.</summary>
    public string Name { get; set; }

    /// <summary>Gets or sets the script type.</summary>
    public GrabberScriptType Type { get; set; }

    /// <summary>Gets or sets the version string of the script.</summary>
    public string Version { get; set; }

    /// <summary>Gets or sets whether the script is flagged as deprecated.</summary>
    public bool IsDeprecated { get; set; }

    /// <summary>
    /// Gets or sets the regular expression patterns used by <see cref="IsMatch"/>.
    /// Assigning a new array compiles every pattern (case-insensitive); assigning NULL clears them.
    /// </summary>
    public string[] SupportedRegularExpressions
    {
        get => _regularExpressionStrings;
        set
        {
            // Same array instance as before: nothing to recompile.
            if (ReferenceEquals(_regularExpressionStrings, value))
                return;

            _regularExpressionStrings = value;

            if (value == null)
            {
                _regularExpressions = null;
                return;
            }

            var compiled = new Regex[value.Length];
            for (var index = 0; index < value.Length; index++)
                compiled[index] = new Regex(value[index], RegexOptions.Compiled | RegexOptions.IgnoreCase);
            _regularExpressions = compiled;
        }
    }

    /// <summary>Gets or sets the BlackWidow API version. Defaults to 1.</summary>
    public int ApiVersion { get; set; } = 1;

    /// <summary>
    /// Tests <paramref name="uri"/> against the compiled patterns.
    /// </summary>
    /// <returns>
    /// <c>true</c> when no patterns are configured, or when at least one pattern matches
    /// the string form of <paramref name="uri"/>; otherwise <c>false</c>.
    /// </returns>
    public bool IsMatch(Uri uri)
    {
        var patterns = _regularExpressions;

        if (patterns != null && patterns.Length != 0)
        {
            var candidate = uri.ToString();
            foreach (var pattern in patterns)
            {
                if (pattern.IsMatch(candidate))
                    return true;
            }

            return false;
        }

        // no patterns configured: treated as potential general support
        return true;
    }

    /// <summary>Computes the hash code from <see cref="Id"/> only.</summary>
    public override int GetHashCode() => HashCodeUtils.Compute(Id);

    /// <summary>
    /// Compares this descriptor with <paramref name="obj"/> through the listed property selectors.
    /// </summary>
    public override bool Equals(object obj)
    {
        // NOTE(review): whether SupportedRegularExpressions is compared element-wise or by
        // reference depends on EqualityUtils, which is not visible here - confirm.
        return EqualityUtils.Equals(
            this,
            obj,
            o => o.Id,
            o => o.Name,
            o => o.Type,
            o => o.Version,
            o => o.IsDeprecated,
            o => o.SupportedRegularExpressions,
            o => o.ApiVersion);
    }
}
/// <summary>
/// Gets the latest feed from the source without a cancellation token.
/// This will result in an I/O operation such as a web service call, disk scan etc.
/// </summary>
public static Task<IGrabberRepositoryFeed> GetFeedAsync(this IGrabberRepository repository)
    => repository.GetFeedAsync(CancellationToken.None);

/// <summary>
/// Fetches the source of a specific script without a cancellation token.
/// </summary>
public static Task<IGrabberScriptSource> FetchSourceAsync(this IGrabberRepository repository, IGrabberRepositoryScript script)
    => repository.FetchSourceAsync(script, CancellationToken.None);

/// <summary>
/// Puts the <paramref name="script"/> with its <paramref name="source"/> into the repository
/// without a cancellation token.
/// </summary>
/// <exception cref="NotSupportedException">Thrown if putting scripts into this repository is not supported.</exception>
public static Task PutAsync(this IGrabberRepository repository, IGrabberRepositoryScript script, IGrabberScriptSource source)
    => repository.PutAsync(script, source, CancellationToken.None);
/// <summary>
/// Defines extension methods for <see cref="IGrabberRepositoryFeed"/>.
/// </summary>
public static class GrabberRepositoryFeedExtensions
{
    /// <summary>
    /// Gets all the differences between this feed and <paramref name="otherFeed"/>.
    /// </summary>
    /// <remarks>
    /// Results are produced lazily, in this order:
    /// scripts that exist only in <paramref name="feed"/> (reported as added),
    /// scripts that exist only in <paramref name="otherFeed"/> (reported as removed),
    /// then scripts present in both whose descriptors are not equal (reported as changed).
    /// </remarks>
    /// <param name="feed">The feed whose scripts are reported as the "own" side.</param>
    /// <param name="otherFeed">The feed whose scripts are reported as the "other" side.</param>
    /// <returns>A lazily evaluated sequence of differences.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown immediately if <paramref name="feed"/> or <paramref name="otherFeed"/> is NULL.
    /// </exception>
    public static IEnumerable<GrabberRepositoryFeedDifference> GetDifferences(this IGrabberRepositoryFeed feed, IGrabberRepositoryFeed otherFeed)
    {
        // Validate eagerly: inside an iterator these checks would only run on first enumeration.
        if (feed == null)
            throw new ArgumentNullException(nameof(feed));
        if (otherFeed == null)
            throw new ArgumentNullException(nameof(otherFeed));

        return Enumerate();

        IEnumerable<GrabberRepositoryFeedDifference> Enumerate()
        {
            var ownDic = feed.GetScripts().ToDictionary(s => s.Id);
            var otherDic = otherFeed.GetScripts().ToDictionary(s => s.Id);

            // return what's new
            foreach (var own in ownDic)
            {
                if (!otherDic.ContainsKey(own.Key))
                    yield return new GrabberRepositoryFeedDifference(GrabberRepositoryFeedDifferenceType.ScriptAdded, own.Value, null);
            }

            // return what's removed
            foreach (var other in otherDic)
            {
                if (!ownDic.ContainsKey(other.Key))
                    yield return new GrabberRepositoryFeedDifference(GrabberRepositoryFeedDifferenceType.ScriptRemoved, null, other.Value);
            }

            // return what's changed (walks the other feed's keys, one lookup per key)
            foreach (var other in otherDic)
            {
                if (!ownDic.TryGetValue(other.Key, out var ownScript))
                    continue;
                if (other.Value.Equals(ownScript))
                    continue;
                yield return new GrabberRepositoryFeedDifference(GrabberRepositoryFeedDifferenceType.ScriptChanged, ownScript, other.Value);
            }
        }
    }
}
/// <summary>
/// Defines extension methods for <see cref="IGrabberRepositoryScript"/>.
/// </summary>
public static class GrabberRepositoryScriptExtensions
{
    /// <summary>
    /// Returns version of the script as <see cref="Version"/>.
    /// </summary>
    /// <returns>
    /// NULL when the script's version string is NULL or empty; otherwise the result of
    /// <see cref="Version.Parse(string)"/> on that string.
    /// </returns>
    public static Version GetVersion(this IGrabberRepositoryScript script)
    {
        var versionText = script.Version;

        if (string.IsNullOrEmpty(versionText))
            return null;

        return Version.Parse(versionText);
    }
}
/// <summary>
/// Mounts to a physical directory to fetch and store grabbers.
/// </summary>
/// <remarks>
/// Each script lives in its own sub-directory of the root, named after the script identifier.
/// That directory holds the descriptor file and the script file.
/// </remarks>
public class PhysicalGrabberRepository : GrabberRepositoryBase
{
    private readonly string _rootPath;
    private readonly bool _readOnly;
    private bool _monitoring;

    /// <summary>
    /// Creates a repository rooted at <paramref name="rootPath"/>.
    /// </summary>
    /// <param name="rootPath">The directory that contains one sub-directory per script.</param>
    /// <param name="readOnly">Whether putting scripts into this repository is disallowed.</param>
    public PhysicalGrabberRepository(string rootPath, bool readOnly = false)
    {
        _rootPath = rootPath;
        _readOnly = readOnly;
    }

    /// <inheritdoc />
    public override bool CanPut => !_readOnly;

    /// <summary>
    /// Gets or sets the descriptor file name, including its extension. The default value is 'descriptor.json'.
    /// </summary>
    public string DescriptorFileName { get; set; } = "descriptor.json";

    /// <summary>
    /// Gets or sets the script file name without extension. The default value is 'script'.
    /// </summary>
    public string ScriptFileNameWithoutExtension { get; set; } = "script";

    /// <summary>
    /// Builds a feed from the sub-directories of the root directory.
    /// The root directory is created if it does not exist.
    /// </summary>
    public override Task<IGrabberRepositoryFeed> GetFeedAsync(CancellationToken cancellationToken)
    {
        Directory.CreateDirectory(_rootPath);
        var root = new DirectoryInfo(_rootPath);
        // every sub-directory name is taken as a script identifier
        var ids = root.EnumerateDirectories().Select(d => d.Name).ToArray();
        var scripts = ids.Select(id => ReadScriptInfo(id));
        var feed = new GrabberRepositoryFeed(scripts);
        return Task.FromResult<IGrabberRepositoryFeed>(feed);
    }

    /// <summary>
    /// Reads the script file that belongs to <paramref name="script"/> and wraps its text as a source.
    /// </summary>
    public override Task<IGrabberScriptSource> FetchSourceAsync(IGrabberRepositoryScript script, CancellationToken cancellationToken)
    {
        var scriptPath = GetScriptPath(script);
        var source = new GrabberScriptSource(File.ReadAllText(scriptPath));
        return Task.FromResult<IGrabberScriptSource>(source);
    }

    /// <summary>
    /// Writes the descriptor and the source of <paramref name="script"/> into its directory,
    /// then notifies subscribers with a fresh feed if monitoring has been started.
    /// </summary>
    /// <exception cref="NotSupportedException">Thrown if the repository is read-only.</exception>
    public override async Task PutAsync(IGrabberRepositoryScript script, IGrabberScriptSource source, CancellationToken cancellationToken)
    {
        if (_readOnly)
            throw new NotSupportedException("The repository is read-only.");

        var descriptorPath = GetDescriptorPath(script.Id);
        var scriptPath = GetScriptPath(script);
        Directory.CreateDirectory(Path.GetDirectoryName(descriptorPath));
        Directory.CreateDirectory(Path.GetDirectoryName(scriptPath));

        var sourceContent = await source.GetSourceAsync().ConfigureAwait(false);
        File.WriteAllText(descriptorPath, SerializeDescriptor(script));
        File.WriteAllText(scriptPath, sourceContent);

        if (_monitoring)
        {
            var newFeed = await GetFeedAsync(CancellationToken.None).ConfigureAwait(false);
            NotifyChanged(newFeed);
        }
    }

    /// <summary>
    /// Turns on change notification for subsequent <see cref="PutAsync"/> calls.
    /// </summary>
    protected override Task StartMonitoringAsync()
    {
        _monitoring = true;
        return Task.CompletedTask;
    }

    /// <summary>
    /// Turns off change notification for subsequent <see cref="PutAsync"/> calls.
    /// </summary>
    protected override Task StopMonitoringAsync()
    {
        _monitoring = false;
        return Task.CompletedTask;
    }

    /// <summary>
    /// Serializes a script descriptor to indented JSON.
    /// </summary>
    protected virtual string SerializeDescriptor(IGrabberRepositoryScript script)
    {
        return JsonConvert.SerializeObject(script, Formatting.Indented);
    }

    /// <summary>
    /// Deserializes a script descriptor from its JSON representation.
    /// </summary>
    protected virtual IGrabberRepositoryScript DeserializeDescriptor(string serializedValue)
    {
        return JsonConvert.DeserializeObject<GrabberRepositoryScript>(serializedValue);
    }

    // Loads and deserializes the descriptor file found in the directory of the given script.
    private IGrabberRepositoryScript ReadScriptInfo(string scriptId)
    {
        var descriptorPath = GetDescriptorPath(scriptId);
        var fileContent = File.ReadAllText(descriptorPath);
        return DeserializeDescriptor(fileContent);
    }

    // Combines the root path with the given segments, in order.
    private string GetPath(IEnumerable<string> parts)
    {
        // Concat rather than Union: Union has set semantics and would silently drop any
        // segment equal to an earlier one (e.g. a script id that equals the script file name
        // or the root path), yielding a wrong path.
        var array = new[] { _rootPath }.Concat(parts).ToArray();
        return Path.Combine(array);
    }

    private string GetPath(params string[] parts)
        => GetPath((IEnumerable<string>)parts);

    // <root>/<scriptId>/<DescriptorFileName>
    private string GetDescriptorPath(string scriptId)
        => GetPath(scriptId, DescriptorFileName);

    // <root>/<script.Id>/<ScriptFileNameWithoutExtension>[.<extension declared for the script type>]
    private string GetScriptPath(IGrabberRepositoryScript script)
    {
        var attribute = script.Type.GetScriptTypeAttribute(false);
        var scriptName = string.IsNullOrEmpty(attribute?.FileExtension)
            ? ScriptFileNameWithoutExtension
            : $"{ScriptFileNameWithoutExtension}.{attribute.FileExtension}";
        return GetPath(script.Id, scriptName);
    }
}
/// <summary>
/// In-memory implementation of grabber repository.
/// </summary>
/// <remarks>
/// Scripts and their sources are kept in a dictionary keyed by script identifier;
/// putting a script with an existing identifier replaces the previous entry.
/// </remarks>
public class InMemoryRepository : GrabberRepositoryBase
{
    private readonly Dictionary<string, ScriptInfo> _scripts = new();
    private readonly bool _readOnly;

    /// <summary>
    /// Creates an empty repository.
    /// </summary>
    /// <param name="readOnly">Whether putting scripts into this repository is disallowed.</param>
    public InMemoryRepository(bool readOnly = false)
    {
        _readOnly = readOnly;
    }

    /// <inheritdoc />
    public override bool CanPut => !_readOnly;

    /// <summary>
    /// Creates a feed from the descriptors currently stored.
    /// </summary>
    public override Task<IGrabberRepositoryFeed> GetFeedAsync(CancellationToken cancellationToken)
    {
        var descriptors = _scripts.Values.Select(entry => entry.Script);
        IGrabberRepositoryFeed feed = new GrabberRepositoryFeed(descriptors);
        return Task.FromResult(feed);
    }

    /// <summary>
    /// Looks up the stored source by the identifier of <paramref name="script"/>.
    /// The task result is NULL when no entry is found.
    /// </summary>
    public override Task<IGrabberScriptSource> FetchSourceAsync(IGrabberRepositoryScript script, CancellationToken cancellationToken)
    {
        var entry = _scripts.GetOrDefault(script.Id);
        var source = entry?.Source;
        return Task.FromResult(source);
    }

    /// <summary>
    /// Stores <paramref name="script"/> together with its <paramref name="source"/>.
    /// </summary>
    /// <exception cref="NotSupportedException">Thrown if the repository is read-only.</exception>
    public override Task PutAsync(IGrabberRepositoryScript script, IGrabberScriptSource source, CancellationToken cancellationToken)
    {
        if (_readOnly)
            throw new NotSupportedException("Cannot put into the memory repository because it's read-only.");

        _scripts[script.Id] = new ScriptInfo(script, source);
        return Task.CompletedTask;
    }

    // Pairs a script descriptor with its source.
    private sealed class ScriptInfo
    {
        public ScriptInfo(IGrabberRepositoryScript script, IGrabberScriptSource source)
        {
            Script = script;
            Source = source;
        }

        public IGrabberRepositoryScript Script { get; }

        public IGrabberScriptSource Source { get; }
    }
}
/// <summary>
/// Defines the official SharpGrabber grabber repository.
/// </summary>
/// <remarks>
/// A <see cref="GitHubGrabberRepository"/> whose repository address, branch, root path and
/// feed file name are taken from <c>BlackWidowConstants.GitHub.OfficialRepository</c>.
/// </remarks>
public class OfficialGrabberRepository : GitHubGrabberRepository
{
    /// <summary>
    /// Creates the repository using the base class' parameterless constructor.
    /// </summary>
    public OfficialGrabberRepository()
        => ApplyOfficialSettings();

    /// <summary>
    /// Creates the repository on top of the given HTTP client.
    /// </summary>
    /// <param name="httpClient">The client handed to the base repository.</param>
    /// <param name="ownClient">Forwarded to the base repository together with <paramref name="httpClient"/>.</param>
    public OfficialGrabberRepository(HttpClient httpClient, bool ownClient = true)
        : base(httpClient, ownClient)
        => ApplyOfficialSettings();

    // Points the inherited GitHub settings at the official repository.
    private void ApplyOfficialSettings()
    {
        Repository = BlackWidowConstants.GitHub.OfficialRepository.RepositoryAddress;
        BranchName = BlackWidowConstants.GitHub.OfficialRepository.MasterBranch;
        RepoRootPath = BlackWidowConstants.GitHub.OfficialRepository.RootPath;
        FeedFileName = BlackWidowConstants.GitHub.OfficialRepository.FeedFileName;
    }
}
/// <summary>
/// Converts strings and numeric values to <see cref="bool"/>.
/// </summary>
/// <remarks>
/// A string converts to <c>true</c> unless it is empty; a numeric value converts to
/// <c>true</c> unless it equals zero. Any other input, including NULL, is declined.
/// </remarks>
internal class ToBoolConverter : ITypeConverter
{
    /// <summary>
    /// Tries to convert <paramref name="value"/> to a boolean.
    /// </summary>
    /// <param name="value">The value to convert. NULL is declined rather than causing an exception.</param>
    /// <param name="targetType">The requested type; anything other than <see cref="bool"/> is declined.</param>
    /// <param name="formatProvider">Not used by this converter.</param>
    /// <param name="convertedValue">The boxed boolean on success; otherwise NULL.</param>
    /// <returns><c>true</c> if the conversion was performed; otherwise <c>false</c>.</returns>
    public bool TryConvert(object value, Type targetType, IFormatProvider formatProvider, out object convertedValue)
    {
        convertedValue = null;

        // Decline foreign target types, and NULL input (calling GetType() on it would throw).
        if (targetType != typeof(bool) || value is null)
            return false;

        if (value is string str)
        {
            convertedValue = str.Length != 0;
            return true;
        }

        if (value.GetType().IsNumericType())
        {
            // go through double so every numeric type is compared against zero the same way
            var num = (double)Convert.ChangeType(value, typeof(double));
            convertedValue = num != 0;
            return true;
        }

        return false;
    }
}
/// <summary>
/// Exposes the converter that is assembled once in the static constructor.
/// </summary>
internal static class TypeConverters
{
    /// <summary>
    /// The converter built by the static constructor: a <c>TypeConverter</c> with one digester
    /// and seven converters registered, in the order listed below.
    /// </summary>
    public static readonly ITypeConverter Default;

    static TypeConverters()
    {
        var converter = new TypeConverter();
        // NOTE(review): the type arguments of the AddDigester/AddConverter calls are not
        // visible in this view of the file - confirm them against the real source before
        // touching this chain. The registrations happen in the order written here.
        converter
            .AddDigester()
            .AddConverter()
            .AddConverter()
            .AddConverter()
            .AddConverter()
            .AddConverter()
            .AddConverter()
            .AddConverter();
        Default = converter;
    }
}
b/src/SharpGrabber.Desktop/MainWindow.xaml @@ -18,7 +18,12 @@ - + +