A
The question does not name a specific site, so I will answer in general terms and suggest tooling that works for any site. The example below only handles images referenced through the src attribute of img elements, but the same approach also works for pictures set via background-image. That is more complicated, but possible. For simplicity the JavaScript uses jQuery, on the assumption that the target site already loads it. It could equally be plain JavaScript or another library, provided that library is present on the site.
Use the CefSharp library (https://github.com/cefsharp/CefSharp) for tasks like this.
What is it? It is a managed wrapper over CEF (https://en.wikipedia.org/wiki/Chromium_Embedded_Framework). In other words, you get the full power of Chromium and drive it programmatically.
Why CEF/CefSharp?
- There is no need to parse the page yourself (a complex and thankless task that is strongly discouraged).
- You can work with the fully loaded page (after its scripts have run).
- You can execute arbitrary JavaScript with the latest language features.
- You can issue an AJAX request from JavaScript and then, on success, raise an event in C# code with the AJAX result.
CefSharp varieties:
- CefSharp.WinForms
- CefSharp.Wpf
- CefSharp.OffScreen
The first two are used when you need to give users a browser control. Conceptually they are similar to WebBrowser (https://msdn.microsoft.com/en-us/library/system.windows.forms.webbrowser(v=vs.110).aspx) in Windows Forms, except that WebBrowser is a wrapper over IE, not Chromium as in our case. We will therefore use the CefSharp.OffScreen variety.
Writing the code
Let's say we have a console application, but that choice is up to you. Install the NuGet package CefSharp.OffScreen, version 51:
Install-Package CefSharp.OffScreen -Version 51.0.0
The thing is, on the C# side every JavaScript array arrives as a List<object>. The result of a script is an object which, depending on what the script returned, is actually a List<object>, string, bool or int. To get strongly typed results, we will create a small ConvertHelper:
public static class ConvertHelper
{
/// <summary>
/// Treats <paramref name="obj"/> as a sequence of objects and returns its elements,
/// each cast to <typeparamref name="T"/>, as a new array.
/// </summary>
/// <typeparam name="T">Element type of the resulting array.</typeparam>
/// <param name="obj">Value that must be castable to <see cref="IEnumerable{T}"/> of <see cref="object"/>.</param>
/// <returns>An array holding the elements of <paramref name="obj"/> in their original order.</returns>
public static T[] GetArrayFromObjectList<T>(object obj)
{
    IEnumerable<object> untypedItems = (IEnumerable<object>)obj;
    IEnumerable<T> typedItems = untypedItems.Cast<T>();
    return typedItems.ToArray();
}
/// <summary>
/// Treats <paramref name="obj"/> as a sequence of objects and returns its elements,
/// each cast to <typeparamref name="T"/>, as a new list.
/// </summary>
/// <typeparam name="T">Element type of the resulting list.</typeparam>
/// <param name="obj">Value that must be castable to <see cref="IEnumerable{T}"/> of <see cref="object"/>.</param>
/// <returns>A list holding the elements of <paramref name="obj"/> in their original order.</returns>
public static List<T> GetListFromObjectList<T>(object obj)
{
    IEnumerable<object> untypedItems = (IEnumerable<object>)obj;
    IEnumerable<T> typedItems = untypedItems.Cast<T>();
    return typedItems.ToList();
}
/// <summary>
/// Converts an untyped value to <typeparamref name="T"/>.
/// </summary>
/// <remarks>
/// When <typeparamref name="T"/> is an array type or a <see cref="List{T}"/>, the value is
/// treated as a sequence of objects and converted element by element through
/// <see cref="GetArrayFromObjectList{T}"/> or <see cref="GetListFromObjectList{T}"/>.
/// Any other target type is produced with a direct cast.
/// </remarks>
/// <typeparam name="T">Requested result type.</typeparam>
/// <param name="obj">Value to convert; may be <c>null</c>.</param>
/// <returns>
/// The converted value, or <c>default(T)</c> when <paramref name="obj"/> is <c>null</c>.
/// </returns>
/// <exception cref="InvalidCastException">
/// <paramref name="obj"/> is a scalar that cannot be cast to <typeparamref name="T"/>.
/// </exception>
public static T ToTypedVariable<T>(object obj)
{
    if (obj == null)
    {
        // default(T) is null for reference types and the zero value for value types.
        // Returning a dynamic null here would throw a RuntimeBinderException
        // whenever T is a non-nullable value type such as int or bool.
        return default(T);
    }

    Type targetType = typeof(T);

    if (targetType.IsArray)
    {
        return (T)InvokeSequenceConverter(
            nameof(GetArrayFromObjectList), targetType.GetElementType(), obj);
    }

    if (targetType.IsGenericType && targetType.GetGenericTypeDefinition() == typeof(List<>))
    {
        return (T)InvokeSequenceConverter(
            nameof(GetListFromObjectList), targetType.GetGenericArguments().Single(), obj);
    }

    return (T)obj;
}

/// <summary>
/// Invokes the named generic converter of <see cref="ConvertHelper"/> via reflection,
/// closing it over <paramref name="elementType"/>, which is only known at run time.
/// </summary>
private static object InvokeSequenceConverter(string methodName, Type elementType, object source)
{
    return typeof(ConvertHelper)
        .GetMethod(methodName)
        .MakeGenericMethod(elementType)
        .Invoke(null, new[] { source });
}
}
Next we write the CefSharpWrapper class:
public sealed class CefSharpWrapper
{
private ChromiumWebBrowser _browser;
/// <summary>
/// Initializes CEF, creates the off-screen browser and blocks the calling thread
/// until the browser reports that it has been initialized.
/// </summary>
public void InitializeBrowser()
{
    CefSettings settings = new CefSettings();
    // Disable GPU in WPF and Offscreen until GPU issues has been resolved
    settings.CefCommandLineArgs.Add("disable-gpu", "1");
    // Perform dependency check to make sure all relevant resources are in our output directory.
    Cef.Initialize(settings, shutdownOnProcessExit: true, performDependencyCheck: true);

    _browser = new ChromiumWebBrowser();

    // A TaskCompletionSource is used as the signal instead of a wait handle:
    // there is nothing to dispose, and a late Set from the event cannot hit a disposed handle.
    TaskCompletionSource<bool> initialized = new TaskCompletionSource<bool>();
    EventHandler onBrowserInitialized = null;
    onBrowserInitialized = (sender, e) =>
    {
        _browser.BrowserInitialized -= onBrowserInitialized;
        initialized.TrySetResult(true);
    };
    _browser.BrowserInitialized += onBrowserInitialized;

    // The event may already have been raised before the handler was attached;
    // in that case waiting for it would block forever.
    if (_browser.IsBrowserInitialized)
    {
        _browser.BrowserInitialized -= onBrowserInitialized;
        return;
    }

    initialized.Task.Wait();
}
/// <summary>
/// Disposes the browser created by this wrapper, if any, and shuts CEF down.
/// </summary>
public void ShutdownBrowser()
{
    // Release the browser instance first so it is not left alive while CEF is torn down.
    if (_browser != null)
    {
        _browser.Dispose();
        _browser = null;
    }

    // Clean up Chromium objects. You need to call this in your application otherwise
    // you will get a crash when closing.
    Cef.Shutdown();
}
/// <summary>
/// Loads <paramref name="pageUrl"/> in the browser and, once the browser reports that
/// loading has finished, runs <paramref name="onLoadCallback"/> and exposes its outcome.
/// </summary>
/// <typeparam name="T">Type of the value produced by the callback.</typeparam>
/// <param name="pageUrl">Address of the page to load.</param>
/// <param name="onLoadCallback">Asynchronous work to run after the page has loaded.</param>
/// <returns>
/// A task that completes with the callback's result, or faults with the exception
/// the callback threw.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="onLoadCallback"/> is <c>null</c>.</exception>
public Task<T> GetResultAfterPageLoad<T>(string pageUrl, Func<Task<T>> onLoadCallback)
{
    if (onLoadCallback == null)
    {
        throw new ArgumentNullException(nameof(onLoadCallback));
    }

    TaskCompletionSource<T> tcs = new TaskCompletionSource<T>();
    EventHandler<LoadingStateChangedEventArgs> onPageLoaded = null;

    // The handler is an async void lambda, so every failure must be routed into the
    // TaskCompletionSource: an exception escaping it would be unobserved and the
    // returned task would never complete.
    onPageLoaded = async (sender, e) =>
    {
        // This event is raised when loading starts and again when it finishes;
        // only the second notification (IsLoading == false) is of interest.
        if (e.IsLoading)
        {
            return;
        }

        // Remove the load event handler, because we only want to react to the initial page load.
        _browser.LoadingStateChanged -= onPageLoaded;

        try
        {
            T result = await onLoadCallback();
            tcs.TrySetResult(result);
        }
        catch (Exception ex)
        {
            tcs.TrySetException(ex);
        }
    };

    _browser.LoadingStateChanged += onPageLoaded;
    try
    {
        _browser.Load(pageUrl);
    }
    catch
    {
        // Do not leave a dangling subscription behind if the load could not be started.
        _browser.LoadingStateChanged -= onPageLoaded;
        throw;
    }

    return tcs.Task;
}
/// <summary>
/// Evaluates <paramref name="script"/> in the browser and converts its result
/// to <typeparamref name="T"/> via <c>ConvertHelper.ToTypedVariable</c>.
/// </summary>
/// <typeparam name="T">Type the script result is converted to.</typeparam>
/// <param name="script">JavaScript source to evaluate.</param>
/// <returns>The converted script result.</returns>
/// <exception cref="ScriptException">
/// The response reports failure; the exception carries the response message.
/// </exception>
public async Task<T> EvaluateJavascript<T>(string script)
{
    JavascriptResponse response = await _browser.EvaluateScriptAsync(script);

    if (!response.Success)
    {
        throw new ScriptException(response.Message);
    }

    return ConvertHelper.ToTypedVariable<T>(response.Result);
}
}
Next, we use our CefSharpWrapper class from the Main method:
public class Program
{
/// <summary>
/// Synchronous entry point; runs <see cref="MainAsync"/> to completion.
/// </summary>
private static void Main()
{
    // GetAwaiter().GetResult() rethrows the original exception on failure,
    // whereas Wait() would wrap it in an AggregateException.
    MainAsync().GetAwaiter().GetResult();
}
/// <summary>
/// Loads the page, collects the <c>src</c> of every <c>img</c> element through jQuery
/// and saves each image into the target folder as <c>{index}.jpg</c>.
/// </summary>
private static async Task MainAsync()
{
    CefSharpWrapper wrapper = new CefSharpWrapper();
    wrapper.InitializeBrowser();
    try
    {
        string[] imageUrls = await wrapper.GetResultAfterPageLoad("https://yandex.ru", () =>
            wrapper.EvaluateJavascript<string[]>("$('img').map((index, element) => $(element).prop('src')).toArray()"));

        string imageFolder = "C://Test";
        // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
        Directory.CreateDirectory(imageFolder);

        using (WebClient client = new WebClient())
        {
            for (int i = 0; i < imageUrls.Length; i++)
            {
                string imageUrl = imageUrls[i];
                if (string.IsNullOrWhiteSpace(imageUrl))
                {
                    // An img element without a src yields an empty string; there is nothing to download.
                    continue;
                }

                byte[] fileBytes = await client.DownloadDataTaskAsync(imageUrl);
                // You can write an algorithm that picks the proper file extension.
                string imagePath = Path.Combine(imageFolder, i + ".jpg");
                File.WriteAllBytes(imagePath, fileBytes);
            }
        }
    }
    finally
    {
        // Shut the browser down even when loading or downloading fails.
        wrapper.ShutdownBrowser();
    }
}
}