I'm still new to threading. Let's say I have 50,000 URLs and I want to fetch their contents concurrently, processing 10 URLs at a time. As soon as one of those URLs finishes processing, the program should take the next one from the queue, until every URL in the list has been processed. How can I do that in C#? Here is the code I have so far:
/// <summary>
/// State object that travels with a single web request: the request itself,
/// a free-form payload, and the URL string the request was created from.
/// </summary>
class RequestState
{
    /// <summary>The request this state belongs to.</summary>
    public WebRequest Request;

    /// <summary>Free-form slot for any data that should accompany the request.</summary>
    public object Data;

    /// <summary>URL string, kept so results can be matched up later (database lookup, etc.).</summary>
    public string SiteUrl;

    /// <summary>Creates a state object holding the three supplied values as-is.</summary>
    /// <param name="request">Stored in <see cref="Request"/>.</param>
    /// <param name="data">Stored in <see cref="Data"/>.</param>
    /// <param name="siteUrl">Stored in <see cref="SiteUrl"/>.</param>
    public RequestState(WebRequest request, object data, string siteUrl)
    {
        SiteUrl = siteUrl;
        Data = data;
        Request = request;
    }
}
/// <summary>
/// Event handler (by its name, for the PROCESS_URLS control's Click event)
/// that starts processing by delegating to process_URLs.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void PROCESS_URLS_Click(object sender, EventArgs e)
{
    // Neither argument is needed: all of the work happens in process_URLs.
    process_URLs();
}
private int ThreadsCount = 0;
/// <summary>
/// Starts one asynchronous GET request for every selected row of URLS_LISTVIEW.
/// </summary>
/// <remarks>
/// The URL is read from the first sub-item of each selected row. Each request is
/// completed by UpdateItem and is handed to ScanTimeoutCallback if it has not
/// signalled within 30 seconds. ThreadsCount is set to the number of selected rows
/// before any request is started, and is decremented here for every request that
/// could not be started, so the counter can still reach zero.
/// </remarks>
private void process_URLs()
{
    const int requestTimeoutMilliseconds = 30 * 1000;

    // Number of requests that still have to finish (or fail) before we are done.
    int selectedCount = URLS_LISTVIEW.SelectedItems.Count;
    ThreadsCount = selectedCount;

    for (int i = 0; i < selectedCount; i++)
    {
        string url = URLS_LISTVIEW.SelectedItems[i].SubItems[0].Text.Trim();
        try
        {
            WebRequest request = WebRequest.Create(url);
            request.Method = "GET";

            RequestState state = new RequestState(request, new object(), url);
            IAsyncResult result = request.BeginGetResponse(new AsyncCallback(UpdateItem), state);

            // One-shot wait: fires ScanTimeoutCallback when the request signals or the timeout elapses.
            ThreadPool.RegisterWaitForSingleObject(
                result.AsyncWaitHandle,
                new WaitOrTimerCallback(ScanTimeoutCallback),
                state,
                requestTimeoutMilliseconds,
                true);
        }
        catch (UriFormatException ex)
        {
            // Malformed or empty URL text.
            ReportRequestStartFailure(url, ex);
        }
        catch (NotSupportedException ex)
        {
            // URL scheme with no registered WebRequest handler.
            ReportRequestStartFailure(url, ex);
        }
        catch (InvalidOperationException ex)
        {
            // Also covers WebException and ProtocolViolationException, which derive from it.
            ReportRequestStartFailure(url, ex);
        }
    }
}

/// <summary>
/// Reports a request that could not be started and removes it from the pending count,
/// announcing completion if it was the last outstanding one.
/// </summary>
private void ReportRequestStartFailure(string url, Exception error)
{
    MessageBox.Show(url + ": " + error.Message);

    // This request will never reach UpdateItem, so account for it here.
    if (Interlocked.Decrement(ref ThreadsCount) < 1)
    {
        MessageBox.Show("finished");
    }
}
/// <summary>
/// Completion callback passed to BeginGetResponse: reads the whole response body
/// as text, then records that one more request is done and shows "finished" once
/// the pending counter drops below one.
/// </summary>
/// <param name="result">
/// Async result whose AsyncState is the RequestState that was supplied when the request was started.
/// </param>
private void UpdateItem(IAsyncResult result)
{
    // Recover the custom state object that was handed to BeginGetResponse.
    RequestState state = (RequestState)result.AsyncState;
    WebRequest request = state.Request;

    try
    {
        // WebResponse (not HttpWebResponse) so non-HTTP schemes do not fail on a cast.
        using (WebResponse response = request.EndGetResponse(result))
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body))
        {
            // Data grabbed; process the response text here.
            string dataString = reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // Raised for failed requests and for requests aborted by the timeout callback.
        System.Diagnostics.Trace.TraceWarning("Request for {0} failed: {1}", state.SiteUrl, ex.Message);
    }
    catch (IOException ex)
    {
        // The connection broke while the body was being read.
        System.Diagnostics.Trace.TraceWarning("Reading {0} failed: {1}", state.SiteUrl, ex.Message);
    }
    finally
    {
        // Callbacks run concurrently, so the decrement must be atomic; it also has to
        // happen on failure, otherwise the counter would never reach zero.
        if (Interlocked.Decrement(ref ThreadsCount) < 1)
        {
            // All requests have finished: tell the user.
            MessageBox.Show("finished");
        }
    }
}
/// <summary>
/// Wait callback: when invoked because the wait timed out, aborts the request
/// held by the supplied state object. Does nothing when the wait was signalled.
/// </summary>
/// <param name="state">The RequestState registered with the wait; may be null.</param>
/// <param name="timedOut">True when the callback fired because the timeout elapsed.</param>
private static void ScanTimeoutCallback(object state, bool timedOut)
{
    if (!timedOut)
    {
        return;
    }

    RequestState pending = (RequestState)state;
    if (pending == null)
    {
        return;
    }

    pending.Request.Abort();
}
any ideas would be appreciated :)
kind regards,
Have a look at the TPL, there's an option to specify the maximum parallelism:
List<string> UriList = new List<string>();
...
// Cap the loop at ten concurrent iterations, then hand every URL to ProcessUrl.
ParallelOptions throttle = new ParallelOptions();
throttle.MaxDegreeOfParallelism = 10;
Parallel.ForEach(UriList, throttle, url => ProcessUrl(url));
This would process at most 10 URLs in parallel, since we use the overload of Parallel.ForEach() that allows us to specify MaxDegreeOfParallelism.
Edit:
Here is a simple example that downloads the HTML from http://google.com 50 times in parallel (with at most 10 downloads running concurrently) and stores the results in an array:
// Build a list of 50 identical URLs to download.
List<string> UriList = new List<string>();
for (int i = 0; i < 50; i++)
{
    UriList.Add("http://google.com");
}

// One result slot per URL; every iteration writes only the slot at its own index.
string[] HtmlResults = new string[UriList.Count];
Parallel.ForEach(UriList,
    new ParallelOptions() { MaxDegreeOfParallelism = 10 },
    // This overload supplies the element, the loop state (unused here) and the element's index.
    (url, loopState, index) =>
    {
        // WebClient is disposable: release it as soon as the download is done.
        using (WebClient wc = new WebClient())
        {
            HtmlResults[index] = wc.DownloadString(url);
        }
    });
Not to create more confusion, but in your particular case PLINQ would also work very well, since there are no dependencies between the items to process and each URL is "transformed" into an actual result:
// Download every URL with at most 10 concurrent workers; AsOrdered keeps the
// results in the same order as UriList.
var htmlResultList = UriList.AsParallel()
    .WithDegreeOfParallelism(10)
    .AsOrdered()
    .Select(url =>
    {
        // WebClient is disposable: release it as soon as the download is done.
        using (WebClient wc = new WebClient())
        {
            return wc.DownloadString(url);
        }
    })
    .ToList();
(This should be a comment under @BrokenGlass, but I can't post comments yet)
You can take a look at this article on how to use Parallel Processing and PLINQ to do what you're looking for. The entire set of articles that precede it have some good information as well.
Edit: If this is a standalone, spawn a new thread to run this portion in the background so it doesn't result in an unresponsive UI.
Edit 2: If you want, you can also throw your strings in a ConcurrentQueue so you can add items from the UI while looking them up.
精彩评论