C# WebClient returns 404 on a working page
I'm trying to write a piece of code that checks a web page and collects
some data that I need to work with. I already have the code, and it worked
pretty well for a while, but now something is wrong: it returns a 404
error code for 90% of the visited pages, and I'm sure those pages exist. I
have already added headers to my WebClient and a Thread.Sleep call to slow
the requests down a bit, but nothing seems to help. Could anyone take a look
and explain why it behaves this way?
/// <summary>
/// Downloads the resource at <paramref name="uri"/> and returns its body as a string.
/// </summary>
/// <param name="uri">Address of the resource to fetch.</param>
/// <returns>The response body returned by <c>WebClient.DownloadString</c>.</returns>
/// <exception cref="WebException">
/// Propagated from the underlying <see cref="WebClient"/> when the request fails
/// (for example when the server answers with 404).
/// </exception>
public static string Download(string uri)
{
    // WebClient is IDisposable; the using block releases it even when the download throws.
    using (WebClient client = new WebClient())
    {
        // Send a browser-like user agent with the request.
        client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");
        return client.DownloadString(uri);
    }
}
/// <summary>
/// For every row in <c>data</c>, downloads the matching product-detail page and shows
/// each link on it that is followed by <c>class="cloud-zoom"</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    foreach (string[] dataRow in data)
    {
        symbol = dataRow[0];

        // The URL id is the symbol without its last three characters; Remove would
        // throw ArgumentOutOfRangeException for anything shorter, so report and skip.
        if (symbol == null || symbol.Length < 3)
        {
            MessageBox.Show("invalid symbol: " + symbol);
            continue;
        }

        urlId = symbol.Remove(symbol.Length - 3);
        try
        {
            // Pause between requests; note that this blocks the calling thread.
            System.Threading.Thread.Sleep(2000);
            string pageHtml = Download(
                @"http://www.hansgrohe.pl/productdetail.html?model=" + urlId + "XXX&lang=pl_PL");

            foreach (Match match in Regex.Matches(pageHtml, "href=\"(.*?)\" class=\"cloud-zoom\""))
            {
                // Group 1 is the href value captured by the pattern.
                string link = match.Groups[1].Value;
                MessageBox.Show(symbol + " " + link);
            }
            MessageBox.Show("test");
        }
        catch (WebException ex)
        {
            HttpWebResponse resp = ex.Response as HttpWebResponse;
            if (ex.Status == WebExceptionStatus.ProtocolError && resp != null)
            {
                if (resp.StatusCode == HttpStatusCode.NotFound)
                {
                    MessageBox.Show("error 404: " + symbol);
                }
                else
                {
                    MessageBox.Show(resp.StatusCode.ToString());
                }
            }
            else
            {
                // Failures without an HTTP response (timeout, DNS, connection reset, ...)
                // were previously swallowed silently; surface them as well.
                MessageBox.Show(ex.Status.ToString() + ": " + symbol);
            }
        }
    }
}
No comments:
Post a Comment