Web Page Watchdog
Problem: I frequently check online shopping websites to see whether a product is on sale, or to see whether a web page has new articles. Doing this work by hand is time-consuming.
Solution: I wrote this simple C# program to do the work for me. It reads and saves the web page, and then waits for a given amount of time, say 12 hours. The program then reads the web page again and compares it with the copy saved before. If there is a difference, it saves a backup copy and lets you know. It then continues to watch the web page for further changes.
Source Code:
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Threading;
/// <summary>
/// Watches a single web page for changes.
/// The page is downloaded, saved as a baseline snapshot ("&lt;name&gt;_1.html"),
/// and then re-downloaded every <c>waitingTime</c> seconds into
/// "&lt;name&gt;_2.html". Whenever the two snapshots differ, a timestamped copy
/// of the new version is stored in the "backup" directory and the new version
/// becomes the baseline for the next comparison.
/// </summary>
class WatchDog
{
    // Snapshot slot the next download is written to:
    // 0 = nothing downloaded yet, 1 = baseline, 2 = latest version.
    int file = 0;
    // Loop flag; nothing in this class ever clears it.
    bool forever = true;
    readonly int waitingTime; //seconds
    readonly string webURL;
    readonly string fileName;
    readonly byte[] buf = new byte[8192];

    /// <summary>Creates a watchdog for one URL.</summary>
    /// <param name="webURL_pass">Address of the page to watch.</param>
    /// <param name="waitingTime_pass">Seconds to wait between two downloads.</param>
    /// <exception cref="ArgumentNullException"><paramref name="webURL_pass"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="waitingTime_pass"/> is negative.</exception>
    public WatchDog(string webURL_pass, int waitingTime_pass)
    {
        if (webURL_pass == null)
        {
            throw new ArgumentNullException("webURL_pass");
        }
        if (waitingTime_pass < 0)
        {
            throw new ArgumentOutOfRangeException("waitingTime_pass");
        }
        waitingTime = waitingTime_pass;
        webURL = webURL_pass;
        fileName = ToFileName(webURL);
    }

    /// <summary>
    /// Runs the watch loop: download, compare against the baseline, back up on
    /// change, sleep, repeat. Network and file errors are written to the
    /// console and the download is retried after the next wait, so a transient
    /// failure does not end the loop.
    /// </summary>
    public void run()
    {
        string baseline = fileName + "_1.html";
        while (forever)
        {
            if (file == 0)
            {
                file = 1;
            }
            string snapshot = fileName + "_" + file + ".html";
            try
            {
                // Fetch the whole page first and only then touch the file, so a
                // failed request never leaves a truncated snapshot behind.
                File.WriteAllBytes(snapshot, Download());
                string dateTime = Timestamp();
                if (file == 1)
                {
                    // Baseline saved; every later download goes to slot 2.
                    file = 2;
                }
                else if (!CompFiles(baseline, snapshot))
                {
                    //the page is changed, save a copy as a backup
                    Directory.CreateDirectory("backup");
                    string backup = Path.Combine("backup", fileName + "_" + dateTime + ".html");
                    File.Copy(snapshot, backup, true);
                    Console.WriteLine("Page changed, backup saved to " + backup);
                    // Promote the new version to baseline so the next download
                    // is compared against what was last seen.
                    File.Copy(snapshot, baseline, true);
                }
            }
            catch (WebException exc)
            {
                Console.WriteLine(exc.Message);
            }
            catch (IOException exc)
            {
                Console.WriteLine(exc.Message);
            }
            Console.WriteLine("sleeping for " + waitingTime + " seconds, start at " + Timestamp());
            Pause();
        }
        // Only reached if 'forever' is ever cleared.
        Console.WriteLine("Stopped.");
        Console.Read();
    }

    /// <summary>Compares two files byte by byte.</summary>
    /// <param name="file1">Path of the first file.</param>
    /// <param name="file2">Path of the second file.</param>
    /// <returns>
    /// <c>true</c> when both files exist and have identical content;
    /// <c>false</c> when they differ, when either file is missing, or when
    /// reading fails with an <see cref="IOException"/>.
    /// </returns>
    public static bool CompFiles(string file1, string file2)
    {
        FileStream f1 = null;
        FileStream f2 = null;
        try
        {
            try
            {
                // Read-only access is enough and also works on write-protected files.
                f1 = new FileStream(file1, FileMode.Open, FileAccess.Read, FileShare.Read);
                f2 = new FileStream(file2, FileMode.Open, FileAccess.Read, FileShare.Read);
            }
            catch (FileNotFoundException exc)
            {
                Console.WriteLine(exc.Message);
                return false;
            }
            try
            {
                // Different lengths can never match; skip the byte scan.
                bool same = f1.Length == f2.Length;
                while (same)
                {
                    int i = f1.ReadByte();
                    int j = f2.ReadByte();
                    if (i != j)
                    {
                        same = false;
                    }
                    else if (i == -1)
                    {
                        break; // both files ended together
                    }
                }
                Console.WriteLine(same ? "Files are the same." : "Files differ.");
                return same;
            }
            catch (IOException exc)
            {
                // Content that could not be read cannot be reported as equal.
                Console.WriteLine(exc.Message);
                return false;
            }
        }
        finally
        {
            // Release both handles on every path, including the early returns.
            if (f1 != null)
            {
                f1.Dispose();
            }
            if (f2 != null)
            {
                f2.Dispose();
            }
        }
    }

    // Downloads the watched page and returns its raw bytes, so the saved copy
    // is byte-identical to what the server sent regardless of its encoding.
    byte[] Download()
    {
        WebRequest request = WebRequest.Create(webURL);
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (MemoryStream page = new MemoryStream())
        {
            int count;
            while ((count = resStream.Read(buf, 0, buf.Length)) > 0)
            {
                page.Write(buf, 0, count);
            }
            return page.ToArray();
        }
    }

    // Sleeps for waitingTime seconds. The wait is computed in 64-bit
    // milliseconds and split into slices so large values cannot overflow int.
    void Pause()
    {
        long remaining = (long)waitingTime * 1000;
        do
        {
            int slice = (int)Math.Min(remaining, int.MaxValue);
            Thread.Sleep(slice);
            remaining -= slice;
        }
        while (remaining > 0);
    }

    // Builds "year_month_day_hour_minute_second" from one clock reading, so
    // the parts cannot straddle a second/minute/day boundary.
    static string Timestamp()
    {
        DateTime now = DateTime.Now;
        return "" + now.Year + "_" + now.Month + "_" + now.Day + "_" + now.Hour + "_" + now.Minute + "_" + now.Second;
    }

    // Turns a URL into a string usable as a file name: drops the http/https
    // scheme and replaces URL punctuation and any character the file system
    // rejects with '_'.
    static string ToFileName(string url)
    {
        string name = url.Replace("https://", "").Replace("http://", "");
        foreach (char c in new char[] { '/', '.', '?', '&', '=' })
        {
            name = name.Replace(c, '_');
        }
        foreach (char c in Path.GetInvalidFileNameChars())
        {
            name = name.Replace(c, '_');
        }
        return name;
    }
}
/// <summary>
/// Program entry point: starts one watchdog on its own thread.
/// </summary>
class MultiThread
{
    /// <summary>
    /// Creates a watchdog for http://www.sunnyspeed.com with a 10-second
    /// interval and runs its loop on a newly started thread.
    /// </summary>
    public static void Main()
    {
        WatchDog watcher = new WatchDog("http://www.sunnyspeed.com", 10);
        // The watch loop is handed to the thread as its start routine.
        ThreadStart watchLoop = watcher.run;
        Thread worker = new Thread(watchLoop);
        worker.Start();
    }
}
Thoughts:
1. This code treats even a single-bit difference as a changed file. Some web pages display the current time, so their content varies every minute and they will always be reported as changed. Be aware of this when choosing a page to watch.
2. A GUI could be added to run multiple jobs at the same time. This code uses only a tiny amount of system resources, so you can keep the program running all the time.
If you have any questions or suggestions, please email me.
Sunny Sun @ September, 2008
powered by www.sunnyspeed.com