Showing posts with label Snippets. Show all posts
Showing posts with label Snippets. Show all posts

Monday, August 22, 2016

Kill Child Process When Parent Exit C#


 

 

Taken from
http://stackoverflow.com/questions/3342941/kill-child-process-when-parent-process-is-killed

for My NOTE



/// <summary>
/// Allows processes to be automatically killed if this parent process unexpectedly quits.
/// This feature requires Windows 8 or greater. On Windows 7, nothing is done.</summary>
/// <remarks>References:
///  http://stackoverflow.com/a/4657392/386091
///  http://stackoverflow.com/a/9164742/386091 </remarks>
public static class ChildProcessTracker
{
    /// <summary>
    /// Add the process to be tracked. If our current process is killed, the child processes
    /// that we are tracking will be automatically killed, too. If the child process terminates
    /// first, that's fine, too.</summary>
    /// <param name="process">The child process to assign to the tracking job.</param>
    /// <exception cref="ArgumentNullException"><paramref name="process"/> is null.</exception>
    /// <exception cref="Win32Exception">The process could not be assigned to the job.</exception>
    public static void AddProcess(Process process)
    {
        if (process == null)
            throw new ArgumentNullException("process");

        // The handle stays zero when the static constructor bailed out early
        // (OS older than 6.2); tracking is silently skipped in that case.
        if (s_jobHandle == IntPtr.Zero)
            return;

        if (!AssignProcessToJobObject(s_jobHandle, process.Handle))
            throw new Win32Exception();
    }

    static ChildProcessTracker()
    {
        // This feature requires Windows 8 or later. To support Windows 7 requires
        //  registry settings to be added if you are using Visual Studio plus an
        //  app.manifest change.
        //  http://stackoverflow.com/a/4232259/386091
        //  http://stackoverflow.com/a/9507862/386091
        if (Environment.OSVersion.Version < new Version(6, 2))
            return;

        // The job name is optional (and can be null) but it helps with diagnostics.
        //  If it's not null, it has to be unique. Use SysInternals' Handle command-line
        //  utility: handle -a ChildProcessTracker
        string jobName;
        using (Process current = Process.GetCurrentProcess())
        {
            jobName = "ChildProcessTracker" + current.Id;
        }

        s_jobHandle = CreateJobObject(IntPtr.Zero, jobName);
        if (s_jobHandle == IntPtr.Zero)
        {
            // Without a job there is nothing to configure; surface the Win32 error
            // instead of failing later with a misleading message.
            throw new Win32Exception();
        }

        var info = new JOBOBJECT_BASIC_LIMIT_INFORMATION();

        // This is the key flag. When our process is killed, Windows will automatically
        //  close the job handle, and when that happens, we want the child processes to
        //  be killed, too.
        info.LimitFlags = JOBOBJECTLIMIT.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;

        var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION();
        extendedInfo.BasicLimitInformation = info;

        int length = Marshal.SizeOf(typeof(JOBOBJECT_EXTENDED_LIMIT_INFORMATION));
        IntPtr extendedInfoPtr = Marshal.AllocHGlobal(length);
        try
        {
            Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);

            if (!SetInformationJobObject(s_jobHandle, JobObjectInfoType.ExtendedLimitInformation,
                extendedInfoPtr, (uint)length))
            {
                throw new Win32Exception();
            }
        }
        finally
        {
            Marshal.FreeHGlobal(extendedInfoPtr);
        }
    }

    // SetLastError = true is required on every import whose failure is reported through
    // Win32Exception(), which reads the error code captured by the marshaller.
    [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
    static extern IntPtr CreateJobObject(IntPtr lpJobAttributes, string name);

    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool SetInformationJobObject(IntPtr job, JobObjectInfoType infoType,
        IntPtr lpJobObjectInfo, uint cbJobObjectInfoLength);

    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);

    // Windows will automatically close any open job handles when our process terminates.
    //  This can be verified by using SysInternals' Handle utility. When the job handle
    //  is closed, the child processes will be killed.
    private static readonly IntPtr s_jobHandle;
}

/// <summary>
/// Information-class selector passed as the <c>infoType</c> argument of
/// <c>SetInformationJobObject</c>. Members are listed in ascending numeric order.
/// </summary>
public enum JobObjectInfoType
{
    BasicLimitInformation = 2,
    BasicUIRestrictions = 4,
    SecurityLimitInformation = 5,
    EndOfJobTimeInformation = 6,
    AssociateCompletionPortInformation = 7,
    ExtendedLimitInformation = 9,
    GroupInformation = 11
}

/// <summary>
/// Sequential-layout structure holding the basic job limits. It is embedded as the first
/// field of <c>JOBOBJECT_EXTENDED_LIMIT_INFORMATION</c>; field order must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct JOBOBJECT_BASIC_LIMIT_INFORMATION
{
    public long PerProcessUserTimeLimit;
    public long PerJobUserTimeLimit;
    public JOBOBJECTLIMIT LimitFlags;
    public UIntPtr MinimumWorkingSetSize;
    public UIntPtr MaximumWorkingSetSize;
    public uint ActiveProcessLimit;
    // NOTE(review): the other copy of this struct on this page declares Affinity as UIntPtr;
    // a fixed 64-bit field presumably shifts the following fields in a 32-bit process — confirm
    // against the Win32 definition before relying on PriorityClass/SchedulingClass.
    public long Affinity;
    public uint PriorityClass;
    public uint SchedulingClass;
}

/// <summary>
/// Bit flags stored in <c>JOBOBJECT_BASIC_LIMIT_INFORMATION.LimitFlags</c>.
/// Only the flag used by this snippet is declared.
/// </summary>
[Flags]
public enum JOBOBJECTLIMIT : uint
{
    /// <summary>Requests that the processes in the job be killed when the job handle is closed.</summary>
    JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE = 0x00002000
}

/// <summary>
/// Six 64-bit counters (operation and transfer counts for read, write and other I/O),
/// laid out sequentially. Embedded in <c>JOBOBJECT_EXTENDED_LIMIT_INFORMATION</c>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct IO_COUNTERS
{
    public ulong ReadOperationCount;
    public ulong WriteOperationCount;
    public ulong OtherOperationCount;
    public ulong ReadTransferCount;
    public ulong WriteTransferCount;
    public ulong OtherTransferCount;
}

/// <summary>
/// Sequential-layout structure that wraps the basic limits together with I/O counters and
/// memory limit/peak fields. ChildProcessTracker marshals an instance of it to unmanaged
/// memory and passes it to SetInformationJobObject with
/// JobObjectInfoType.ExtendedLimitInformation; field order must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
{
    public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
    public IO_COUNTERS IoInfo;
    // Pointer-sized fields: 4 bytes in a 32-bit process, 8 bytes in a 64-bit process.
    public UIntPtr ProcessMemoryLimit;
    public UIntPtr JobMemoryLimit;
    public UIntPtr PeakProcessMemoryUsed;
    public UIntPtr PeakJobMemoryUsed;
}


This answer started with @Matt Howells' excellent answer plus others (see links in the code below). Improvements:
  • Supports 32-bit and 64-bit.
  • Fixes some problems in @Matt Howells' answer:
    1. The small memory leak of extendedInfoPtr
    2. The 'Win32' compile error, and
    3. A stack-unbalanced exception I got in the call to CreateJobObject (using Windows 10, Visual Studio 2015, 32-bit).
  • Names the Job, so if you use SysInternals, for example, you can easily find it.
  • Has a somewhat simpler API and less code.
Here's how to use this code:
// Start (or otherwise obtain) the child process.
Process child = Process.Start(exePath, args);
// Register it so that it is killed if this parent process is killed.
ChildProcessTracker.AddProcess(child);
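To support Windows 7 requires an app.manifest change plus, if you are using Visual Studio, some registry settings (see the links in the comments of the static constructor).
In my case, I didn't need to support Windows 7, so I have a simple check at the top of the static constructor above.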

Tuesday, August 25, 2015

Resizing Image With quality or proportional

 

Resizing image with quality. 

Taken from http://stackoverflow.com/a/24199315/1247243 all credit belong to Mark


/// <summary>
/// Resize the image to the specified width and height using high-quality settings.
/// </summary>
/// <param name="image">The image to resize.</param>
/// <param name="width">The width to resize to.</param>
/// <param name="height">The height to resize to.</param>
/// <returns>The resized image. The caller owns it and must dispose it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public static Bitmap ResizeImage(Image image, int width, int height)
{
    if (image == null)
        throw new ArgumentNullException("image");

    var destRect = new Rectangle(0, 0, width, height);
    var destImage = new Bitmap(width, height);

    try
    {
        // Keep the source DPI so the physical size scales with the pixel size.
        destImage.SetResolution(image.HorizontalResolution, image.VerticalResolution);

        using (var graphics = Graphics.FromImage(destImage))
        {
            graphics.CompositingMode = CompositingMode.SourceCopy;
            graphics.CompositingQuality = CompositingQuality.HighQuality;
            graphics.InterpolationMode = InterpolationMode.HighQualityBicubic;
            graphics.SmoothingMode = SmoothingMode.HighQuality;
            graphics.PixelOffsetMode = PixelOffsetMode.HighQuality;

            using (var wrapMode = new ImageAttributes())
            {
                // Mirror the image at its edges so border pixels are not blended
                // with transparent samples taken from outside the image.
                wrapMode.SetWrapMode(WrapMode.TileFlipXY);
                graphics.DrawImage(image, destRect, 0, 0, image.Width, image.Height, GraphicsUnit.Pixel, wrapMode);
            }
        }

        return destImage;
    }
    catch
    {
        // The bitmap never reaches the caller on failure, so release it here.
        destImage.Dispose();
        throw;
    }
}
  • wrapMode.SetWrapMode(WrapMode.TileFlipXY) prevents ghosting around the image borders -- naive resizing will sample transparent pixels beyond the image boundaries, but by mirroring the image we can get a better sample (this setting is very noticeable)
  • destImage.SetResolution maintains DPI regardless of physically size -- may increase quality when reducing image dimensions or when printing
  • Compositing controls how pixels are blended with the background -- might not be needed since we're only drawing one thing
  • InterpolationMode determines how intermediate values between two endpoints are calculated
  • SmoothingMode specifies whether lines, curves, and the edges of filled areas use smoothing (also called antialiasing) -- probably only works on vectors
  • PixelOffsetMode affects rendering quality when drawing the new image
Maintaining aspect ratio is left as an exercise for the reader (actually, I just don't think it's this function's job to do that for you).
Also, this is a good article describing some of the pitfalls with image resizing. The above function will cover most of them, but you still have to worry about saving.

Or using proportion resizing 


taken from http://stackoverflow.com/a/6501997/1247243 credit belong to Alex

Like this?
/// <summary>
/// Example: loads c:\logo.png, scales it to fit within 300x400 and saves the
/// result as a PNG to c:\test.png.
/// </summary>
public static void Test()
{
    using (Image source = Image.FromFile(@"c:\logo.png"))
    {
        using (Image scaled = ScaleImage(source, 300, 400))
        {
            scaled.Save(@"c:\test.png", ImageFormat.Png);
        }
    }
}

/// <summary>
/// Scales an image proportionally so that it fits inside a maxWidth x maxHeight box.
/// </summary>
/// <param name="image">The image to scale.</param>
/// <param name="maxWidth">Maximum width of the result, in pixels.</param>
/// <param name="maxHeight">Maximum height of the result, in pixels.</param>
/// <returns>A new image with the source aspect ratio. The caller must dispose it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public static Image ScaleImage(Image image, int maxWidth, int maxHeight)
{
    if (image == null)
        throw new ArgumentNullException("image");

    var ratioX = (double)maxWidth / image.Width;
    var ratioY = (double)maxHeight / image.Height;
    var ratio = Math.Min(ratioX, ratioY);

    // Truncation can yield 0 for very elongated images, which the Bitmap
    // constructor rejects; keep each side at least one pixel.
    var newWidth = Math.Max(1, (int)(image.Width * ratio));
    var newHeight = Math.Max(1, (int)(image.Height * ratio));

    var newImage = new Bitmap(newWidth, newHeight);
    try
    {
        using (var graphics = Graphics.FromImage(newImage))
            graphics.DrawImage(image, 0, 0, newWidth, newHeight);

        return newImage;
    }
    catch
    {
        // The bitmap never reaches the caller on failure, so release it here.
        newImage.Dispose();
        throw;
    }
}

Saturday, August 8, 2015

OpenFileDialog and SaveFileDialog

 


 

Open file:

            // The dialog is disposable; release it once the result has been read.
            using (OpenFileDialog opendialog = new OpenFileDialog())
            {
                opendialog.Filter = "Text files|*.txt";
                opendialog.Title = "Import Settings";
                DialogResult result = opendialog.ShowDialog(); // Show the dialog.
                if (result == DialogResult.OK) // Test result.
                {
                    // Use opendialog.FileName here.
                }
            }



Save File Dialog
            // The dialog is disposable; release it once the result has been read.
            using (SaveFileDialog savedialog = new SaveFileDialog())
            {
                savedialog.Filter = "Text files|*.txt";
                savedialog.Title = "Export Settings";
                DialogResult result = savedialog.ShowDialog(); // Show the dialog.
                if (result == DialogResult.OK) // Test result.
                {
                    // Use savedialog.FileName here.
                }
            }

Friday, August 7, 2015

Snippet Import and Export setting in C#


Don't forget to change the namespace to match your own namespace.

Credit http://snipplr.com/view/24482/persisting-data-using-xml-config-files-in-winforms-saving-and-restoring-user-and-application-data/  taken as my own snippet




using System;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Xml.Linq;
using System.Xml.XPath;


namespace TestingGround
{
    /// <summary>
    /// Imports and exports the application's user settings as an XML configuration file.
    /// </summary>
    public static class SettingsIO
    {
        /// <summary>
        /// Replaces the current user settings with the settings section found in the given
        /// file. If the import fails for any reason, the last saved settings are reloaded.
        /// </summary>
        /// <param name="settingsFilePath">Path of the XML file produced by <see cref="Export"/>.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        internal static void Import(string settingsFilePath)
        {
            if (!File.Exists(settingsFilePath))
            {
                // Include the path so the caller can report which file was missing.
                throw new FileNotFoundException("Settings file not found.", settingsFilePath);
            }

            var appSettings = Properties.Settings.Default;
            try
            {
                var config = ConfigurationManager.OpenExeConfiguration(
                    ConfigurationUserLevel.PerUserRoamingAndLocal);

                // e.g. "MyApplication.Properties.Settings"
                string appSettingsXmlName =
                    Properties.Settings.Default.Context["GroupName"].ToString();

                // Open settings file as XML
                var import = XDocument.Load(settingsFilePath);
                // Get the whole XML inside the settings node
                var settings = import.XPathSelectElements("//" + appSettingsXmlName);

                config.GetSectionGroup("userSettings")
                    .Sections[appSettingsXmlName]
                    .SectionInformation
                    .SetRawXml(settings.Single().ToString());
                config.Save(ConfigurationSaveMode.Modified);
                ConfigurationManager.RefreshSection("userSettings");

                appSettings.Reload();
            }
            catch (Exception ex) // Should make this more specific
            {
                // Import stays best-effort, but leave a trace instead of failing silently.
                System.Diagnostics.Trace.TraceWarning(
                    "Could not import settings from '{0}': {1}", settingsFilePath, ex);
                appSettings.Reload(); // from last set saved, not defaults
            }
        }

        /// <summary>
        /// Saves the current user settings and writes the per-user configuration to the given file.
        /// </summary>
        /// <param name="settingsFilePath">Destination path of the exported configuration file.</param>
        internal static void Export(string settingsFilePath)
        {
            // Persist pending changes first so the exported file is up to date.
            Properties.Settings.Default.Save();
            var config = ConfigurationManager.OpenExeConfiguration(
                ConfigurationUserLevel.PerUserRoamingAndLocal);
            config.SaveAs(settingsFilePath);
        }
    }
}

 OOT

Or you can use this one: https://github.com/crdx/PortableSettingsProvider . It saves your settings in YOURAPPNAME.settings, but it takes a little bit of work. :) It is more portable, like an INI file in Delphi or Pascal.

Good luck

Tuesday, August 4, 2015

MessageBox in top other application

MessageBox in top other application for warning....


// The temporary top-most owner form is disposable, so release it after the box closes.
using (Form owner = new Form() { TopMost = true })
{
    MessageBox.Show(owner, "You have not inputted a username or password. Would you like to configure your settings now?",
                     "Settings Needed",
                     MessageBoxButtons.YesNo,
                     MessageBoxIcon.Question);
}
or

// Same prompt, made top-most through the MB_TOPMOST option bit instead of a helper form.
MessageBox.Show(this,
                "You have not inputted a username or password. Would you like to configure your settings now?",
                "Settings Needed",
                MessageBoxButtons.YesNo,
                MessageBoxIcon.Question,
                MessageBoxDefaultButton.Button1,  // specify "Yes" as the default
                (MessageBoxOptions)0x40000);      // specify MB_TOPMOST
another way using class...



/// <summary>
/// Shows message boxes owned by a hidden top-most form so that they appear above
/// the windows of other applications.
/// </summary>
public static class TopMostMessageBox
{
    /// <summary>Shows a top-most message box with an OK button and no title.</summary>
    /// <param name="message">The text to display.</param>
    /// <returns>The button the user clicked.</returns>
    public static DialogResult Show(string message)
    {
        return Show(message, string.Empty, MessageBoxButtons.OK);
    }

    /// <summary>Shows a top-most message box with an OK button.</summary>
    /// <param name="message">The text to display.</param>
    /// <param name="title">The caption of the message box.</param>
    /// <returns>The button the user clicked.</returns>
    public static DialogResult Show(string message, string title)
    {
        return Show(message, title, MessageBoxButtons.OK);
    }

    /// <summary>Shows a top-most message box with the given buttons.</summary>
    /// <param name="message">The text to display.</param>
    /// <param name="title">The caption of the message box.</param>
    /// <param name="buttons">The buttons to offer.</param>
    /// <returns>The button the user clicked.</returns>
    public static DialogResult Show(string message, string title,
        MessageBoxButtons buttons)
    {
        // Create a host form that is a TopMost window which will be the
        // parent of the MessageBox. The using block disposes it even when
        // MessageBox.Show throws.
        using (Form topmostForm = new Form())
        {
            // We do not want anyone to see this window so position it off the
            // visible screen and make it as small as possible
            topmostForm.Size = new System.Drawing.Size(1, 1);
            topmostForm.StartPosition = FormStartPosition.Manual;
            System.Drawing.Rectangle rect = SystemInformation.VirtualScreen;
            // Point takes (x, y): x goes past the right edge, y past the bottom edge.
            topmostForm.Location = new System.Drawing.Point(rect.Right + 10,
                rect.Bottom + 10);
            topmostForm.Show();
            // Make this form the active form and make it TopMost
            topmostForm.Focus();
            topmostForm.BringToFront();
            topmostForm.TopMost = true;
            // Finally show the MessageBox with the form just created as its owner
            return MessageBox.Show(topmostForm, message, title, buttons);
        }
    }
}
Called like this


// Example call using the three-argument overload.
TopMostMessageBox.Show(
    message: "This will appear in a message box that is a topmost window",
    title: "Title",
    buttons: MessageBoxButtons.AbortRetryIgnore);

Friday, July 31, 2015

Right Click menu in richtextbox in C#



Add this code to your event....
        /// <summary>
        /// On the first right-click, builds the Cut/Copy/Paste/Select All context menu
        /// and attaches it to richTextBox1. Later clicks reuse the attached menu.
        /// </summary>
        private void richTextBox1_MouseDown(object sender, MouseEventArgs e)
        {
            if (e.Button != System.Windows.Forms.MouseButtons.Right)
            {
                return;
            }

            // Build the menu only once; recreating it on every click leaves the
            // previous ContextMenu instances undisposed.
            if (richTextBox1.ContextMenu != null)
            {
                return;
            }

            ContextMenu contextMenu = new System.Windows.Forms.ContextMenu();
            contextMenu.MenuItems.Add(new MenuItem("Cut", new EventHandler(CutAction)));
            contextMenu.MenuItems.Add(new MenuItem("Copy", new EventHandler(CopyAction)));
            contextMenu.MenuItems.Add(new MenuItem("Paste", new EventHandler(PasteAction)));
            contextMenu.MenuItems.Add(new MenuItem("Select All", new EventHandler(SelectAll)));
            richTextBox1.ContextMenu = contextMenu;
        }

        /// <summary>Menu handler: cuts the current selection of richTextBox1.</summary>
        private void CutAction(object sender, EventArgs e)
        {
            this.richTextBox1.Cut();
        }

        /// <summary>
        /// Menu handler: copies the selected text of richTextBox1 to the clipboard.
        /// Does nothing when no text is selected.
        /// </summary>
        void CopyAction(object sender, EventArgs e)
        {
            string selection = richTextBox1.SelectedText;

            // Clipboard.SetText rejects null/empty text, so skip an empty selection.
            if (!string.IsNullOrEmpty(selection))
            {
                Clipboard.SetText(selection);
            }
        }

        /// <summary>
        /// Menu handler: pastes clipboard text into richTextBox1 at the caret,
        /// replacing the current selection if there is one.
        /// </summary>
        void PasteAction(object sender, EventArgs e)
        {
            if (Clipboard.ContainsText())
            {
                // Assigning SelectedText inserts at the caret; appending to Text would
                // always paste at the end and rebuild the whole content.
                richTextBox1.SelectedText = Clipboard.GetText(TextDataFormat.Text);
            }
        }

        /// <summary>Menu handler: selects all text in richTextBox1 and gives it focus.</summary>
        private void SelectAll(object sender, EventArgs e)
        {
            this.richTextBox1.SelectAll();
            this.richTextBox1.Focus();
        }

Thursday, June 11, 2015

MessageBox using Winform and richtextbox

/// <summary>
/// Shows a modal, fixed-size dialog whose body is a read-only RichTextBox.
/// </summary>
/// <param name="title">Caption of the dialog.</param>
/// <param name="message">Text shown in the rich text box.</param>
private void ShowRichMessageBox(string title, string message)
        {
            // A form shown with ShowDialog is not disposed when it closes, so dispose it
            // explicitly; that also disposes the RichTextBox added to its Controls.
            using (Form richMessageBox = new Form())
            {
                RichTextBox rtbMessage = new RichTextBox();
                rtbMessage.Text = message;
                rtbMessage.Dock = DockStyle.Fill;
                rtbMessage.ReadOnly = true;
                rtbMessage.BorderStyle = BorderStyle.FixedSingle;

                richMessageBox.Text = title;
                richMessageBox.StartPosition = FormStartPosition.CenterScreen;
                richMessageBox.MaximizeBox = false;
                richMessageBox.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
                richMessageBox.Size = new Size(300, 250);

                richMessageBox.Controls.Add(rtbMessage);
                richMessageBox.ShowDialog();
            }
        }

Getting WebException with full response body

Di load langsung...
   // Downloads a page and, when the request fails, reads the body of the error response.
   try
   {
       using (WebClient client = new WebClient())
       {
           client.Encoding = Encoding.UTF8;
           string content = client.DownloadString("https://sandiegodata.atlassian.net/wiki/pages/doaddcomment.action?pageId=524365");
           Console.WriteLine(content);
           Console.ReadKey();
       }
   }
   catch (WebException erro)
   {
       string webpageContent;
       if (erro.Response == null)
       {
           // No response at all (e.g. connection failure): report the status only.
           webpageContent = "Error:" + erro.Status;
       }
       else
       {
           // The server answered with an error; its body is the response stream.
           using (var reader = new StreamReader(erro.Response.GetResponseStream()))
           {
               webpageContent = reader.ReadToEnd();
           }
       }

       // Surface the result instead of discarding it.
       Console.WriteLine(webpageContent);
   }
with json.net
  // Variant of the handler above that parses a JSON error body with Json.NET.
  // Expects the body to contain an "error" object with "code" and "message" members.
  catch (WebException ex)
  {
      if (ex.Response == null)
      {
          webpageContent = "Error:" + ex.Status;
      }
      else
      {
          string resp;
          using (var reader = new StreamReader(ex.Response.GetResponseStream()))
          {
              resp = reader.ReadToEnd();
          }

          dynamic obj = JsonConvert.DeserializeObject(resp);
          var messageFromServer = obj.error.message;
          var codemessage = obj.error.code;
          webpageContent = "Error:" + codemessage.Value + ":" + messageFromServer.Value;
      }
  }

Saturday, June 6, 2015

Killing child process when parent closing, OTHERWAY

taken from http://stackoverflow.com/questions/6266820/working-example-of-createjobobject-setinformationjobobject-pinvoke-in-net

http://stackoverflow.com/questions/3342941/kill-child-process-when-parent-process-is-killed/4657392#4657392
http://stackoverflow.com/questions/14955045/assignprocesstojobobject-dont-assign-properly
http://www.xtremevbtalk.com/showthread.php?p=1335552#post1335552

Working example:
https://www.add-in-express.com/creating-addins-blog/2013/11/05/release-excel-com-objects/

using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace JobManagement
{
    /// <summary>
    /// Wraps a Win32 job object configured with the kill-on-job-close limit (0x2000):
    /// processes added to the job are killed when the job handle is closed.
    /// </summary>
    public class Job : IDisposable
    {
        // SetLastError = true is needed on every import whose failure is reported
        // through Marshal.GetLastWin32Error().
        [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        static extern IntPtr CreateJobObject(IntPtr a, string lpName);

        [DllImport("kernel32.dll", SetLastError = true)]
        static extern bool SetInformationJobObject(IntPtr hJob, JobObjectInfoType infoType, IntPtr lpJobObjectInfo, UInt32 cbJobObjectInfoLength);

        [DllImport("kernel32.dll", SetLastError = true)]
        static extern bool AssignProcessToJobObject(IntPtr job, IntPtr process);

        [DllImport("kernel32.dll", SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        static extern bool CloseHandle(IntPtr hObject);

        private IntPtr handle;
        private bool disposed;

        /// <summary>Creates an unnamed job object and applies the kill-on-close limit.</summary>
        /// <exception cref="System.ComponentModel.Win32Exception">
        /// The job could not be created or configured.</exception>
        public Job()
        {
            handle = CreateJobObject(IntPtr.Zero, null);
            if (handle == IntPtr.Zero)
            {
                int createError = Marshal.GetLastWin32Error();
                throw new System.ComponentModel.Win32Exception(createError,
                    string.Format("Unable to create job object.  Error: {0}", createError));
            }

            var info = new JOBOBJECT_BASIC_LIMIT_INFORMATION
            {
                LimitFlags = 0x2000
            };

            var extendedInfo = new JOBOBJECT_EXTENDED_LIMIT_INFORMATION
            {
                BasicLimitInformation = info
            };

            int length = Marshal.SizeOf(typeof(JOBOBJECT_EXTENDED_LIMIT_INFORMATION));
            IntPtr extendedInfoPtr = Marshal.AllocHGlobal(length);
            try
            {
                Marshal.StructureToPtr(extendedInfo, extendedInfoPtr, false);

                if (!SetInformationJobObject(handle, JobObjectInfoType.ExtendedLimitInformation, extendedInfoPtr, (uint)length))
                {
                    // Capture the code before any other native call can overwrite it.
                    int setError = Marshal.GetLastWin32Error();

                    // The constructor is failing, so Dispose will never run: close the handle now.
                    Close();
                    throw new System.ComponentModel.Win32Exception(setError,
                        string.Format("Unable to set information.  Error: {0}", setError));
                }
            }
            finally
            {
                // The unmanaged copy is only needed for the duration of the call.
                Marshal.FreeHGlobal(extendedInfoPtr);
            }
        }

        /// <summary>Closes the job handle.</summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        // No finalizer is declared on purpose: closing the handle from a finalizer would kill
        // the tracked processes whenever an unreferenced Job instance is garbage collected.
        private void Dispose(bool disposing)
        {
            if (disposed)
                return;

            Close();
            disposed = true;
        }

        /// <summary>Closes the job handle if it is still open. Safe to call repeatedly.</summary>
        public void Close()
        {
            if (handle == IntPtr.Zero)
                return;

            CloseHandle(handle);
            handle = IntPtr.Zero;
        }

        /// <summary>Assigns the process with the given handle to this job.</summary>
        /// <param name="processHandle">Native handle of the process to add.</param>
        /// <returns>true when the assignment succeeded; otherwise false.</returns>
        public bool AddProcess(IntPtr processHandle)
        {
            return AssignProcessToJobObject(handle, processHandle);
        }

        /// <summary>Assigns the process with the given id to this job.</summary>
        /// <param name="processId">Id of a running process.</param>
        /// <returns>true when the assignment succeeded; otherwise false.</returns>
        public bool AddProcess(int processId)
        {
            // The Process wrapper is only needed to obtain the handle for the call.
            using (Process target = Process.GetProcessById(processId))
            {
                return AddProcess(target.Handle);
            }
        }

    }

    #region Helper classes

    /// <summary>
    /// Six 64-bit counters (operation and transfer counts for read, write and other I/O),
    /// laid out sequentially. Embedded in JOBOBJECT_EXTENDED_LIMIT_INFORMATION.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    struct IO_COUNTERS
    {
        public ulong ReadOperationCount;
        public ulong WriteOperationCount;
        public ulong OtherOperationCount;
        public ulong ReadTransferCount;
        public ulong WriteTransferCount;
        public ulong OtherTransferCount;
    }


    /// <summary>
    /// Sequential-layout structure holding the basic job limits. The Job constructor sets
    /// only LimitFlags; field order must not change because the struct is marshalled
    /// to native code as part of JOBOBJECT_EXTENDED_LIMIT_INFORMATION.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    struct JOBOBJECT_BASIC_LIMIT_INFORMATION
    {
        public long PerProcessUserTimeLimit;
        public long PerJobUserTimeLimit;
        public uint LimitFlags;
        // Pointer-sized fields: 4 bytes in a 32-bit process, 8 bytes in a 64-bit process.
        public UIntPtr MinimumWorkingSetSize;
        public UIntPtr MaximumWorkingSetSize;
        public uint ActiveProcessLimit;
        public UIntPtr Affinity;
        public uint PriorityClass;
        public uint SchedulingClass;
    }

    /// <summary>
    /// Sequential-layout structure with a length, a security-descriptor pointer and an
    /// inherit-handle flag. Not referenced by the Job class in this snippet.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct SECURITY_ATTRIBUTES
    {
        public uint nLength;
        public IntPtr lpSecurityDescriptor;
        public int bInheritHandle;
    }

    /// <summary>
    /// Sequential-layout structure that wraps the basic limits together with I/O counters and
    /// memory limit/peak fields. The Job constructor marshals an instance of it to unmanaged
    /// memory and passes it to SetInformationJobObject with
    /// JobObjectInfoType.ExtendedLimitInformation; field order must not change.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    struct JOBOBJECT_EXTENDED_LIMIT_INFORMATION
    {
        public JOBOBJECT_BASIC_LIMIT_INFORMATION BasicLimitInformation;
        public IO_COUNTERS IoInfo;
        // Pointer-sized fields: 4 bytes in a 32-bit process, 8 bytes in a 64-bit process.
        public UIntPtr ProcessMemoryLimit;
        public UIntPtr JobMemoryLimit;
        public UIntPtr PeakProcessMemoryUsed;
        public UIntPtr PeakJobMemoryUsed;
    }

    /// <summary>
    /// Information-class selector passed as the infoType argument of
    /// SetInformationJobObject. Members are listed in ascending numeric order.
    /// </summary>
    public enum JobObjectInfoType
    {
        BasicLimitInformation = 2,
        BasicUIRestrictions = 4,
        SecurityLimitInformation = 5,
        EndOfJobTimeInformation = 6,
        AssociateCompletionPortInformation = 7,
        ExtendedLimitInformation = 9,
        GroupInformation = 11
    }

    #endregion

}

Sunday, November 16, 2014

17 Best 'Label' and 'Feed' Search Techniques in Blogger



Today we will discuss various interesting ways of doing effective search in BlogSpot blogs. You will learn how to do search by category, by query or even doing effective search inside feeds. You will also learn how to run a query for searching posts with multiple labels and you will discover how to search for a particular keyword inside posts tagged under a specific label using its Feed data. We will also learn how to sort search results by Date and index count. There is too much learning today!
In wordpress we call labels as Categories and sub categories as Tags but in blogger we are limited only to Categories which are termed as Labels.

Syntax for Label search

Blogger databases are not visible to its users but users has the advantage of running certain queries to access the tables. With the introduction of Blogger API v3, you can now easily add running list of blog posts, pages and comments to a non-Blogger hosted site. Qasim will discuss this with you all in a post tonight. My job is to introduce you to simple search queries that can do wonders.
Label Search Syntax URL:
You may already know the below syntax.

 http://www.mybloggertricks.com/search/label/label+Name

Note: Labels are case sensitive. If a label name starts with an uppercase letter and you type it in lowercase, the query will not return any results. If a label name is Social Media (first letters are capital) and you type social mEdia, it won't work. You must type exactly the same letters.

1. Searching a Label

Lets try searching for all posts nested under the label Social Media in our blog. The URL would look like the one below:

 http://www.mybloggertricks.com/search/label/Social Media

2. Searching a keyword within a Label

I tried this but seems like Blogger doesn't support running queries within a label. For example lets search for all Facebook related posts under the category Social Media
Note: Spaces between labels can be denoted inside URLS with %20. You can write Social Media as Social%20Media also.


 http://www.mybloggertricks.com/search/label/Social%20Media?q=facebook

you will observe that though the query is executed but the results are same no matter whether you search for Facebook, twitter or anything. The following urls all return the same result.

 
http://www.mybloggertricks.com/search/label/Social%20Media?q=twitter
http://www.mybloggertricks.com/search/label/Social%20Media?q=tumblr
http://www.mybloggertricks.com/search/label/Social%20Media?q=googleplus
http://www.mybloggertricks.com/search/label/Social%20Media?q=blablabla



  But wait! Can't we search for a keyword inside a label at all? Well, for that we will introduce a rarely discussed trick using Feeds. Click here

3. Running a simple Search query

Now lets search for a keyword inside entire blog posts. This is a query search that looks for a keyword in all posts irrespective of which label it belongs to.

 
http://www.mybloggertricks.com/search?q=mohammad
http://www.mybloggertricks.com/search?q=presentation%20by%20mohammad

Note again that spaces between keywords are denoted by %20.

4. Multiple Label Search

If you wish to search posts tagged under two labels, you will need to use the or operator (|) in order to do that. For that follow the syntax below:

 www.YOUR-BLOG.com/search/?q=label:LABEL1|label:LABEL2

Lets now search all posts containing the labels SEO and Social Media

 http://www.mybloggertricks.com/search/?q=label:SEO|label:Social%20Media

Interesting? sure it is!

5. Sort Search Results  by Date

If you wish to sort the search result by date and time then you can easily do that with an extra parameter called by-date=true. It will display most recent entries first and oldest later.
Following is the syntax for displaying most recent posts on facebook

 http://www.mybloggertricks.com/search?q=facebook&by-date=true

You might have observed that though this method returns most recent posts on facebook but it is also showing some irrelevant posts that doesn't contain the keyword facebook at all. Read the next method to understand why.

6. Sort Search Results by Relevance

By default search queries are displayed based on relevance. But when you sort results by date, relevance is destroyed. You can either remove the sorting parameter that we used early or set its value to false as shown below

 http://www.mybloggertricks.com/search?q=facebook&by-date=false

7. Set Search Count

By default blogger display 20 posts at a time. If you wish to increase or decrease this number then use the parameter max-results=
The following query will return 50 posts based on query Facebook.

 http://www.mybloggertricks.com/search?q=facebook&max-results=50

8. Set Search Results Count and Sort by Date

In order to sort posts by date and also set the post count to 50, use the following syntax:

 http://www.mybloggertricks.com/search?q=facebook&max-results=50&by-date=true


Syntax for Feed Search

The reason why you can easily download all your blog posts and comments and move them to another platform is because all your blog content is neatly stored inside atom or rss feeds. Consider feed as a repository containing all your blog content, be it posts, comments, pages or labels.

1. Posts FEED

A Feed containing all your blog posts has the following syntax:

 http://www.mybloggertricks.com/feeds/posts/default

2. Comments FEED

A feed containing all your blog comments has the following syntax:

 http://www.mybloggertricks.com/feeds/comments/default

3. Prevent Feed redirection to Feedburner

If you are redirected to FeedBurner, this means you are using FeedBurner to send email updates to your subscribers. In order to prevent this, use the parameter redirect=false as shown below:

Posts FEED
 http://www.mybloggertricks.com/feeds/posts/default?redirect=false
Comments FEED
 http://www.mybloggertricks.com/feeds/comments/default?redirect=false

4. Search Post Feed by Label

Now lets display all posts inside your blog feed containing the label SEO

 http://www.mybloggertricks.com/feeds/posts/default/-/SEO

5. Set Search Results count in Posts Feed

The following query will return 5 recent posts of your blog
 http://www.mybloggertricks.com/feeds/posts/default?max-results=5&redirect=false

6. Set Count while searching Posts Feed by label

The following query will display only 10 posts under the category/label SEO. No redirection parameter required here.
 http://www.mybloggertricks.com/feeds/posts/default/-/SEO?max-results=10

7. Set Search Results Count in Comments Feed

The following query will return 5 recent comments posted on your blog
 http://www.mybloggertricks.com/feeds/comments/default?max-results=5

8. Set Start Index and End Index in Posts Feed

Another interesting fact about feeds is that you can display posts from a specified index. For example If I want to display posts for SEO label and I want only to display the 10th and 11th post then I will set count restrictions as shown below:
 http://www.mybloggertricks.com/feeds/posts/default/-/SEO?start-index=10&max-results=2
The start-index tells where to start from and max-results show how many posts to display.
 Tip: This method was utilized on our Multiple Sitemap Generator Tool

9. Search for a keyword within a Label in Post Feed

We already discussed that query parameters do not work in Label Search directly but we can utilize this method in Post Feed Label search method. Though logically this method does not literally means that we are filtering keywords based on label search, we are instead filtering two keywords. One is the label name itself and second is the query keyword. Both these keywords are highlighted below:
 http://www.mybloggertricks.com/feeds/posts/default/-/SEO?q=Smartphone
In the above query we are filtering the keyword Smartphone on all posts containing the word SEO. This is the only working method I could try and our coming widget is based on this search method which I am sure you will love a lot.

taken from here  http://www.mybloggertricks.com/2014/01/17-best-search-techniques-in-blogger.html as personal notepad all copyright belong there.

Friday, November 14, 2014

Print full path of an executable command in Windows



If you are using Windows 7, do this:
where foo

where foo | clip
If your system is older than Windows 7, use this instead:
http://www.codeproject.com/Articles/3784/Whereis-for-Windows
In case you don't have an account on CodeProject, here are external links to the files from the article above:
(EXE)
http://www.mediafire.com/download/rlauxn3b4rto8my/whereis_bin.zip
(SOURCE C# .NET)
http://www.mediafire.com/download/zjjtwuj4u2e9aa5/whereis_src.zip

taken from:
http://superuser.com/questions/248334/how-can-i-print-full-path-of-an-executable-command-in-windows

Thursday, November 6, 2014

Cookies and stuff reference C# .NET



http://stackoverflow.com/questions/18667931/httpwebrequest-add-cookie-to-cookiecontainer-argumentexception-parameternam
https://parse.com/questions/curl-and-c
http://stackoverflow.com/questions/4248672/httpwebrequest-and-set-cookie-header-in-response-not-parsed-wp7
http://www.codeproject.com/Questions/773849/HttpWebRequest-always-returns-but-works-great-with
http://stackoverflow.com/questions/16465625/get-httponly-cookies-using-httpwebrequest
http://stackoverflow.com/questions/3062925/c-sharp-get-httponly-cookie
http://www.codewrecks.com/blog/index.php/2011/04/12/use-a-webbrowser-to-login-into-a-site-that-use-httponly-cookie/
http://www.codeproject.com/Articles/38616/Retrieve-HttpOnly-Session-Cookie-in-WebBrowser
http://www.codeproject.com/Articles/6554/How-to-use-HttpWebRequest-and-HttpWebResponse-in-N
http://ycouriel.blogspot.com/2010/07/webbrowser-and-httpwebrequest-cookies.html
http://stackoverflow.com/questions/15049877/c-sharp-getting-webbrowser-cookies-to-log-in


in .net you can do like this:

[DllImport("wininet.dll", CharSet = CharSet.Auto, SetLastError = true)]
static extern bool InternetGetCookieEx(string pchURL, string pchCookieName, StringBuilder pchCookieData, ref uint pcchCookieData, int dwFlags, IntPtr lpReserved);
[DllImport("wininet.dll", CharSet = CharSet.Auto, SetLastError = true)]
static extern int InternetSetCookieEx(string lpszURL, string lpszCookieName, string lpszCookieData, int dwFlags, IntPtr dwReserved);
//const int INTERNET_COOKIE_THIRD_PARTY = 0x10;
const int INTERNET_COOKIE_HTTPONLY = 0x00002000;

/// <summary>
/// Reads the cookies (including HttpOnly ones) that WinINet holds for a URL and
/// returns them in a <see cref="CookieContainer"/>.
/// </summary>
/// <param name="uri">The URL whose cookies are requested.</param>
/// <param name="cookieName">
/// Passed through as the cookie-name argument of InternetGetCookieEx.
/// NOTE(review): null is presumably treated as "all cookies for the URL" — confirm.</param>
/// <returns>A container with the cookies, or null when none could be read.</returns>
private static CookieContainer GetUriCookieContainer(string uri, string cookieName = null)
{
    // First attempt with a small buffer.
    uint datasize = 256;
    StringBuilder cookieData = new StringBuilder((int)datasize);
    if (!InternetGetCookieEx(uri, cookieName, cookieData, ref datasize, INTERNET_COOKIE_HTTPONLY, IntPtr.Zero))
    {
        // Retry with a larger buffer. datasize may have been updated by the failed call;
        // never go below the 1024 characters the original retry used.
        // NOTE(review): confirm whether the reported size is in bytes or characters.
        datasize = Math.Max(datasize, 1024u);
        cookieData = new StringBuilder((int)datasize);
        if (!InternetGetCookieEx(uri, cookieName, cookieData, ref datasize, INTERNET_COOKIE_HTTPONLY, IntPtr.Zero))
            return null;
    }

    if (cookieData.Length == 0)
        return null;

    // The native cookie string separates cookies with ';' while SetCookies expects ','.
    CookieContainer cookies = new CookieContainer();
    cookies.SetCookies(new Uri(uri), cookieData.ToString().Replace(';', ','));
    return cookies;
}

Tuesday, November 4, 2014

UTF8 unicode long path HACK [WINDOWS]

function ShortFileName (const FileName: WideString): Widestring;
    { Returns the short (8.3) form of FileName as reported by GetShortPathNameW.
      Falls back to FileName unchanged when the API call fails. }
    var
      aTmp: WideString;
      Len: DWORD;
    begin
      Result := FileName;
      SetLength(aTmp, MAX_PATH);
      Len := Windows.GetShortPathNameW(PWideChar(FileName), PWideChar(aTmp), Length(aTmp));
      { A return value larger than the buffer is the required size (including the
        terminating null), so grow the buffer and ask again. }
      if Len > DWORD(Length(aTmp)) then
      begin
        SetLength(aTmp, Len);
        Len := Windows.GetShortPathNameW(PWideChar(FileName), PWideChar(aTmp), Length(aTmp));
      end;
      { On success Len is the number of characters written, so cut the buffer
        down to that length instead of returning the unused tail as well. }
      if (Len > 0) and (Len < DWORD(Length(aTmp))) then
      begin
        SetLength(aTmp, Len);
        Result := aTmp;
      end;
    end;

HOW TO USE:

 
procedure TForm1.Button1Click(Sender: TObject);
{ Lets the user pick a file, shows its short path in the form caption and,
  when that file exists, loads it into Memo1. }
var
  ShortName: String;
begin
  // Nothing to do when the dialog is dismissed.
  if not OpenDialog1.Execute then
    Exit;

  ShortName := ShortFileName(UTF8Decode(OpenDialog1.FileName));
  Caption := ShortName;

  if not FileExists(ShortName) then
    Exit;
  Memo1.Lines.LoadFromFile(ShortName);
end;
 
Or
 
// Decode the UTF-8 path, shorten it, then convert it for the loader.
hiddenImage.Picture.LoadFromFile(UTF8ToConsole(ShortFileName(UTF8Decode('C:\imagefiles.jpg'))));

Thursday, August 7, 2014

Many ways Submit web form on a GeckoWebBrowser? (GeckoFX)



First one
 // Take the document currently loaded in the browser control.
 document = geckoWebBrowser1.Document;

// Submit the first <form> element on the page.
(document.GetElementsByTagName("form").First() as GeckoFormElement).submit();


I can give example in c# :
If you know id value for input tags and login button , you can do this:
 // Wrap the three DOM elements (looked up by id) as input elements.
 GeckoInputElement userField = new GeckoInputElement(geckoWebBrowser1.Document.GetElementById("Username_ID").DomObject);
 GeckoInputElement passwordField = new GeckoInputElement(geckoWebBrowser1.Document.GetElementById("passwd_ID").DomObject);
 GeckoInputElement submitButton = new GeckoInputElement(geckoWebBrowser1.Document.GetElementById("login_button_ID").DomObject);

 // Fill in the credentials, then click the login button.
 userField.Value = "username";
 passwordField.Value = "password";
 submitButton.Click();
and if you know name of input tags, try this:
// Look the inputs up by their name attribute; GetElementsByName returns a
// collection, so the first match is used for each field.
GeckoInputElement username = new GeckoInputElement(geckoWebBrowser1.Document.GetElementsByName("email")[0].DomObject);
GeckoInputElement password = new GeckoInputElement(geckoWebBrowser1.Document.GetElementsByName("pass")[0].DomObject);
GeckoInputElement login = new GeckoInputElement(geckoWebBrowser1.Document.GetElementsByName("login_name")[0].DomObject);
// Fill in the credentials, then click the login button.
username.Value = "username";
password.Value = "password";
login.Click();
and if you dont know any id or name of input tags and have class name, try this,
// Click every element that carries the given CSS class.
GeckoNodeCollection nod = geckoWebBrowser1.Document.GetElementsByClassName("classname");
foreach (GeckoNode node in nod)
{
    // Only element nodes are clicked.
    if (NodeType.Element != node.NodeType)
    {
        continue;
    }

    try
    {
        GeckoInputElement inputElement = (GeckoInputElement)node;
        inputElement.Click();
    }
    catch (Exception)
    {
        // The node could not be handled as an input element: fall back to
        // clicking the same node as a generic HTML element.
        GeckoHtmlElement htmlElement = (GeckoHtmlElement)node;
        htmlElement.Click();
    }
}

Taken from http://stackoverflow.com/a/20756314 and http://stackoverflow.com/a/20769243 as personal notepad. Credit belong stackoverflow.com

Getting Meta content by name using HTMLAgility



 Getting Meta content by name using HTMLAgility
// Selects the first <meta> element whose name attribute is "title", reads its
// "content" attribute and passes the value through TrimString into ItemTitle.
// NOTE(review): SelectSingleNode presumably returns null when nothing matches,
// which would make this line throw - confirm and guard if the tag can be absent.
ItemTitle = TrimString(document.DocumentNode.SelectSingleNode("//meta[@name='title']").Attributes["content"].Value);

Saturday, June 28, 2014

Code handle dari BackgroundWorker.Complete




Code handle dari BackgroundWorker.Complete
/// <summary>
/// Reports the outcome of the background worker: a fixed message when the work
/// was cancelled, the exception message when it failed, otherwise the result.
/// </summary>
/// <param name="sender">The worker that raised the event.</param>
/// <param name="e">Completion data (cancel flag, error and result).</param>
private void wrok_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    if (e.Cancelled)
    {
        MessageBox.Show("Nggak bisa login gan, mungkin koneksi error");
    }
    else if (e.Error != null)
    {
        MessageBox.Show(e.Error.Message);
    }
    else
    {
        // Result is null when DoWork never assigned one; Convert.ToString turns
        // that into an empty string instead of throwing.
        MessageBox.Show(Convert.ToString(e.Result));
    }
}

Monday, June 16, 2014

Bug fix for Cookies container in C# .net 3.5

There is bug for .net 3.5 about cookies container, read here (CookieContainer domain handling issue (.NET 2.0-3.5)
) https://connect.microsoft.com/VisualStudio/feedback/details/541197/cookiecontainer-domain-handling-issue-net-2-0-3-5

I spent three whole weeks with a headache over this issue, and there is no official fix for .NET 3.5 (it feels like a technology Microsoft has abandoned).

Anyways, here the solving code from http://dot-net-expertise.blogspot.com/2009/10/cookiecontainer-domain-handling-bug-fix.html

Here the solution:
  1. Don’t use .Add(Cookie); use only the .Add(Uri, Cookie) method.
  2. Call BugFix_CookieDomain each time you add a cookie to the container, and before you call .GetCookies or before the system uses the container.
/// <summary>
/// Works around the CookieContainer domain-handling bug: for every entry of the
/// container's private domain table whose key starts with '.', the same entry
/// is also registered under the key without the leading dot.
/// </summary>
/// <param name="cookieContainer">The container to patch in place.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="cookieContainer"/> is null.
/// </exception>
private void BugFix_CookieDomain(CookieContainer cookieContainer)
{
    if (cookieContainer == null)
    {
        throw new ArgumentNullException("cookieContainer");
    }

    // The table is a private implementation detail reached through reflection;
    // when the field is not present there is nothing this workaround can patch.
    System.Reflection.FieldInfo tableField = typeof(CookieContainer).GetField(
        "m_domainTable",
        System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
    Hashtable table = tableField == null ? null : tableField.GetValue(cookieContainer) as Hashtable;
    if (table == null)
    {
        return;
    }

    // Iterate over a snapshot of the keys because the table is modified in the loop.
    ArrayList keys = new ArrayList(table.Keys);
    foreach (object keyObj in keys)
    {
        string key = keyObj as string;
        if (!string.IsNullOrEmpty(key) && key[0] == '.')
        {
            table[key.Substring(1)] = table[keyObj];
        }
    }
}


Here example of piece my code, to help you better understanding.
Uri target = new Uri("http://yourwebsite.com/login");

HttpWebRequest Crequest = (HttpWebRequest)WebRequest.Create(target);

// Reset MAINCookieContainer so the request starts without cookies.
MAINCookieContainer = new CookieContainer();
Crequest.CookieContainer = MAINCookieContainer;
Crequest.Credentials = CredentialCache.DefaultCredentials;
Crequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36";
Crequest.Method = "GET";
Crequest.ContentType = "application/x-www-form-urlencoded";

// Dispose the response so the underlying connection is released.
using (HttpWebResponse Cresponse = (HttpWebResponse)Crequest.GetResponse())
{
    foreach (Cookie cookie in Cresponse.Cookies)
    {
        // Add the cookies to MAINCookieContainer (the next HttpWebRequest will use
        // them) and re-apply the domain fix after every addition.
        MAINCookieContainer.Add(target, cookie);
        BugFix_CookieDomain(MAINCookieContainer);
    }
}

Wednesday, April 2, 2014

Multi Threaded WebScraping in C#

Suggestions have been incorporated. Kindly Suggest, Vote, Comment to improve it

Introduction    

*All the code examples are for learning purpose. Any misuse is not encouraged. 
* Project with Source Code of most of the Examples has been added.
Web Scraping involves obtaining information of interest from the webpages. I tried to make a step by step guide starting from basic of webscraping using WebBrowser to a little bit advance topics like performing login and maintaining sessions via HTTPWebRequest. This is the first release of the article and there may be errors/mistakes. I welcome all the suggestions and would try to include them ASAP.
I have used a tutorial-based, step-by-step approach, and the web scraping work starts from the first line of the tutorial. I have taken an example/task-oriented method to keep it interesting: 2-3 examples are followed by 2-3 tasks to keep the learner motivated. I am assuming the readers have basic knowledge of C# and the Visual Studio programming environment.

Contents 

The Contents I have covered are:
  • WebBrowser 
    • WebBrowser Download Event
    • Navigating To Olx's first Page
    • Accessing All Adds Shown at ...
    • Yahoo Signin Form Filling & Submission
    • Modifying WebBrowser Headers
    • Saving All Images of a WebPage
    • Solving Captcha Using API ...
    • Setting Proxy For WebBrowser
  • Regular Expressions 
    • Finding a Number in Text
    • Regex Operators
    • Finding Words in a Sentence
    • Numbers of Format ddd-ddddd
    • Finding Email Addresses in Text 
    • Finding IP Addresses in Text
    • A Regex Utility
  • WebClient
    • Downloading HTML as String
    • Downloading & Saving an Image
    • Blocking Mode of WebClient
    • Non - Blocking Mode of WebClient
    • Read / Write Streams
    • Query String for WebClient
    • Uploading File to URL 
    • Little more about WebClient ... 
  • BackGroundWorker
    • Running Time Consuming Function
    • Work Completion Report
    • Updating the Progress
    • Stopping the Worker
    • Multi Threaded App to Download Images ... 
  • HttpWebRequest/HttpWebResponse 
    • HTTP Request Headers
    • How the Sessions Work
    • HTTP Response Headers
    • Mozilla Live HTTP Headers
    • User Agent Strings
    • Getting Facebook Login Page HTML ...
    • Performing Login by HTTP requests
    • Custom HTTPWebRequest for Login... 
    • Understanding HTML Form Get/POST 
    • Getting Form Hidden Fields 
    • Preparing HTTP POST Data
    • Picture Upload by HTTP to Facebook ...
And a lot of relevant Tasks to keep the learner motivated to explore his innovation.

WebBrowser Control 

Top This control provides a built in full browser as a control. It enables the user to navigate Web pages inside your form.

Example: WebBrowser Download Event

  1. Add WebBrowser Control to the Form. Make it Dock in Parent Container
  2. Double click the WebBrowser Control to add the DocumentCompleted event handler

  3. Navigate Function is used to navigate to the given address
  4. Document completed event is fired once the document is completed loaded
  5. Now Run the program

  6. There are many solutions available to solve the above problem, like counting i-frames and then counting  number of times the Document Completed Event Fire. This is pretty much complex and the easiest one is  to Maintain History.
  7. Add a List<string> hist to the program, and modify the Document Competed Event as below:

Example: Navigate to OLX’s 1st Add’s Page  

  1. Before making any web scrapper, click bot etc, understanding of that website’s layout is necessary. After that following is important
    1. Finding Fields of Interest
    1. Narrowing Down the Text of Interest
    1. Finding tags with ids near the interested tags
  2. First Install Mozilla Firefox 15. Navigate to http://www.olx.com/cars-cat-378
  3. Right click on the first Add link and click Inspect Element.
  4. You will see something like the image below


  5. To Visit the 1st Add, we needs its link address.  
  6. The anchor tag highlighted in the above picture has no id, so if we use the GetElementsByTagName("a") function, we will get a list of all the anchor tags, which will include links to other pages of olx, help, contact us etc. (so it is not a good option)
  7. So, try to Find the nearest Tag which has ID.
  8. On the Tags Bar, Keep Selecting Tags toward the Left until you find some tag with ID

  9. Once you reach the div tag with id the-list, you will see it is the container for all the Adds Links

  10. So all the anchor tags in div#the-list, are links to the individual add pages
  11. Following is the code to get it programmatically. 
  12. //Getting AddsBlock HtmlElement 
    HtmlElement he = webBrowser1.Document.GetElementById("the-list"); 
    
    //Getting Collection of all the Anchor Tags in AddsBlock 
     HtmlElementCollection hec = he.GetElementsByTagName("a"); 
  13. We want to navigate to 1st adds page   
  14. //Navigating to 1st Add Page 
    //obtaining href value to get the page address  
    
    webBrowser1.Navigate(hec[0].GetAttribute("href"));

Example: Navigate to All the Adds shown on

http://www.olx.com/cars-cat-378
  1. You have seen how to Navigate to the 1st Add.
  2. To navigate to all the Pages, We need to store all the Adds Links in a List, so that later on we can visit those Adds
  3. To Do This Make A List That Stores href values of all the Add’s Links
  4. List<string>
     
    urls = new List<string>();
  5. Modify the Document Completed Event to add all the links to the URLs
  6. HtmlElement he = webBrowser1.Document.GetElementById("the-list");
    // The Document Completed event also fires for pages that have no
    // "the-list" container (GetElementById then returns null), so only
    // collect links when the container is present.
    if (he != null)
    {
        foreach (HtmlElement a in he.GetElementsByTagName("a"))
        {
            string href = a.GetAttribute("href");
            // Skip the link back to the listing page itself and any duplicates.
            if (href != "http://www.olx.com/cars-cat-378" && !urls.Contains(href))
            {
                urls.Add(href);
            }
        }
    }
  7. Why we are checking href != "http://www.olx.com/cars-cat-378" ? Because each individual Add Block contains a Link to the page on which it is being shown (that means to make accurate scrapper, you need to understand well what all is there and where is it).
  8. All the links are stored in urls list, now we need to make the browser automatically navigate to all of these
  9. if(urls.Count == 0)
    {
        // Every collected link has been visited.
        MessageBox.Show("Complete");
    }
    else
    {
        // Take the next link off the front of the list and navigate to it.
        string nextUrl = urls[0];
        urls.RemoveAt(0);
        webBrowser1.Navigate(nextUrl);
        string remaining = urls.Count.ToString();
        this.Text = "Links Remaining" + remaining;
    }
Task 1: Modify The Above Code, make it browse next pages
Like: http://www.olx.com/cars-cat-378-p-2
http://www.olx.com/cars-cat-378-p-3
http://www.olx.com/cars-cat-378-p-4 and so on
Task 2: On Each Add Page, scrape owner name and Number(if given)
Task 3: Make The App, which scrapes specified number of individual adds from the given url of olx categorey.

Example: Yahoo Signin Form Filling and Submission 

  1. Navigate to http://mail.yahoo.com/
  2. Check the ID of the username and password textboxes (Use Inspect Element)
  3. Make a Button Click Event in the app
  4. HtmlElement hu = webBrowser1.Document.GetElementById("username");
    // Focus the user name box and set its value.
    hu.Focus();
    hu.SetAttribute("Value","userName");

    // Same for the password box.
    HtmlElement hp = webBrowser1.Document.GetElementById("passwd");
    hp.Focus();
    hp.SetAttribute("Value", "password");
  5. For  Sign in Button Click (actually we need to submit the form, so find Signin Form ID. Get its Element, and invoke submit function on it
// Look up the sign-in form by its id and invoke "submit" on it.
webBrowser1.Document
    .GetElementById("login_form")
    .InvokeMember("submit");

Task 1: Findout how to Select Value of Dropdown List, CheckBox, Radio Button
. You can try Filling Yahoo Signup Page
Task 2: Perform Click on the Hyperlink

Example: In WebBrowser Control We can Add/Change the Headers. The Most important Header’s are Referrer and User-Agent. 

  1. User Agent header tells the Web Server about the Browser From which the Request was sent  
  2. Referrer Tells the Web Server, that From which web page the user was sent to the current web page
  1. To Change User-Agent Header 
	// The last argument carries additional HTTP headers; this step changes the
	// User-Agent, so that is the header that has to be sent.
	webBrowser1.Navigate("url", "_blank", null, "User-Agent: sample user agent");
Task1: Browse to webBrowser1.Navigate("logme.mobi");
To see HTTPHeaders, then try modifying your User-Agent and Referrer
You can get a complete list of User Agent strings at http://www.useragentstring.com/pages/useragentstring.php
Task2: Visit www.google.com in the C# app with some Apple or Linux browser User Agent. Get the Google Search Results non-JavaScript page.

Example: Saving All the Images of the Web Page 

  1. Add Reference to using mshtml;
  2. You can use Yahoo Sign up Page for Practice 
// Copies each image of the loaded page to the clipboard through a control
// range and saves the resulting bitmap as "<image name>.jpg".
IHTMLDocument2 doc = (IHTMLDocument2)webBrowser1.Document.DomDocument;
HTMLBody pageBody = (HTMLBody)doc.body;
IHTMLControlRange imgRange = (IHTMLControlRange)pageBody.createControlRange();

foreach (IHTMLImgElement img in doc.images)
{
    imgRange.add((IHTMLControlElement)img);
    imgRange.execCommand("Copy", false, null);

    try
    {
        using (Bitmap bmp = (Bitmap)Clipboard.GetDataObject().GetData(DataFormats.Bitmap))
        {
            bmp.Save(img.nameProp + ".jpg");
        }
    }
    catch (System.Exception ex)
    {
        // Any failure for one image is shown to the user; the loop then continues.
        MessageBox.Show(ex.Message);
    }
}
The Above code will save all the images of the Webpage in current directory
Task: Find Pattern in the captcha name, modify the code to only save captcha

Example: Solving Captcha using DeathByCaptcha Api

  1. Add Reference to using DeathByCaptcha;
  2. Following code solves the captcha. 
  3. Client client = (Client)new SocketClient(capUser, capPwd);
    try
    {
        // Ask the service to decode the captcha image (second argument: 50).
        Captcha captcha = client.Decode(path + capName, 50);
        if (captcha == null)
        {
            //Captcha Not Solved Show Error Message
        }
        else
        {
            //Captcha Solved
            MessageBox.Show(captcha.Text);
        }
    }
    catch (DeathByCaptcha.Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
  4. Study How to Report that Captcha.Text was wrong

Example: Setting Prxoy For WebBrowser

using Microsoft.Win32;  
// Open the per-user Internet Settings key for writing; the key is closed again
// when the using block ends.
using (RegistryKey registry = Registry.CurrentUser.OpenSubKey(
  "Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings", true))
{
    // OpenSubKey returns null when the key does not exist.
    if (registry == null)
    {
        throw new InvalidOperationException("The Internet Settings registry key was not found.");
    }

    // Turn the proxy on and set its address.
    registry.SetValue("ProxyEnable", 1);
    registry.SetValue("ProxyServer", "192.168.1.1:9876");
}

Regex  

Top A concise and flexible means of matching strings in the text
In C#, Regex, Match, MatchCollection classes are used for finding string patterns. These Clasess are in following Namespace.
using System.Text.RegularExpressions;

Example 1: Finding a Number in a text


Following are the Regex Operators:
[xyz]
A character set. Matches any one of the enclosed characters. For example, "[abc]" matches the "a" in "plain".
[^xyz]

A negative character set. Matches any character not enclosed. For example, "[^abc]" matches the "p" in "plain".
[a-z]
A range of characters. Matches any character in the specified range. For
example, "[a-z]" matches any lowercase alphabetic character in the range "a" through "z".
[^m-z]
A negative range characters. Matches any character not in the specified
range. For example, "[^m-z]" matches any character not in the range "m" through "z".

*
Matches the preceding character zero or more times. For example, "zo*" matches either "z" or "zoo".
+
Matches the preceding character one or more times. For example, "zo+" matches "zoo" but not "z".
?
Matches the preceding character zero or one time. For example, "a?ve?" matches the "ve" in "never".
.
Matches any single character except a newline character.

Example 2: Finding Words in a sentence


The last word sentence is not in the match list, as it didnt have space after it.
{n}
n is a non-negative integer. Matches exactly n times. For example, "o{2}" does not match the "o" in "Bob," but matches the first two o's in "foooood".
{n,}
n is a non-negative integer. Matches at least n times. For example, "o{2,}" does not match the "o" in "Bob" and matches all the o's in "foooood." "o{1,}" is equivalent to "o+". "o{0,}" is equivalent to "o*".
{n,m}
m and n are non-negative integers. Matches at least n and at most m times. For example, "o{1,3}" matches the first three o's in "fooooood." "o{0,1}" is equivalent to "o?".

Example: Matching Telephone Number of the format ddd-ddddd. Where d means digit


Example: Finding email in the text

TheRegex of a normal email can be
	"\b[A-Za-z0-9_]+@[A-Za-z0-9_]+\.[A-Za-z0-9]{2,4}\b"
Where \b matches a word boundary (not a blank space),  [A-Za-z0-9_]+ defines the username, which may include repetition of anything from A-Z, 0-9, a-z and _ (underscore),  @[A-Za-z0-9_]+ defines the hosting company name, for example yahoo,  \. defines a dot (.), and  [A-Za-z0-9]{2,4} matches the top-level domain, like com, net, edu etc.

Example: Regex to find IP address in the text

A basic regex to find ip address can be "\b[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\b"  But this wil match something of this type also 1925.68541.268.1 (that mean any number of deigits with 3 dots – and its not valid ip address)
An Other can be 
"\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b" 
Now this will not match a string which has more than 3 digits between the dots. But it may match 999.999.999.999, which is again an invalid address
So a regex can be as complex as following
"\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\."+ "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\."+"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\."+"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"
There is always a trade of between complexity and accuracy. So depending upon you input text you may give one dimension more important over other.
Task 1: Anchor Tags of HTML are of vital importance in scrapping. The value of the link is placed in the href attribute as shown below. Write regex to Find href value

Answer 1: The Regex to this can be as simple as
	Regex r = new Regex("href=\"[^\"]+\"");
\”  is used to define double quote(“) as double quote is a special character so it needs to be written with slash(\).  Later on u can remove href=” and “ from the value of the match.
Task 2: Pins at Pinterest.com has following format of web addresses.
"/pin/125678645821924781/" "/pin/63894888434527857/"
"/pin/25825397833321410/" Write Regex to find pins addresses from the html of the www.pinterest.com
Task 3: Thumbnail images of the Ads are shown at http://www.olx.com/cars-cat-378

Write a Regex to Match Images Links
Task 4: Make Following Utility.

  1. Load Input Text File 
  2. Press Load Button
  3. Write Regex
  4. Press Execute
  5. All The Matches are show in Multiple Line Text Box with One Match Value Per Line

WebClient 

Top This class is found under using System.Net;. It provides various funtions to download files from the internet. It can be used to download HTML source of the webpages as string, as file. It supports downloading files as data bytes.
This class is very helpful in scrapping, as it lets the coder download only the html file where as using webbrowser for scrapping is simple, but not an efficient/speedy way.
Example: Dowloading Yahoo.com html source as string
  1. Create a Button and TextBox on the form
  1. In Button Click Event add the following code, and press the Button at run
  2. using (WebClient wc = new WebClient())
    {
        // Download the page HTML as a string and show it in the text box;
        // the client is disposed when the block ends.
        textBox1.Text = wc.DownloadString("http://www.yahoo.com");
    }
  3. Here we are making Webclient Variable and then using its DownloadString method to download the html of the given url.
  4. The downloaded html is shown in the textbox1

Benefits of using Webclient
  1. Its easy to use
  2. Supports Various Methods for file and string downloading
  3. Efficient, uses much less bandwidth as compared WebBrowser
Once HTML source is downloaded, u can use Regex or 3rd party HTML Parsers to get required info from the HTML source.

Example 2: Downloading and Saving an Image

  1. Add the Following Code to Button Click Event, and press the Button at run

 // Download the image to "one.png"; the client is disposed when the block ends.
 using (WebClient wc = new WebClient())
 {
     wc.DownloadFile("http://www.dotnetperls.com/one.png", "one.png");
 }
  1. The image one.png will be downloaded and stored in the current directory

  1. The 1st argument is the url of the image and 2nd is the name of the image
  2. The same way, WebClient class provides methods for string and file uploading, but we wil use HTTPWebRequest class for that
  3. wc.DownloadData()Method provides downloading the data as bytes. This is useful where differnent encoding is used like UTF8 etc

Example: Blocking Mode of Webclient

  1. Adds Following code to the Button Click and press button at run

     // DownloadData runs on the calling thread, so nothing else happens on that
     // thread until it returns; the client is disposed when the block ends.
     using (WebClient wc = new WebClient())
     {
         wc.DownloadData("http://www.olx.com");
     }
  1. Just after pressing buttion, try to move the Form, and Form will go to Not responding

  1. Why is it so? The downloading string, file or data from internet is time consuming and webclient class is performaing download operation on the same Thread as the UI is. This causes UI to go Unresponsive
  1. This mean, no other task can be performed by the App, once WebClient is downloading. This is blocking Mode. Solution to this problem is using WebClient in Non – Blocking Mode

Example: Non– blocking Mode of webclient 

  1. wc.DownloadStringAsync()isused to perform the download operation on a separate thread. This causes the UI to remain responsive and can App can do other task meanwhile the downloading is performed
wc.DownloadStringAsync(new Uri("http://www.yahoo.com")); 
  1. This Downloads a String from resource, without blocking the calling thread.
  2. To Perform Asynchronous Download operation, user needs to define Download Completed Event, so that calling thread can be informed once Downloading is Complete
  3. In above case, we need to add DownloadStringCompleted Event.
  4. Following Piece of code will cause the Webclient to Asynchronous Dwonload string and will fire Download Completed event on completion
// Subscribe the completion handler first, then start the download without
// blocking the calling thread.
WebClient wc = new WebClient();
wc.DownloadStringCompleted += wc_DownloadStringCompleted;
wc.DownloadStringAsync(new Uri("http://www.yahoo.com"));
  1. The Downloaded string is passed as argument to the Download Complete Event and can be accessed by following way

/// <summary>
/// Handles completion of the asynchronous download and shows the HTML in textBox1.
/// </summary>
/// <param name="sender">The WebClient that raised the event.</param>
/// <param name="e">Completion data carrying the result, error and cancel flag.</param>
void wc_DownloadStringCompleted(object sender, DownloadStringCompletedEventArgs e)
{
    // Reading e.Result throws when the download was cancelled or failed, so
    // check both flags before touching it.
    if (e.Cancelled)
    {
        return;
    }

    if (e.Error != null)
    {
        MessageBox.Show(e.Error.Message);
        return;
    }

    //Accessing the Downloaded String
    string html = e.Result;

    //Code to Use Downloaded String
    textBox1.Text = html;
}
  1. The Download Completed Event is Fired at Calling Thread, so u can easily Access UI elements

Example: Read / Write Streams

  1. Webclient class provides various Blocing and Non Blocking Methods to Access the Stream for direct Read and Write Operations
  2. Following Piece of code obtains read Stream in blocking mode
// Both the client and the reader are disposed when the block ends, which also
// closes the stream returned by OpenRead.
using (WebClient wc = new WebClient())
using (StreamReader sr = new StreamReader(wc.OpenRead("http://www.yahoo.com")))
{
    //Here You Can Perform IO
    //Operations like, Read, ReadLine
    //ReadBlock, ReadToEnd etc
    //Supported by StreamReader Class
}
  1. The Same Way Write stream can be obtained for Write Related IO operations

Example: QueryString for Webclinet

  1. Gets or sets a collection of query name/value pairs associated with the request.
  2. Query String is helpful in sending the parametres to the url by url posting mothed
  3. Search Result Page of Google has following format of Address
https://www.google.com.pk/search?q=search+phrase

  1. In the above url, q is the parameter and search+phrase is its value
  2. Following Example Shows how to use Query String for sending parameters and their values to a URL
// Address the query string is attached to.
string searchAddress = "http://www.google.com/search";

// Query-string parameters and their values.
NameValueCollection queryParameters = new NameValueCollection { { "q", "Search Phrase" } };

// Create the client with the query string attached.
WebClient client = new WebClient { QueryString = queryParameters };

// Download the search results Web page into 'searchresult.htm'.
client.DownloadFile(searchAddress, "searchresult.htm");
  1. NameValueCollection class is under System.Collections.Specialized

Example: Uplading File To the URL

// Placeholders: the upload page address and the path of the file to upload.
string uploadAddress = "FileUploadPagePath";
string localFile = "File Path";

WebClient uploader = new WebClient();

// UploadFile(address, fileName) implicitly uses the HTTP POST method and
// returns the body of the server response.
byte[] responseBytes = uploader.UploadFile(uploadAddress, localFile);

// Decode and display the response.
string responseText = System.Text.Encoding.ASCII.GetString(responseBytes);
textBox1.Text = "Response Received. " + responseText;

Example: Additional Info for WebClient

  1. Setting Proxy
wc.Proxy = new WebProxy("ip:port");

  1. Adding Custom Headers
wc.Headers.Add(HttpRequestHeader.UserAgent, "user-agent");  

  1. Obtaining Respose Headers
WebHeaderCollection whc = wc.ResponseHeaders; 
Task 1: Add Refrrer Header.
Task 2: Read Response Code and Status from Response Header
Task 3: What is BaseAddress of the WebClient
Task 4: Use WebClient.QueryString to Do Search on Google
Task 5: Use WebClient.Upload to upload some File

BackGroundWorker

Top This class provides an easy way to run time-consuming operations on a background thread. The BackgroundWorker class enables you to check the state of the operation and it lets you cancel the operation.

Example: Running Time Consuming Function on BackGroundWorker

  1. For This Example We Are assuming Following Function a time consuming, and user need to run this function for various times which causes the UI to go unresponsive
/// <summary>
/// Stand-in for a time-consuming operation: blocks the calling thread for one second.
/// </summary>
private void HeavyFunction()
{
    const int simulatedWorkMilliseconds = 1000;
    System.Threading.Thread.Sleep(simulatedWorkMilliseconds);
}
  1. Make a Form a shown Below with Start, Stop Button and Status Text. Add a BackGroundWorker from the Componnets to the Form

  1. Create an event handler for the background worker's DoWork event. The DoWork event handler is where you run the time-consuming operation on the background thread. You can make this Event By Double Clicking in the Event Pane for BackGroundWorker

  1. Any values that are passed to the background operation are passed in the Argument property of the  DoWorkEventArgs object that is passed to the event handler.

  1. Let's  Call the HeavyFunction 5 times in the backgroundWorker1_DoWork Event
/// <summary>
/// DoWork handler: calls HeavyFunction five times in a row.
/// </summary>
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    int remainingCalls = 5;
    while (remainingCalls > 0)
    {
        HeavyFunction();
        remainingCalls--;
    }
}
  1. To Start the BackGroundWorker Work, we need to call RunWorkerAsync() Function of the backgroundWorker1. Call it in the Start Button Click Event
/// <summary>
/// Click handler of the Start button: starts the background worker.
/// </summary>
private void Start_Click(object sender, EventArgs e)  
{ 
backgroundWorker1.RunWorkerAsync();
}
  1. Once, the Start Button will be Clicked, the BackgroundWorker will start working, But UI will remain responsive.
  1. You have sucessfully learnt how to put Time Consuming Functions on Easily Maneged Separate Thread
  1. RunWorkerCompleted Event is Fired Once The Work is Complete
  2. The event is raised on the calling thread (the thread from which BackgroundWorker.RunWorkerAsync() was called). In our case, it is the UI thread

  3. To be notified, About BackGroundWorker Completion, add the Event RunWorkerCompleted

  1. RunWorkerCompleted Event Will be Fired on UI Thread, so we can easily Access All the UI Elements
/// <summary>
/// RunWorkerCompleted handler: sets the status text to "Work Complete".
/// </summary>
private void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
   { 
   	Status.Text = "Work Complete";
}
  1. Now After 5 seconds of Pressing Start Button, the Status Label Text Will be set to Work Complete

  1. While Performing some Time Consuming Function on the BackGroundWorker, we may want to update the progress to the user. For example in a scenario of downloading several files, we may want to update UI to show how many files have been completed
  2. To perform such an update, the ReportProgress function is called, which raises the ProgressChanged event on the calling thread.
  1. To call ReportProgress, first you need to add the ProgressChanged event and set the WorkerReportsProgress property to True

  1. In Report Progress Method 2 arguments can be passed, int ProgresssPercentage and object UserState. These Both arguments are available in ProgressChangedEventArgs ProgressPercentage and UserState Properties


  1. To Report Progress, Change the BackGroundWorker DoWork Event as Following
/// <summary>
/// DoWork handler: calls HeavyFunction 100 times and reports progress after each call,
/// passing the zero-based iteration number and a status text.
/// </summary>
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    int step = 0;
    while (step < 100)
    {
        HeavyFunction();

        // The step index doubles as the progress percentage (0..99).
        backgroundWorker1.ReportProgress(step, " Heavy Function Done");
        step++;
    }
}
  1. To update UI in ProgressChanged Event, modify it as following
/// <summary>
/// ProgressChanged handler: shows the reported percentage followed by the reported
/// user-state text in the Status label.
/// </summary>
private void backgroundWorker1_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    string percentText = e.ProgressPercentage.ToString();
    string stateText = (string)e.UserState;
    Status.Text = percentText + stateText;
}
  1. Now Once you press the Start Button, the status will be updated with arguments passed in ReportProgress Method

  1. When the BackGroundWorker will finish working, the RunWorkerCompleted Event will be fired, so the status will be updated to Work Complete.

  1. To Stop the BackGroundWorker During the Work, We need to Set the Property  WorkerSupportsCancellation to True

  1. At any time during the Work, we can Stop the BackgroundWorker by calling CancelAsync() Function. Modify the Stop button Click Event as Following
   /// <summary>
   /// Stop button handler: requests cancellation of backgroundWorker1.
   /// </summary>
   private void Stop_Click(object sender, EventArgs e)
   {
       // Only flags the request; the DoWork handler has to poll CancellationPending.
       backgroundWorker1.CancelAsync();
   }
  1. Modfiy the DoWork Event as Following to Stop if Cancelling is Pending
  /// <summary>
  /// DoWork handler with cancellation support: calls HeavyFunction up to 100 times,
  /// reporting progress after each call, and stops early when cancellation was requested.
  /// </summary>
  /// <param name="sender">The BackgroundWorker raising the event.</param>
  /// <param name="e">Set <c>e.Cancel</c> to true so the completed handler sees <c>Cancelled</c>.</param>
  private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
  {
      // The loop bound was 1, which checked CancellationPending only once, before any work
      // had started, so pressing Stop could never interrupt the job. 100 iterations matches
      // the progress-reporting version of this handler.
      for (int i = 0; i < 100; i++)
      {
          // Poll before each unit of work; CancelAsync() only sets this flag.
          if (backgroundWorker1.CancellationPending)
          {
              e.Cancel = true;
              break;
          }

          HeavyFunction();
          backgroundWorker1.ReportProgress(i, "Heavy Function Done");
      }
  }
  1. To update the UI with accurate info, you can modify the RunWorkerCompleted event handler as follows, to show whether the BackgroundWorker was stopped or completed the work
/// <summary>
/// RunWorkerCompleted handler: tells the user whether the work failed, was cancelled,
/// or ran to completion.
/// </summary>
/// <param name="sender">The BackgroundWorker raising the event.</param>
/// <param name="e">Carries the outcome: <c>Error</c>, <c>Cancelled</c>, or neither.</param>
private void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    if (e.Error != null)
    {
        // An exception thrown inside DoWork is delivered here; previously it was
        // silently reported as "Work Complete".
        Status.Text = "Work Failed: " + e.Error.Message;
    }
    else if (e.Cancelled)
    {
        Status.Text = "Work Stopped";
    }
    else
    {
        Status.Text = "Work Complete";
    }
}
  1. If the BackgroundWorker is busy with some task and the user presses the Start button again, RunWorkerAsync() will throw an exception. The IsBusy property tells whether the worker is busy or not, so before calling RunWorkerAsync() one must check whether the BackgroundWorker is busy. The following code does so
/// <summary>
/// Start button handler: starts backgroundWorker1 unless it is already running,
/// in which case the user is told to press Stop.
/// </summary>
private void Start_Click(object sender, EventArgs e)
{
    if (backgroundWorker1.IsBusy)
    {
        MessageBox.Show("Busy in Work - Press Stop");
        return;
    }

    backgroundWorker1.RunWorkerAsync();
}
  1. You can send Non UI objects as argument to the RunWorkerAsync function and then access it in the DoWork Event

Example: Make Multi Threaded App to download images from pinterest.com

  1. Design the UI as shown Blow


  1. Add one BackgroundWorker, name it backgroundWorker1, and add the DoWork, ProgressChanged and RunWorkerCompleted events. Set the WorkerReportsProgress and WorkerSupportsCancellation properties to True
  2. Program Logic: We are going to use backGroundWorker1 to download html source of the http://www.pinterest.com using WebClient, then we will use regex to find urls of all the images and add it to a List<string> urls. The BackGroundWorkers equal to the number of threads set by the user will be created at run time, each of these backgroundworkers will take one url from List<string> urls, and download that image using WebClient.
  3. Add Following Code for the backgroundworker1 Events
/// <summary>
/// DoWork handler for the scraper: downloads the Pinterest home page, extracts every
/// image URL matching the regex, reports how many were found, and queues them in
/// <c>urls</c> for the downloader workers.
/// </summary>
/// <param name="sender">The BackgroundWorker raising the event.</param>
/// <param name="e">Unused.</param>
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    string html;

    // WebClient is IDisposable; release it as soon as the page has been fetched.
    using (WebClient wc = new WebClient())
    {
        html = wc.DownloadString("http://www.pinterest.com");
    }

    // Matches: src="http://<host>/upload/<anything up to the closing quote>
    Regex reg = new Regex("src=\"http://[^/]+/upload/[^\"]+");
    MatchCollection mc = reg.Matches(html);

    // Leading space added so the text reads "12 Images Found" rather than "12Images Found".
    backgroundWorker1.ReportProgress(0, mc.Count.ToString() + " Images Found");

    System.Threading.Thread.Sleep(2000);

    // urls is shared with the downloader workers, so every access is locked.
    lock (urls)
    {
        foreach (Match m in mc)
        {
            // Strip the leading src=" so only the URL itself is stored.
            urls.Add(m.Value.Replace("src=\"", ""));
        }
    }
}
/// <summary>
/// ProgressChanged handler: displays the reported user-state text in the Status label.
/// </summary>
private void backgroundWorker1_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    string message = (string)e.UserState;
    Status.Text = message;
}
  1. In DoWork, we have just downloaded the html, used regex to get images links, and added it in the List<string> urls
  2. Now we need to make workers for downloading images. We will do this once user press that Start Button, Add Following Code to Start Button Click Event
/// <summary>
/// Start button handler: validates the requested thread count, creates that many
/// downloader BackgroundWorkers and starts them, then starts backgroundWorker1.
/// Shows a message instead when the input is invalid or backgroundWorker1 is busy.
/// </summary>
private void Start_Click(object sender, EventArgs e)
{
    int maxThrds;
    bool parsed = int.TryParse(NoOfThreads.Text, out maxThrds);
    if (!parsed)
    {
        MessageBox.Show("Enter Correct Number of Threads");
        return;
    }

    if (maxThrds <= 0)
    {
        MessageBox.Show("Enter 1 or more Threads");
        return;
    }

    if (backgroundWorker1.IsBusy)
    {
        MessageBox.Show("Busy in Work");
        return;
    }

    for (int created = 0; created < maxThrds; created++)
    {
        BackgroundWorker bgw = new BackgroundWorker
        {
            WorkerReportsProgress = true,
            WorkerSupportsCancellation = true
        };

        bgw.DoWork += bgw_DoWork;
        bgw.ProgressChanged += bgw_ProgressChanged;
        bgw.RunWorkerCompleted += bgw_RunWorkerCompleted;

        //Start The Worker
        bgw.RunWorkerAsync();
    }

    // The downloaders are already waiting; now start the worker that fills the URL list.
    backgroundWorker1.RunWorkerAsync();
}
  1. First we are checking for correct input, then we are making backgroundworkers at the run time.
  2. Once all properties of the run time threads are set, we are calling bgw.RunWorkerAsync for each worker. 
  1. Following is the Code for DoWork Event of the RunTime made Workers
/// <summary>
/// DoWork handler for the run-time created downloader workers: repeatedly takes the
/// first URL from the shared <c>urls</c> list, downloads it to a file named after the
/// last URL segment, and reports the running download count.
/// </summary>
/// <param name="sender">The BackgroundWorker that raised the event; used to report progress.</param>
/// <param name="e">Unused.</param>
private void bgw_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker bgw = (BackgroundWorker)sender;

    // This loop has no exit condition; stopping these workers is the subject of
    // "Task 1" in the article.
    while (true)
    {
        string imgLink = "";
        int downloadNumber = 0;

        lock (urls)
        {
            if (urls.Count > 0)
            {
                imgLink = urls[0];
                urls.RemoveAt(0);
                count++;

                // Snapshot the shared counter while the lock is held, so it is not
                // read unsynchronized later on.
                downloadNumber = count;
            }
        }

        if (imgLink == "")
        {
            // Wait OUTSIDE the lock. Sleeping while holding it would block the
            // producer and the other downloaders for the whole 500 ms.
            System.Threading.Thread.Sleep(500);
            continue;
        }

        string filename = imgLink.Substring(imgLink.LastIndexOf("/") + 1);

        // Dispose each WebClient after its download instead of leaking one per image.
        using (WebClient wc = new WebClient())
        {
            // This is a browser identification string, so it belongs in User-Agent
            // (it was previously sent as the Referer header).
            wc.Headers.Add(HttpRequestHeader.UserAgent, "Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20100101  firefox/15.0.1");
            wc.DownloadFile(imgLink, filename);
        }

        // Leading space added so the text reads "3 Images Downloaded".
        bgw.ReportProgress(0, downloadNumber.ToString() + " Images Downloaded");
    }
}
  1. In 1st line, we are casting sender to the BackGroundWorker object, so that we can ReportProgress for it. Then we have put all the code in a loop, in each iteration we are removing one url from List and then  putting it on Download
  2. If there is no link in List<string> urls, we have put the thread to sleep for 500ms
  3. Since many threads will be accessing List<string> urls, so have put it in lock.
Task 1: How to Stop Run Time Created Workers
Task 2: Modify backgroundworker1 to collect user defined number of images. For example 30, 100, 220(for more than 50, u have to scrape page 2,3,4 ....

Hint for Task 1: Following Options can be used

  1. Option 1: You can maintain a List of the run-time created workers and then call CancelAsync() for each worker in the List. Then modify the code of each run-time worker to break out of the loop if CancellationPending is true
  2. Option 2: Declare a global variable int rnd, assign it some random value in the Start button Click event and pass it to the BackgroundWorker DoWork event as an argument.
//Start Button Click Event
if (!backgroundWorker1.IsBusy)
{
    // Fresh token for this run; each worker receives a copy as its DoWork argument.
    rnd = new Random().Next(0, 99999);

    for (int i = 0; i < maxThrds; i++)
    {
        BackgroundWorker bgw = new BackgroundWorker
        {
            WorkerReportsProgress = true,
            WorkerSupportsCancellation = true
        };
        bgw.DoWork += bgw_DoWork;
        bgw.ProgressChanged += bgw_ProgressChanged;
        bgw.RunWorkerCompleted += bgw_RunWorkerCompleted;

        //Start The Worker, Pass rnd as Argument
        bgw.RunWorkerAsync(rnd);
    }

    backgroundWorker1.RunWorkerAsync();
}
  1. Cast the argument to a local int variable, then modify the DoWork event to keep working only while rnd still equals that value
        /// <summary>
        /// DoWork handler for a run-time worker: keeps looping for as long as the shared
        /// <c>rnd</c> field still equals the value passed in as the worker's argument.
        /// </summary>
        void bgw_DoWork(object sender, DoWorkEventArgs e)
        {
            int startToken = (int)e.Argument;
            while (rnd == startToken)
            {
                //Do the Task
            }
        }
  1. In Stop Button Click Event, Assign some new value to rnd, which will cause all the runtime created workers to break from loop
/// <summary>
/// Stop button handler: changes the shared <c>rnd</c> token so that every run-time
/// worker comparing its start-up copy against <c>rnd</c> leaves its loop.
/// </summary>
/// <param name="sender">The Stop button.</param>
/// <param name="e">Unused.</param>
private void Stop_Click(object sender, EventArgs e)
{
    // A single random draw can happen to equal the current token, in which case no
    // worker would stop. Draw again until the value really differs. One Random
    // instance is reused so that repeated draws do not return the same number.
    Random generator = new Random();
    int next;
    do
    {
        next = generator.Next(0, 99999);
    }
    while (next == rnd);

    rnd = next;
}
Task 3: Think of some more options

HTTPWebRequest / HTTPWebResponse

Top
  1. Before Starting this, we need to understand a bit about main HTTP Headers and install few ADD On’s which help us in determining Layout, Packets and Altering the Packets for a Website.
  1. In an any of your Browser, Go to http://logme.mobi . You will get something like following

  1. This is the Data of your HTTP Headers, which your browser sent to the web server of the http://logme.mobi In this, the User – Agent (it defines which browser is used for browsing) and Connection Headers are important.
  2. Now Just Referesh the Page, and u will get something like following

  1. This Header defines the Cookie, what is Cookie? A Cookie is a small piece of information stored as a text file on your computer that a web server uses when you browse certain web sites.
  2. To maintain sessions, the cookie header is very important.
  3. How the Sessions Work? Once user request login page, few cookies are issued by the server, then user submits login info along with the cookies, in case of successful login, server issues a new set of cookies, which identifies the user as authentiated user to the server. Then for further requests to the server, these newly issued set of cookies is used. This way a session is maintained. At any time, if u clean the Cookies Header, u will be redirected to the Login Page.
A Typical Cookie Exchange
  1. Install Mozilla Firefox 15.0 and then install the Live HTTP Headers add on for it. You can get it from here. Run the LiveHTTPHeader, and Referesh the Page http://logme.mobi. You will see something like following


  1. The LiveHTTPHeader Add on shows the HTTPHeaders of all the Requests and Responses once you dosome browsing using Mozilla Firefox. This Tool is Helpful in determining the website’s HTTP Packet formats, specially it helps in knowing what all data is being posted once some POST Action is performed
  2. Now install the Add on Tamper Data from here. This Tool is helpful in modifying the content of HTTP Headers while browsing the web, this tool is of great use in determining that what fields and headers are compulsory for performing some HTTP POST Request and what all stuff we can skip out from a perticular post request. Once u run it, it will look like following

  1. Click on the Start Tamper Button. In address bar type useragentstring.com/ press Enter, As soon u press Enter Following Window Will open, asking for u to Tamper Data, Submit Request or Abort Request
  1. Click on Tamper Data, Then Following Window will open up

  1. In the User Agent Field, Enter Following and Press OK
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1

  1. Now submit all the subsequent requests and once the page will be loaded u will see website showing ur browser as Chrome where as you are using FireFox
  1. The same way, you can alter the POST method parameters.
HTTPWebRequest Class in .Net: This class is under System.Net namespace and it provides methods and
properties to make HTTP Request to a web server.

Example 1: Downloading HTML of the Facebook login page

  1. To make an object of this class, WebRequest.Create function is used HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(url);
  2. Open LiveHTTPHeaders, in browser, and browse to http://www.facebook.com
  1. The picture above, shows the HTTP Request Headers, lets make this in C#
// Build a GET request for the Facebook home page that mimics the headers a browser sends.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("https://www.facebook.com/");

request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/15.0";

// The Accept property takes only the header VALUE. Including the "Accept: " prefix
// would put that text inside the value that is sent.
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

// Headers without a dedicated property are added through the Headers collection.
request.Headers.Add("Accept-Language: en-us,en;q=0.5");

request.Headers.Add("Accept-Encoding: gzip, deflate");

// gzip/deflate is advertised above, so the framework must be told to decompress the
// response; otherwise reading the response stream as text yields compressed bytes.
request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

request.KeepAlive = true;
  1. The first line creates an HttpWebRequest object for the given url, then we add the User-Agent and Accept headers to the HTTP packet by using properties. Not all of the headers are accessible via properties, so the user may need to add headers through the Headers collection. That is how we add the Accept-Language and Accept-Encoding headers.
  1. Next important Stuff is Adding the Cookie Container to the HTTPWebRequest Object, as we want to keep record of the Cookies sent by the server in response to the request. If no Cookie container is Added then we can not Access the Cookies in the Response Header.
     // Attach a cookie container so the cookies sent by the server can be read from the response.
     request.CookieContainer = new CookieContainer();
  1. Declare a CookieCollection variable Globally, all the received Cookies will be added in this Collection so that we can use received cookies for subsequent requests. If each time you use new Cookie Container, then its not possible to maintain session.
  2. CookieCollection cookies = new CookieCollection(); // shared store for cookies received from the server, reused on later requests
  3. Now we are done with making required HTTPWebRequest Object. Before making HTTPWebResponse object, lets see what response we got in LiveHTTPHeader for the request which we sent by browser
  4. The 1st line shows the Code and Status. After that we are interested in the Cookies only. We got 4-5 Cookies, which will be stored in the browser's Cookies folder and sent with the next request. If we perform a login, these Cookies will be sent with the HTTP Request generated for the login, and on a successful login the server will issue some additional Cookies (those Cookies will contain the info that makes us an authenticated user for subsequent Requests)
  5. Now lets make the HTTPWebResponse Object, its very simple
  6. HttpWebResponse response = (HttpWebResponse)request.GetResponse(); 
     // Sends the request and waits for the server's reply; response.Cookies is read in the next step.
  7. Once Response is Received, next thing is adding the Received Cookies to the globally defined CookieCollection. But Before that Lets see what all Cookies we received. Add the following Code after above line to see recevied Cookies
  8. CookieCollection received = response.Cookies;
    string txt = "Cookies Count=" + received.Count.ToString() + "\n";

    // One line per cookie, in the order the collection holds them.
    for (int idx = 0; idx < received.Count; idx++)
    {
        txt += received[idx].ToString() + "\n";
    }

    MessageBox.Show(txt);

    //Adding Recevied Cookies To Collection
    cookies.Add(received);
  9. This will show your cookies in a MessageBox
  1. Now the Response is recieved, Next Step can be downloding the Data from Stream, it can be HTML source code, some other file or may be nothing at all depending upon the url to which u made the request. In our case it is HTML source of the Facebook Login page.
// Read the whole response body as text. Disposing the reader also closes the
// underlying response stream, which releases the connection.
string html;
using (StreamReader loginPage = new StreamReader(response.GetResponseStream()))
{
    html = loginPage.ReadToEnd();
}
  1. This html source can be used to get some info by Regex or using some 3rd party HTML Parsing Library or stored in an html file as offline page.

Example 2: Performing Login to FaceBook

  1. Before diong login by C#, lets perform login in mozilla and analyze the HTTP Header by LiveHTTPHeaders. Start LiveHTTPHeader, Browse to http://www.facebook.com , enter username and password, click Login button. The HTTP Web Request sent by the Browser will look somthing like this
  1. The First Line is the URL to which your username and password being sent(later in Example 3, we will see how to find this url). Second line tells the HTTP method and version used, which is POST and 1.1 respectively.
  2. Then all the fields are just like normall HTTP Header as we saw in Example 1. The important stuff starts from Cookie Header, in Example 1, once we browse to http://www.facebook.com, there was no Cookie Header where as we received some Cookies in the Response Header, now when we click on the Login Button, the previously received set of Cookies is being sent in this Cookie Header.
  3. Next Header shows Content Type, there are two major content types used to POST data, application/x-www-form-urlencoded and multipart/form-data. You can find more info about these here 
  4. Next Header shows Content Length and in last line Content is being shown. You will see your email address and password in this line. Actually last Line shows the data which is being sent to the server by HTTP Post method.
  5. There are several other values also, later in Example, we will see what are these values and from where to obtain these Values ! ! !
  6. Lets examine the Response Header for the above Request.
  1. The Response Header shows a lot of Cookies, these are the Cookies which are issued by the server on  successful login, now for any subsequect request, the browser will send these Cookies to the server and in this way session wil be maintained
  2. Got to Tools->Clear Recent History and delete the Cookies, then try to browse to your facebook profile page, and u will see that u will be redirected to facebook login page. 
  1. Now lets create the same login Request header as we saw in above screen shot and test that either we are able to successfully log in or not
 
// URL the login form posts to.
string getUrl = "https://www.facebook.com/login.php?login_attempt=1";

// Form body copied from the captured browser request. The stray space that followed
// "default_persistent=0&" has been removed: it would have been sent as part of the
// next field's name.
string postData = "lsd=AVo_jqIy&email=YourEmailAddress&pass=YourPassword&default_persistent=0&charset_test=%E2%82%AC%2C%C2%B4%2C%E2%82%AC%2C%C2%B4%2C%E6%B0%B4%2C%D0%94%2C%D0%84&timezone=-300&lgnrnd=072342_0iYK&lgnjs=1348842228&locale=en_US";

HttpWebRequest getRequest = (HttpWebRequest)WebRequest.Create(getUrl);

getRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Firefox 15.0.0.1";

getRequest.CookieContainer = new CookieContainer();

//Adding Previously Received Cookies
getRequest.CookieContainer.Add(cookies);

getRequest.Method = WebRequestMethods.Http.Post;

getRequest.ProtocolVersion = HttpVersion.Version11;

// Do not follow the redirect the server answers with; the login cookies are read
// from this response.
getRequest.AllowAutoRedirect = false;

getRequest.ContentType = "application/x-www-form-urlencoded";

getRequest.Referer = "https://www.facebook.com";

getRequest.KeepAlive = true;
  1. The getUrl is assigned to the address to which data will be posted, postData variable is copy of the Content from above HTTP Request Packet. Then we have created an HTTPWebRequest Object, and set  its  User-Agent Header
  2. The Cookies which we received in Response to the Request for http://www.facebook.com are added to the HTTPWebRequest object. If we don't add these Cookies, then instead of entertaining our login request, the Server will redirect us to the Login page. Next we set the HTTP Method to Post and the Version to 1.1 (the same method and version shown in the captured browser request).
  3. Setting the AllowAutoRedirect Property to false for requests in which we try to login is very important. If this property is set to true, the HTTPWebRequest object will follow the Redirection Responses, and
    during the redirections you may lose access to the Cookies which the server sent in response to the Login Request.
  4. Now Lets send the Login Info to the Server. 
//Converting postData to Array of Bytes
byte[] byteArray = Encoding.ASCII.GetBytes(postData);

//Setting Content-Length Header of the Request
getRequest.ContentLength = byteArray.Length;

//Obtaining the Stream To Write Data.
//The using block closes the stream even when the write throws.
using (Stream newStream = getRequest.GetRequestStream())
{
    //Writing Data To Stream
    newStream.Write(byteArray, 0, byteArray.Length);
}
  1. Data is written to stream, now lets get the Response and see what all Cookies we Receive
// Send the login request and list every cookie the server returned.
HttpWebResponse getResponse = (HttpWebResponse)getRequest.GetResponse();

CookieCollection loginCookies = getResponse.Cookies;
string txt = "Cookies Count=" + loginCookies.Count.ToString() + "\n";

for (int idx = 0; idx < loginCookies.Count; idx++)
{
    txt += loginCookies[idx].ToString() + "\n";
}

MessageBox.Show(txt);
  1. We successfully logged into the system and received 9 Cookies, the snapshot above shows very little info about the received Cookies, you can get more info by accessing the properties of the Cookies
  1. Add the received Cookies to globally defined CookieCollection so that it can be used in subsequent requests
  1. How to check whether the Login was successful or not? Normally the Cookies Count is an easy way to tell whether the Login succeeded. To be more sure, you can try getting the HTML of the Home Page: if you are not redirected to the Login Page, you are successfully logged in.

Example 3: Custom HTTPWebRequest for Login

  1. In Last example, we just replayed the HTTP Packet which mozilla Browser generated. Now let see from where the POST Url and PostData fields were obtained. Log Off from FaceBook, and open the Login Page. Right Click on Email textBox and click on Inspect Element.
[Image]

  1. Following HTML pane will appear in the bottom. Click on the Form Element
[Image]

  1. Here you can see the action field in the highlighted area; this field gives the url to which the data is to be posted.
  1. Below  the highlighted area u can see few input fields, in Example 2, postData u saw many fields other than the email and password, so basically these fields were being sent to the server along with the email and password. These are part of the login Form, and these must be sent to the server along with the login info. Facebook changes the values of these fields frequently, so you cant hardcode these field’s values in the software/app.
  2. Now  we will see how to obtain these values from the facebook login page source code.
  1. You can use Regex, string manipulation or some 3rd party HTML Parsing Library to obtain these fields  and their values. I am using HTML Agility Pack to get the Login form tag and its all child input tags, and finally preparing the postData
  2. string email="youremail";
    string passwd="yourpassword";
    string postData = "";

    //Load FB login Page HTML
    A.HtmlDocument doc = new A.HtmlDocument();
    doc.LoadHtml(fb_html);

    //Get Login Form Tag
    A.HtmlNode node = doc.GetElementbyId("login_form");
    node = node.ParentNode;

    //Get All Hidden Input Fields
    //Prepare Post Data
    int i = 0;
    foreach (A.HtmlNode h in node.Elements("input"))
    {
        // Separate every field after the first one with '&'.
        if (i > 0)
        {
            postData += "&";
        }

        // The credentials are inserted just before the second input field.
        // They are URL-encoded so characters such as '&', '+' or '=' in an
        // address or password cannot corrupt the form body.
        if (i == 1)
        {
            postData += "email=" + Uri.EscapeDataString(email) + "&";
            postData += "pass=" + Uri.EscapeDataString(passwd) + "&";
        }

        // Field names and values are encoded for the same reason.
        postData += Uri.EscapeDataString(h.GetAttributeValue("name", "")) + "=" +
            Uri.EscapeDataString(h.GetAttributeValue("value", ""));
        i++;
    }
  3. Now you can post the Data in same way as we did in Example 2. Once the Successful Login Cookies are recevied, Add it to Globally defined CookieCollection, then for any subsequent request, send these  Cookies with the HTTPWebRequest.

Example 4: Uploading Pic to the Profile

  1. We are going to upload picture to the profile using mobile version of the facebook, and leaving the upload to normall facebook as a task for the user.
  2. To upload pictures/files, multipart/form-data is used as Content Type.
  3. First lets examine the HTTP traffic for uploading the picture by LiveHTTPHeaders
  4. Login to http://m.facebook.com/, and upload a picture.
  5. You will see HTTP Request like following in LiveHTTPHeaders
[Image]
  1. By now you must be familiarize with the above HTTP Request Headers, the only thing different is the way of posting the data, instead of using application/x-www-form-urlencoded, we are using multipart/form-data. You can also observe the layout of the postData (just below the ContenType)
  1. Now Lets Examine from where these all fields like fb_dtsg, characterset etc came. Right Click on Upload Photo form on upload page and select Inspect element.
[Image]

  1. You can see all the fields are here under the form tag for photo upload. Again you can use Regex, string manipulation or HTMLAgilityPack to get the name and values of these fields. But 1st you need to get the HTML of this page
  2. HttpWebRequest req = (HttpWebRequest)WebRequest.Create("http://m.facebook.com/upload.php");
    // Send the cookies collected at login so the server treats this request as authenticated.
    req.CookieContainer = new CookieContainer();
    req.CookieContainer.Add(cookies);
    req.AllowAutoRedirect=true;
    req.UserAgent = "Mozilla/2.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/5.0.874.121";

    // Dispose the response and the reader once the page has been read, so the
    // connection is released.
    string uploadHTML;
    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
    using (StreamReader sr = new StreamReader(resp.GetResponseStream()))
    {
        uploadHTML = sr.ReadToEnd();
    }
  3. Let's get all these fields and add them to a Dictionary collection. This class is available in System.Collections.Generic. Make a dictionary variable nvc 
  4. Dictionary<string, string> nvc = new Dictionary<string, string>();
     // Will hold one entry per form input: key = the input's name, value = its value.
  5. As you can see there is no ID for the photoupload form, so 1st use string manipulation to get the form tag html and then use HTMLAgilityPack to easily get all input tags. 
  6. uploadHTML = uploadHTML.Substring(uploadHTML.IndexOf("<form"));
    // Give the form an id so it can be looked up below. The space after "<form" is
    // required: "<formid=" would change the tag name instead of adding an attribute.
    uploadHTML = uploadHTML.Replace("<form","<form id=\"myform\" ");
    // Cut everything after the first closing form tag.
    uploadHTML = uploadHTML.Remove(uploadHTML.IndexOf("/form>") + 6);

    A.HtmlDocument doc = new A.HtmlDocument();
    // Load the trimmed form markup prepared above (not the login page's html).
    doc.LoadHtml(uploadHTML);
    A.HtmlNode node = doc.GetElementbyId("myform");
    node = node.ParentNode;

    // Collect the name/value pair of every input that has a name.
    foreach (A.HtmlNode h in node.Elements("input"))
    {
        string key = h.GetAttributeValue("name", "");

        if (key != "")
            nvc.Add(key, h.GetAttributeValue("value",""));
    }
  7. We will use Following Function to upload photo
  8. HttpUploadFile("http://upload.facebook.com/mobile_upload.php", 
       "file1", "filename", @"filePath", "image/jpeg", nvc);
       // "filename" and "filePath" are placeholders for the file's name and its path on your computer.
  9. The details of the passed arguments are as follows
  • Action URL of the Upload Form
  • Name of the input tag for the File to upload
  • Name of the file
  • Path to the file on your computer
  • File type, in this case its image with extension jpeg
  • A dictionary containing all the input tags name and values
  1. Following is the complete code of the HttpUploadFile function
  2. public void HttpUploadFile(string url,string paramName, string filename,
        string filepath, string contentType,  Dictionary<string,string> nvc)
    {
        // Posts a file plus a set of form fields to `url` as multipart/form-data,
        // sending the cookies collected at login, then stores the returned cookies
        // and writes the response body to "upload.html".
        //
        //   url         - action URL of the upload form
        //   paramName   - name of the form's file input
        //   filename    - file name announced to the server
        //   filepath    - path of the file to read from disk
        //   contentType - MIME type announced for the file, e.g. "image/jpeg"
        //   nvc         - the remaining form fields (name -> value)

        //Prepairing PostData Format
        string boundary = "---------------------------" + DateTime.Now.Ticks.ToString("x");
        byte[] boundarybytes = System.Text.Encoding.ASCII.GetBytes("\r\n--" + boundary + "\r\n");

        //Creating Request to Action URL
        HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(url);
        wr.ContentType = "multipart/form-data; boundary=" + boundary;
        wr.KeepAlive = true;
        wr.CookieContainer = new CookieContainer();

        //Adding Cookies Received at Login
        wr.CookieContainer.Add(cookies);
        wr.Method = WebRequestMethods.Http.Post;
        wr.UserAgent = "Mozilla/2.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/5.0.874.121";
        wr.AllowWriteStreamBuffering = true;
        wr.ProtocolVersion = HttpVersion.Version11;
        wr.AllowAutoRedirect = true;
        // The Referer property takes only the URL; the "Referer: " prefix must not be
        // part of the value.
        wr.Referer = "http://m.facebook.com/upload.php";

        //Obtaining Stream to Write Data. The request stream has to be closed before
        //GetResponse() is called; the using block guarantees that, even on errors.
        using (Stream rs = wr.GetRequestStream())
        {
            // One part per ordinary form field. The literal is kept on a single line:
            // a regular string literal cannot span lines.
            string formdataTemplate = "Content-Disposition: form-data; name=\"{0}\"\r\n\r\n{1}";
            foreach (string key in nvc.Keys)
            {
                rs.Write(boundarybytes, 0, boundarybytes.Length);
                string formitem = string.Format(formdataTemplate, key, nvc[key]);
                byte[] formitembytes = System.Text.Encoding.UTF8.GetBytes(formitem);
                //Writing all the input tags values
                rs.Write(formitembytes, 0, formitembytes.Length);
            }

            rs.Write(boundarybytes, 0, boundarybytes.Length);

            //Writing File Contents
            string headerTemplate = "Content-Disposition: form-data; " +
                  "name=\"{0}\"; filename=\"{1}\"\r\nContent-Type:{2}\r\n\r\n";
            string header = string.Format(headerTemplate, paramName, filename, contentType);
            byte[] headerbytes = System.Text.Encoding.UTF8.GetBytes(header);
            rs.Write(headerbytes, 0, headerbytes.Length);

            // Copy the file in 4 KB chunks so it is never held in memory as a whole.
            using (FileStream fileStream = new FileStream(filepath, FileMode.Open, FileAccess.Read))
            {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = fileStream.Read(buffer, 0, buffer.Length)) != 0)
                {
                    rs.Write(buffer, 0, bytesRead);
                }
            }

            //Completing the Data
            byte[] trailer = System.Text.Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n");
            rs.Write(trailer, 0, trailer.Length);
        }

        //Receving Response
        using (HttpWebResponse wresp = (HttpWebResponse)wr.GetResponse())
        {
            cookies.Add(wresp.Cookies);

            string sourceCode;
            using (StreamReader sr = new StreamReader(wresp.GetResponseStream()))
            {
                sourceCode = sr.ReadToEnd();
            }

            // Keep the server's answer on disk so the result can be inspected.
            using (StreamWriter sw = new StreamWriter("upload.html"))
            {
                sw.Write(sourceCode);
            }
        }
    }
Task 1: Make Wall Posting Software for www.tagged.com
Task 2: Investigate some site which uses AJAX, to see how to use HTTPWebRequest, HTTPWebResponse for it

Task 3: Perform Login at some sites, using login cookies, view login protected pages

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


taken from http://www.codeproject.com/Articles/478485/Multi-Threaded-WebScraping-in-Csharp as my personal notepad