=波波日志 > Asp.Net/C#/WCF > C#源代码-查询alexa全球,中国排名,google pr,google收录和百度收录及快照=

C#源代码-查询alexa全球,中国排名,google pr,google收录和百度收录及快照

2011-12-7号更新:
1)更新了google pagerank 查询地址
2)更新google收录正则表达式
3)更新alexa国内排名正则表达式

  发一个C#源代码,可以查询alexa全球,中国排名,google pr,google收录和百度收录及快照,方便友链时查询这个网站是否被google或者百度惩罚过。

示例效果查看:alexa排名、google page rank、google收录、百度收录和百度快照
源代码下载:ASP.NET百度权重,alexa排名,google page rank, google收录,百度收录和百度快照查询源代码

源代码如下

+展开
-C#
using System;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace BLL
{
    /// <summary>
    /// 下载信息url的html内容的类
    /// </summary>
    public class Net
    {
        /// <summary>
        /// 创建URI
        /// </summary>
        /// <param name="u"></param>
        /// <returns></returns>
        public static Uri CreatUri(string u)
        {
            if (string.IsNullOrEmpty(u)) return null;
            else
            {
                u = u.Trim('/', ' ').ToLower();
                if (!u.StartsWith("http://") && !u.StartsWith("https://")) u = "http://" + u;
                try { return new Uri(u); }
                catch { return null; }
            }
        }
        /// <summary>
        /// 使用webclient
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public static string GetHtml(string url)
        {
            string html = null;
            WebClient wc = new WebClient();
            try { html = wc.DownloadString(url); }
            catch { }
            wc.Dispose();
            return html;
        }
        /// <summary>
        /// 使用HttpWebRequest对象
        /// </summary>
        /// <param name="url"></param>
        /// <param name="encoding">编码</param>
        /// <returns></returns>
        public static string GetHtml(string url,Encoding encoding)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
            string html = null;
            try
            {
                HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                StreamReader srd = new StreamReader(response.GetResponseStream(), encoding);
                html = srd.ReadToEnd();
                srd.Close();
                response.Close();
            }
            catch { }
            return html;
        }
    }
    /// <summary>
    /// Retrieves the Google PageRank of a URL.
    /// </summary>
    public class GooglePR
    {
        private static string tryMore(string strUrl)
        {
            //string sURL = "http://toolbarqueries.google.com/search?client=navclient-auto&ch=" + GetPR(strUrl) + "&ie=UTF-8&oe=UTF-8&features=Rank&q=info:" + System.Web.HttpUtility.UrlEncode(strUrl);
            string sURL = "http://toolbarqueries.google.com/tbr?client=navclient-auto&features=Rank&ch=" + GetPR(strUrl) + "&q=info:" + System.Web.HttpUtility.UrlEncode(strUrl);
            string result = Net.GetHtml(sURL);
            if (result != null)
            {
                result = result.Trim();
                if (result.IndexOf(':') > 0)
                {
                    string[] pr = result.Split(':');
                    if (pr.Length == 3) return pr[2].ToString();
                }
            }
            return "0";
        }
        public static string GetPageRank(string strUrl)
        {
            string pr = "0";
            if (!string.IsNullOrEmpty(strUrl))
            {
                Uri u = Net.CreatUri(strUrl);
                if (u != null)
                {
                    string host = u.Host, path = u.PathAndQuery;
                    if (path == "/")
                    {
                        //查询主域名,非路径.其他2级域名只进行一次查询.
                        //如果是顶级域名或者www2级域名,得到的值为0时,查询www2级域名或者顶级域名,然后再试http://
                        string[] arr = host.Split('.');
                        if (arr[0] == "www" || arr.Length == 2)
                        {
                            pr = tryMore(host);
                            if (pr == "0")
                            {
                                pr = tryMore(arr.Length == 2 ? "www." + host : host.Replace("www."""));
                                if (pr == "0")
                                {
                                    pr = tryMore("http://" + host);
                                    if (pr == "0") pr = tryMore("http://" + (arr.Length == 2 ? "www." + host : host.Replace("www.""")));
                                }
                            }
                        }
                        else
                        {
                            pr = tryMore(u.Host);
                            if (pr == "0") pr = tryMore("http://" + u.Host);
                        }
                    }
                    else//查询路径
                    {
                        pr = tryMore(u.Host + path);
                        if (pr == "0") pr = tryMore("http://" + u.Host + path);
                    }
                }
            }
            return pr;
        }
        private static string GetPR(string url)
        {
            url = "info:" + url;
            string ch = GoogleCH(str_asc(url)).ToString();
            ch = "6" + ch;
            return ch;
        }
        private static int[] str_asc(string str)
        {
            if (str == null || str == string.Empty) return null;
            int[] result = new int[str.Length];
            for (int i = 0; i < str.Length; i++) result[i] = (int)str[i];
            return result;
        }
        /// <summary>
        /// Right-shift helper used by the checksum mixing step.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this looks like an emulation of an unsigned (zero-fill) 32-bit
        /// right shift - confirm against the reference algorithm. The casts to int rely
        /// on the default unchecked context to truncate silently.
        /// </remarks>
        /// <param name="a">Value to shift.</param>
        /// <param name="b">Number of bit positions to shift by.</param>
        /// <returns>The shifted value.</returns>
        private static long yiweitwo(long a, long b)
        {
            // Mask selecting bit 31.
            long z = 0x80000000;
            if ((z & a) != 0)
            {
                // Bit 31 is set: shift by one, clear bit 31, set bit 30,
                // then shift the int-truncated value by the remaining b - 1 positions.
                a = (a >> 1);
                a &= (~z);
                a |= 0x40000000;
                a = ((int)a >> (int)(b - 1));
            }
            else
            {
                // Bit 31 is clear: shift the int-truncated value by b positions directly.
                a = ((int)a >> (int)b);
            }
            return a;
        }
        /// <summary>
        /// Mixes three accumulators through nine subtract-and-xor steps.
        /// </summary>
        /// <remarks>
        /// Each step subtracts the other two values and xors in a shifted copy of a
        /// neighbour. Right shifts go through <c>yiweitwo</c>; left shifts use the
        /// plain operator. The statement order is significant because every step
        /// reads values produced by the previous ones.
        /// </remarks>
        /// <param name="a">First accumulator.</param>
        /// <param name="b">Second accumulator.</param>
        /// <param name="c">Third accumulator.</param>
        /// <returns>The mixed values, truncated to int, in the order a, b, c.</returns>
        private static int[] yiwei(long a, long b, long c)
        {
            // Round 1: shifts 13 / 8 / 13.
            a -= b; a -= c; a ^= (yiweitwo(c, 13));
            b -= c; b -= a; b ^= (a << 8);
            c -= a; c -= b; c ^= (yiweitwo(b, 13));
            // Round 2: shifts 12 / 16 / 5.
            a -= b; a -= c; a ^= (yiweitwo(c, 12));
            b -= c; b -= a; b ^= (a << 16);
            c -= a; c -= b; c ^= (yiweitwo(b, 5));
            // Round 3: shifts 3 / 10 / 15.
            a -= b; a -= c; a ^= (yiweitwo(c, 3));
            b -= c; b -= a; b ^= (a << 10);
            c -= a; c -= b; c ^= (yiweitwo(b, 15));
            return new int[] { (int)a, (int)b, (int)c };

        }
        /// <summary>
        /// Computes the checksum that is sent as the "ch" parameter of the rank query.
        /// </summary>
        /// <remarks>
        /// The input is consumed twelve values at a time: four values are packed into
        /// each of a, b and c (lowest index in the lowest byte) and the three
        /// accumulators are mixed with <c>yiwei</c>. The total length is then added to
        /// c, the remaining 0-11 values are folded in, and a final mix produces the result.
        /// NOTE(review): the structure resembles a Bob Jenkins style hash - unverified.
        /// </remarks>
        /// <param name="url">Character codes of the text to hash; must not be null.</param>
        /// <returns>The third accumulator (c) after the final mix.</returns>
        private static int GoogleCH(int[] url)
        {
            int length = url.Length;
            // Initial accumulator values.
            long a = 0x9E3779B9;
            long b = 0x9E3779B9;
            long c = 0xE6359A60;
            // k is the read offset into url; len is the number of values not yet consumed.
            int k = 0;
            int len = length;
            int[] mid;
            while (len >= 12)
            {
                // Pack the next twelve values, four per accumulator, then mix.
                a += (url[k + 0] + (url[k + 1] << 8) + (url[k + 2] << 16) + (url[k + 3] << 24));
                b += (url[k + 4] + (url[k + 5] << 8) + (url[k + 6] << 16) + (url[k + 7] << 24));
                c += (url[k + 8] + (url[k + 9] << 8) + (url[k + 10] << 16) + (url[k + 11] << 24));
                mid = yiwei(a, b, c);
                a = mid[0]; b = mid[1]; c = mid[2];
                k += 12;
                len -= 12;
            }
            // The total input length is added to c before the tail is folded in.
            c += length;
            // Fold in the remaining len values (len == 0 adds nothing). Tail values that
            // go into c start at bit 8; a and b are filled from bit 0 as in the main loop.
            switch (len)
            {
                case 11:
                    {
                        c += (url[k + 10] << 24);
                        c += (url[k + 9] << 16);
                        c += (url[k + 8] << 8);
                        b += (url[k + 7] << 24);
                        b += (url[k + 6] << 16);
                        b += (url[k + 5] << 8);
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;
                    }
                case 10:
                    {
                        c += (url[k + 9] << 16);
                        c += (url[k + 8] << 8);
                        b += (url[k + 7] << 24);
                        b += (url[k + 6] << 16);
                        b += (url[k + 5] << 8);
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;
                    }

                case 9:
                    {
                        c += (url[k + 8] << 8);
                        b += (url[k + 7] << 24);
                        b += (url[k + 6] << 16);
                        b += (url[k + 5] << 8);
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;

                    }

                case 8:
                    {
                        b += (url[k + 7] << 24);
                        b += (url[k + 6] << 16);
                        b += (url[k + 5] << 8);
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;

                    }

                case 7:
                    {

                        b += (url[k + 6] << 16);
                        b += (url[k + 5] << 8);
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;

                    }

                case 6:
                    {
                        b += (url[k + 5] << 8);
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;
                    }

                case 5:
                    {
                        b += (url[k + 4]);
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;

                    }

                case 4:
                    {
                        a += (url[k + 3] << 24);
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;

                    }

                case 3:
                    {
                        a += (url[k + 2] << 16);
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;
                    }
                case 2:
                    {
                        a += (url[k + 1] << 8);
                        a += (url[k + 0]);
                        break;
                    }

                case 1:
                    {
                        a += (url[k + 0]);
                        break;
                    }

            }
            // Final mix; only the third accumulator is returned.
            mid = yiwei(a, b, c);
            return mid[2];
        }
    }
    /// <summary>
    /// 获取Alexa排名
    /// </summary>
    public class Alexa
    {
        /// <summary>
        /// 通过API接口获取数据,只能获取全球排名
        /// </summary>
        /// <param name="u"></param>
        /// <returns></returns>
        public static string API(string u)
        {
            string rank = "NaN";
            try
            {
                string xmlString = BLL.Net.GetHtml("http://data.alexa.com/data/+wQ411en8000lA?cli=10&dat=snba&ver=7.0&cdt=alx_vw=20&"
                    + "wid=12206&act=00000000000&ss=1680x1050&bw=964&t=0&ttl=35371&vis=1&rq=4&url=" + u);
                Match m = Regex.Match(xmlString, "<POPULARITY URL=\"\\s*.+?\\s*\" TEXT=\"\\s*(\\d+)\\s*\"/>"
                    , RegexOptions.IgnoreCase | RegexOptions.Compiled);
                if (m.Groups[1].Value != "") rank = m.Groups[1].Value;
            }
            catch { }
            return rank;
        }
        /// <summary>
        /// 直接从alexa的网站下载html代码分析,获取中国及世界排名,返回内容为json
        /// </summary>
        /// <param name="u"></param>
        /// <returns></returns>
        public static string Latest(string u)
        {
            string rank = "{rank:'{0}',cnrank:'{1}'}";
            try
            {
                string htmlString = BLL.Net.GetHtml("http://www.alexa.com/siteinfo/" + u);
                Match m = Regex.Match(htmlString, "<th>3\\s*month</th>\\s*<td class=\"avg\\s*\">\\s*([\\d,]+)\\s*</td>"
                    , RegexOptions.IgnoreCase | RegexOptions.Compiled);
                rank = rank.Replace("{0}", m.Groups[1].Value);
                m = Regex.Match(htmlString, "class=\"data\">\\s*<img[^>]+>\\s*([\\d,]+)\\s*</div>"
                    , RegexOptions.IgnoreCase | RegexOptions.Compiled);
                rank = rank.Replace("{1}", m.Groups[1].Value);
            }
            catch { rank = "{rank:'NaN',cnrank:'NaN'}"; }
            return rank;
        }
    }
    /// <summary>
    /// 获取google和百度收录数量,为百时同时返回快找时间
    /// </summary>
    public class GoogleBaiduIndex
    {
        /// <summary>
        /// 获取数据
        /// </summary>
        /// <param name="u"></param>
        /// <param name="isGG"></param>
        /// <param name="isJson">是否返回json格式的数据,否则返回用|分隔的数据</param>
        /// <returns></returns>
        public static string IndexGoogleBaidu(string u, bool isGG, bool isJson)
        {
            string rst = "", html = "";
            u = (isGG ? "http://www.google.com.hk/search?q=" : "http://www.baidu.com/s?wd=") + "site%3A" + u;
            html = BLL.Net.GetHtml(u, isGG ? Encoding.GetEncoding(950) : Encoding.GetEncoding(936));
            if (html != null)
            {//找到约 4,210 条结果
                Regex r = new Regex(isGG ? @"共約\s*<b>([\d,]+)</b>\s*筆" : @"找到相关结果约?\s*([\d,]+)\s*个", RegexOptions.Compiled);
                Match m = r.Match(html);
                if (isGG) rst = isJson ? "{gg:'" + m.Groups[1].Value + "'}" : m.Groups[1].Value;
                else rst = isJson ? "{bd:'" + m.Groups[1].Value + "',bdTime:'"
                    + Regex.Match(html, @"/\s*(\d{4}(-\d{1,2}){2})", RegexOptions.Compiled).Groups[1].Value + "'}"
                : m.Groups[1].Value + "|" + Regex.Match(html, @"/\s*(\d{4}(-\d{1,2}){2})", RegexOptions.Compiled).Groups[1].Value;
            }
            else rst = "{success:false}";
            return rst;
        }
    }
    /// <summary>
    /// 获取百度权重
    /// </summary>
    public class BaiduWeight
    {
        /// <summary>
        /// 获取权重
        /// </summary>
        /// <param name="u"></param>
        /// <returns></returns>
        public static string Weight(string u)
        {
            string w = "n";
            if (!string.IsNullOrEmpty(u))
            {
                Uri uri = Net.CreatUri(u);
                if (uri != null)
                {
                    string html = Net.GetHtml("http://www.aizhan.com/getbr.php?url=" + uri.Host.Replace("www.""") + "&style=1");
                    if (html != null)
                        w = Regex.Match(html, @">([n\d])</a>", RegexOptions.IgnoreCase | RegexOptions.Compiled).Groups[1].Value;
                    if (w == "") w = "n";
                }
            }
            return w;
        }
    }
}


类别:Asp.Net/C#/WCF 作者:波波 日期:2011-03-28 【评论:9】 
 

    • hua

  • 日期:2011-6-28 17:34:04  IP:124.160.*.*
    非常感谢楼主!谢谢啊!
    管理员回复(2011-6-28 18:26:27)
    O(∩_∩)O~,不客气~

    • HI

  • 日期:2011-8-22 11:39:48  IP:116.55.*.*
    功能 实现了,但代码好乱,看得废神
    管理员回复(2011-8-22 13:36:20)
    O(∩_∩)O~,最好拷贝到你的编辑器里面看比较好。。

    • haobaby

  • 日期:2011-12-6 17:02:41  IP:112.111.*.*
    非常感谢楼主!
    里面的Alexa国内排名;google PR:google收录数量:百度权重
    上面几个不可以 为什么
    管理员回复(2011-12-6 19:33:10)
    google修改过pr获取的地址了,忘记更新上传。

    明天早上有空更新一下下载。呵呵

    示例修改过了,只是忘记了更新下载
    http://www.code-design.cn/eg/pagerank/

    • haobaby

  • 日期:2011-12-7 11:31:51  IP:112.111.*.*
    嗯 谢谢~~~
    管理员回复(2011-12-7 14:04:01)
    下载和内容已经更新。。~


  • 日期:2011-12-15 10:55:58  IP:112.66.*.*
    多谢分享,正需要
    管理员回复(2011-12-15 19:24:02)
    呵呵~


  • 日期:2011-12-15 10:57:35  IP:112.66.*.*
    有没有关键词“百度排名”查询代码啊?
    管理员回复(2012-2-7 17:12:43)
    如果是站点的关键字排名需要有蜘蛛的支持,如爱站网的百度排名和百度权重,猜测是大量收录网站然后分析出来的。

    当然如果你意思是输入某个关键词+域名来实现,那么这个很简单,只需要服务器端xhr发送关键字请求百度搜索结果即可分析得出结果


  • 日期:2011-12-15 10:59:07  IP:112.66.*.*
    想要蜘蛛爬行代码
    管理员回复(2011-12-19 20:39:59)
    这里有一个网站的蜘蛛示例,并且结合了lucene.net拆分关键字,挺不错的
    射日工作室


  • 日期:2012-1-20 16:19:11  IP:61.167.*.*
    非常感谢,这个代码对我很有用!
    管理员回复(2012-1-21 9:56:04)
    呵呵,不客气。。

发表留言
  • *昵称:
  • 头像:
  • 电子邮件: [留下您的邮件,方便管理员回复您。]
  • 个人网站: *验证码:
声明:本网站尊重并保护知识产权,根据《信息网络传播权保护条例》,如果我们转载或引用的作品侵犯了您的权利,请通知我们,我们会及时删除!
Powered by showbo,©2012,桂ICP备05005887号 京公网安备1101055090