close

    網友問了個問題,我天真的認為是UrlEncode()就可以解決,直到心冷大指點後,才知道事情不是我想的那麼簡單。經過測試後,不管用什麼方法都沒辦法正確的用WebClient去抓資料,google了老半天也找不到有用的資訊。結果無意中發現這篇,看來跟這個名為punycode的東西有關,有了關鍵字,呵~~人生就變彩色的了,一下子就找到有用的文章。

    看起來是因為大多數的 DNS 伺服器僅支援 ASCII 字元,直接拿中文網域名稱去解析就會發生錯誤,因此才出現 Punycode 這種編碼:它把含有非 ASCII 字元的網域標籤轉成純 ASCII 字串,實際送出時再加上「xn--」前綴。以下是 google 到的兩種解法:

 

1.自己將中文轉成punycode

   /// <summary>
   /// Demo form for approach 1: the Unicode host label is converted to
   /// Punycode by hand before the URL is passed to <c>WebClient</c>.
   /// </summary>
   public partial class Form1 : Form {
       public Form1() {
           InitializeComponent();
       }

       /// <summary>
       /// Builds the ASCII form of the internationalized host name and downloads its page.
       /// </summary>
       private void Form1_Load(object sender, EventArgs e) {
           // On the wire an IDN label is the prefix "xn--" followed by the
           // Punycode encoding of the Unicode label.
           string strPunycode = "http://xn--" + Punycode.encode("正負2度c") + ".tw/";

           // WebClient implements IDisposable; the using block releases it
           // even when DownloadString throws.
           using (WebClient client = new WebClient()) {
               string html = client.DownloadString(strPunycode);
           }
       }
   }
  12:  
  /// <summary>
  /// Punycode encoder/decoder following the algorithm of RFC 3492.
  /// Works on Unicode code points, so characters outside the Basic
  /// Multilingual Plane (stored as surrogate pairs in a .NET string) are
  /// handled as a single code point. The "xn--" prefix is NOT added or
  /// removed here; callers do that themselves.
  /// </summary>
  class Punycode {
      /* Punycode parameters */
      private const int TMin = 1;
      private const int TMax = 26;
      private const int Base = 36;
      private const int InitialN = 128;
      private const int InitialBias = 72;
      private const int Damp = 700;
      private const int Skew = 38;
      private const char Delimiter = '-';
      /* Largest valid Unicode code point. */
      private const int MaxCodePoint = 0x10FFFF;

      /// <summary>
      /// Punycodes a unicode string.
      /// </summary>
      /// <param name="input">Unicode string.</param>
      /// <returns>Punycoded string (without any "xn--" prefix).</returns>
      /// <exception cref="ArgumentNullException">input is null.</exception>
      /// <exception cref="OverflowException">The delta computation would overflow an int.</exception>
      public static String encode(String input) {
          if (input == null) {
              throw new ArgumentNullException("input");
          }
          // Surrogate pairs are merged so each array element is one code point.
          int[] codePoints = toCodePoints(input);
          int n = InitialN;
          int delta = 0;
          int bias = InitialBias;
          StringBuilder output = new StringBuilder();
          // Copy all basic code points to the output
          int b = 0;
          foreach (int cp in codePoints) {
              if (cp < InitialN) {
                  output.Append((char)cp);
                  b++;
              }
          }
          // Append delimiter
          if (b > 0) {
              output.Append(Delimiter);
          }
          // h counts the code points already represented in the output.
          int h = b;
          while (h < codePoints.Length) {
              // Find the minimum code point >= n
              int m = int.MaxValue;
              foreach (int cp in codePoints) {
                  if (cp >= n && cp < m) {
                      m = cp;
                  }
              }
              if (m - n > (int.MaxValue - delta) / (h + 1)) {
                  throw new OverflowException("OVERFLOW");
              }
              delta += (m - n) * (h + 1);
              n = m;
              foreach (int cp in codePoints) {
                  if (cp < n) {
                      // Signed ints wrap to int.MinValue, never to 0, so the
                      // overflow has to be detected before incrementing.
                      if (delta == int.MaxValue) {
                          throw new OverflowException("OVERFLOW");
                      }
                      delta++;
                  }
                  if (cp == n) {
                      // Emit delta as a variable-length integer in base 36.
                      int q = delta;
                      for (int k = Base; ; k += Base) {
                          int t = threshold(k, bias);
                          if (q < t) {
                              break;
                          }
                          output.Append((char)digit2codepoint(t + (q - t) % (Base - t)));
                          q = (q - t) / (Base - t);
                      }
                      output.Append((char)digit2codepoint(q));
                      bias = adapt(delta, h + 1, h == b);
                      delta = 0;
                      h++;
                  }
              }
              delta++;
              n++;
          }
          return output.ToString();
      }

      /// <summary>
      /// Decode a punycoded string.
      /// </summary>
      /// <param name="input">Punycode string (without any "xn--" prefix).</param>
      /// <returns>Unicode string.</returns>
      /// <exception cref="ArgumentNullException">input is null.</exception>
      /// <exception cref="FormatException">input is not well-formed Punycode.</exception>
      /// <exception cref="OverflowException">A decoded value would overflow an int.</exception>
      public static String decode(String input) {
          if (input == null) {
              throw new ArgumentNullException("input");
          }
          int n = InitialN;
          int i = 0;
          int bias = InitialBias;
          // Code points are collected as ints so that insert positions are
          // counted in code points and values above U+FFFF are not truncated.
          System.Collections.Generic.List<int> output = new System.Collections.Generic.List<int>();
          // Everything before the last delimiter is literal basic code points.
          int d = input.LastIndexOf(Delimiter);
          if (d > 0) {
              for (int j = 0; j < d; j++) {
                  char c = input[j];
                  if (!isBasic(c)) {
                      throw new FormatException("BAD_INPUT");
                  }
                  output.Add(c);
              }
              d++;
          } else {
              d = 0;
          }
          while (d < input.Length) {
              int oldi = i;
              int w = 1;
              // Read one variable-length integer into i.
              for (int k = Base; ; k += Base) {
                  if (d == input.Length) {
                      throw new FormatException("BAD_INPUT");
                  }
                  int digit = codepoint2digit(input[d++]);
                  if (digit > (int.MaxValue - i) / w) {
                      throw new OverflowException("OVERFLOW");
                  }
                  i += digit * w;
                  int t = threshold(k, bias);
                  if (digit < t) {
                      break;
                  }
                  if (w > int.MaxValue / (Base - t)) {
                      throw new OverflowException("OVERFLOW");
                  }
                  w *= Base - t;
              }
              int count = output.Count + 1;
              bias = adapt(i - oldi, count, oldi == 0);
              if (i / count > int.MaxValue - n) {
                  throw new OverflowException("OVERFLOW");
              }
              n += i / count;
              if (n > MaxCodePoint) {
                  throw new FormatException("BAD_INPUT");
              }
              i %= count;
              output.Insert(i, n);
              i++;
          }
          StringBuilder result = new StringBuilder(output.Count);
          foreach (int cp in output) {
              if (cp <= char.MaxValue) {
                  result.Append((char)cp);
              } else {
                  // Supplementary code point: emit it as a surrogate pair.
                  result.Append(char.ConvertFromUtf32(cp));
              }
          }
          return result.ToString();
      }

      /// <summary>
      /// Bias adaptation function applied after each encoded or decoded delta.
      /// </summary>
      /// <param name="delta">The delta that was just processed.</param>
      /// <param name="numpoints">Number of code points handled so far, including the current one (callers pass a value of at least 1).</param>
      /// <param name="first">True for the very first delta, which is damped more strongly.</param>
      /// <returns>The new bias.</returns>
      public static int adapt(int delta, int numpoints, bool first) {
          delta = first ? delta / Damp : delta / 2;
          delta += delta / numpoints;
          int k = 0;
          while (delta > ((Base - TMin) * TMax) / 2) {
              delta /= Base - TMin;
              k += Base;
          }
          return k + ((Base - TMin + 1) * delta) / (delta + Skew);
      }

      /// <summary>
      /// Tells whether a character is a basic (ASCII) code point.
      /// </summary>
      public static bool isBasic(char c) {
          return c < 0x80;
      }

      /// <summary>
      /// Maps a digit value 0..35 to its character: 0..25 to 'a'..'z', 26..35 to '0'..'9'.
      /// </summary>
      /// <exception cref="ArgumentException">d is outside 0..35.</exception>
      public static int digit2codepoint(int d) {
          if (d < 0 || d >= Base) {
              throw new ArgumentException("BAD_INPUT");
          }
          if (d < 26) {
              // 0..25 : 'a'..'z'
              return d + 'a';
          }
          // 26..35 : '0'..'9'
          return d - 26 + '0';
      }

      /// <summary>
      /// Maps a Punycode digit character to its value: letters (either case)
      /// to 0..25, '0'..'9' to 26..35.
      /// </summary>
      /// <exception cref="FormatException">c is not a letter or digit of the Punycode alphabet.</exception>
      public static int codepoint2digit(int c) {
          if (c >= '0' && c <= '9') {
              // '0'..'9' : 26..35
              return c - '0' + 26;
          }
          if (c >= 'a' && c <= 'z') {
              // 'a'..'z' : 0..25
              return c - 'a';
          }
          if (c >= 'A' && c <= 'Z') {
              // 'A'..'Z' : 0..25 (decoders must accept both cases)
              return c - 'A';
          }
          throw new FormatException("BAD_INPUT");
      }

      /* Threshold t for digit position k under the current bias, clamped to TMin..TMax. */
      private static int threshold(int k, int bias) {
          if (k <= bias) {
              return TMin;
          }
          if (k >= bias + TMax) {
              return TMax;
          }
          return k - bias;
      }

      /* Splits a UTF-16 string into code points; an unpaired surrogate is kept as its own value. */
      private static int[] toCodePoints(String s) {
          int[] buffer = new int[s.Length];
          int count = 0;
          for (int i = 0; i < s.Length; i++) {
              char c = s[i];
              if (char.IsHighSurrogate(c) && i + 1 < s.Length && char.IsLowSurrogate(s[i + 1])) {
                  buffer[count++] = char.ConvertToUtf32(c, s[i + 1]);
                  i++;
              } else {
                  buffer[count++] = c;
              }
          }
          Array.Resize(ref buffer, count);
          return buffer;
      }
  }

 

  2.直接使用config檔的設定

.在C:\WINDOWS\Microsoft.NET\Framework\v2.0.50727\CONFIG\machine.config的 <configSections> 區段內加入這一段(<section> 元素必須放在 <configSections> 裡面才有效)


   1: <section name="uri" type="System.Configuration.UriSection, System, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />

 

.在app.config中加入


   1: <configuration>
   2:   <uri>
   3:   <idn enabled="All" />
   4:   <iriParsing enabled="true" />
   5:   </uri>
   6: </configuration>

 

 

然後程式碼完全不用動


   /// <summary>
   /// Demo form for approach 2: the Unicode URL is passed to
   /// <c>WebClient</c> unchanged.
   /// </summary>
   public partial class Form1 : Form {
       public Form1() {
           InitializeComponent();
       }

       /// <summary>
       /// Downloads the page of an internationalized domain using its Unicode URL.
       /// </summary>
       private void Form1_Load(object sender, EventArgs e) {
           // No manual Punycode conversion here; this presumably depends on
           // the <uri> idn / iriParsing settings from app.config being active.
           string url = "http://正負2度c.tw/";

           // WebClient implements IDisposable; the using block releases it
           // even when DownloadString throws.
           using (WebClient client = new WebClient()) {
               string html = client.DownloadString(url);
           }
       }
   }

 

 

 

 

 

以上就屬第二種最方便,也說明了.NET真的是好用到一個不行啊~~

arrow
arrow
    全站熱搜

    AnferneeHardaway 發表在 痞客邦 留言(0) 人氣()