互联网应用开发 · 1fde34b9
Hide whitespace changes
Inline Side-by-side

Showing with 104 additions and 0 deletions

code.md code.md +104 -0

No files found.
--- a/code.md
+++ b/code.md
+```python
+# 测试代码
+def get_daoyou_chaxun_code(
+        user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
+        cookie='ASPSESSIONIDAACBQDRC=CLLNDELDHNFCMPNHJGFPMDOP; ASPSESSIONIDACACRCRD=EOJHODNDAKFGHKEHEBECAKFB'
+):
+    # 打开图片
+    import io
+    import requests
+
+    r = requests.get('http://daoyou-chaxun.cnta.gov.cn/single_info/validatecode.asp', headers={
+        "User-Agent": user_agent,
+        "Cookie":cookie
+    }, timeout=20)
+    imageData = r.content
+    data_stream = io.BytesIO(imageData)
+    image = Image.open(data_stream)
+    enhancer = ImageEnhance.Contrast(image)
+    enhancer.enhance(3)
+
+    # 将图片转换成灰度图片
+    image = image.convert("L")
+
+    # 二值化
+    twoValue(image)
+
+    # 分割图片
+    next_line = True
+    last_empty_line = True
+    start_xs = []
+    end_xs = []
+    for x in range(image.size[0]):
+        empty_line = True
+        for y in range(image.size[1]):
+            L = image.getpixel((x, y))
+            if L < 200:
+                empty_line = False
+                next_line = True
+        if not empty_line:
+            if last_empty_line:
+                start_xs.append(x)
+        if empty_line and next_line:
+            end_xs.append(x - 1)
+            next_line = False
+        if empty_line:
+            last_empty_line = True
+        else:
+            last_empty_line = False
+
+    # 数字特征库
+    signature_list = [
+        # 0
+        [(0, 0, 0), (0, 9, 0), (5, 0, 0), (5, 9, 0), (0, 1, 1), (0, 2, 1), (0, 3, 1), (0, 4, 1), (0, 5, 1), (0, 6, 1),
+         (0, 7, 1), (0, 8, 1)],
+        # 1
+        [(0, 0, 0), (0, 1, 1), (0, 9, 1), (0, 5, 0)],
+        # 2
+        [(0, 7, 0), (1, 7, 1), (1, 6, 0), (2, 6, 1)],
+        # 3
+        [(0, 4, 0), (1, 4, 0), (2, 4, 1)],
+        # 4
+        [(0, 9, 0), (1, 9, 0), (2, 9, 0), (3, 9, 1), (4, 9, 1), (5, 9, 1)],
+        # 5
+        [(0, 4, 1), (0, 5, 0), (0, 6, 0), (0, 7, 0), (0, 8, 1), (0, 9, 0)],
+        # 6
+        [(0, 0, 0), (0, 1, 0), (1, 0, 0), (2, 0, 1)],
+        # 7
+        [(0, 0, 1), (0, 1, 1), (0, 2, 0), (0, 3, 0), (0, 4, 0), (0, 5, 0), (0, 6, 0), (1, 8, 1), (1, 9, 1), (1, 7, 0)],
+        # 8
+        [(0, 0, 0), (0, 9, 0), (5, 0, 0), (0, 4, 0), (1, 0, 1), (0, 1, 1)],
+        # 9
+        [(0, 4, 1), (0, 5, 0), (0, 6, 0), (0, 7, 0), (0, 8, 0), (0, 9, 0), (1, 9, 1), (2, 9, 1), (3, 9, 1)],
+    ]
+
+    result = []
+    for i in range(len(start_xs)):
+        s_offset = start_xs[i]
+        e_offset = end_xs[i]
+        if (e_offset - s_offset) == 4:
+            result.append('1')
+            continue
+        num = 0
+        for signature in signature_list:
+            # print signature
+            if num == 1:  # 数字1不判断,因为前边已经判断过了
+                num += 1
+                continue
+            # print '特征:',num
+            match = True
+            for pp in signature:
+                x = pp[0]
+                y = pp[1]
+                is_p = pp[2]
+                if checkIsPoint(image, (s_offset + x, y)) is not is_p:
+                    match = False
+                    break
+            if match:
+                result.append(str(num))
+                break
+
+            num += 1
+    return result
+
+```
\ No newline at end of file