Skip to content

GitLab

  • Projects
  • Groups
  • Snippets
  • Help
    • Loading...
  • Help
    • Help
    • Support
    • Community forum
    • Submit feedback
    • Contribute to GitLab
  • Sign in / Register
K
kb
  • Project overview
    • Project overview
    • Details
    • Activity
    • Releases
  • Repository
    • Repository
    • Files
    • Commits
    • Branches
    • Tags
    • Contributors
    • Graph
    • Compare
  • Issues 2
    • Issues 2
    • List
    • Boards
    • Labels
    • Service Desk
    • Milestones
  • Merge requests 0
    • Merge requests 0
  • Operations
    • Operations
    • Incidents
  • Analytics
    • Analytics
    • Repository
    • Value Stream
  • Wiki
    • Wiki
  • Members
    • Members
  • Activity
  • Graph
  • Create a new issue
  • Commits
  • Issue Boards
Collapse sidebar
  • granite
  • kb
  • Wiki
    • Data_stream
    • Organ
  • enterprise

Last edited by songzp Sep 02, 2021
Page history

enterprise

数据来源

事业单位在线

http://www.gjsy.gov.cn/sydwfrxxcx/

主体类型

company_major_type: 4

爬虫相关

爬虫工作流转表

主要业务字段

字段名 注释 样例
company_name 事业单位名称 乐东黎族自治县第二小学
province_short 省份(英文缩写) HAIN
legal_person 法定代表人/负责人 罗人鹏
credit_no 统一信用代码 12468843428892871M
capital 开办资金 35.2万元
company_type 登记类型 事业单位
authority 登记管理机关 乐东黎族自治县事业单位登记管理局
company_status 单位状态 已废止
business_scope 宗旨和业务范围 实施小学义务教育,促进基础教育发展;小学学历教育和相关社会服务。
operation_startdate 有效期起始日期 2020-08-24
operation_enddate 有效期截止日期 2025-08-24
company_address 住址 乐东黎族自治县抱由镇吉祥路
capital_source 经费来源 财政补助(全额拨款)
organizer 举办单位 乐东黎族自治县教育局

上线MongoDB数据格式

{
     "spider_name": "enterprise_captcha_spider",
     "task_type": "事业单位在线",
     "spider_start_time": "2021-06-17 10:21:47",
     "task_result": 1000,
     "task_params": {
          "province": "SAX",
          "company_status": "废止",
          "company_name": "乐东黎族自治县第二小学",
          "credit_no": "12468843428892871M",
          "submit_time": "2021-06-16 19:29:44",
          "search_key": "12468843428892871M"
     },
     "companyinfo_item": {
          "province": "海南省",                    -- 省份
          "legal_person": "罗人鹏",                -- 法定代表人姓名
          "data_source": 1,
          "credit_no": "12468843428892871M",       -- 统一社会信用代码
          "lastupdatetime": "2021-06-17 10:21:53", -- 数据最后更新时间
          "province_code": 1024,                   -- 省份(数字编号)
          "operation_enddate": "",                 -- 有效期截止日期
          "operation_startdate": "",               -- 有效期起始日期
          "capital": "35.2万元",                   -- 开办资金
          "company_type": "事业单位",              -- 登记类型固定值为 事业单位
          "authority": "乐东黎族自治县事业单位登记管理局", -- 登记管理机关
          "company_status": "已废止",              -- 单位状态
          "business_scope": "实施小学义务教育,促进基础教育发展;小学学历教育和相关社会服务。", -- 宗旨和业务范围
          "create_time": "2021-06-17 10:21:53",
          "company_name": "乐东黎族自治县第二小学", -- 事业单位名称
          "province_short": "HAIN",                -- 省份(英文缩写)
          "company_address": "乐东黎族自治县抱由镇吉祥路", -- 住所
          "organizer": "乐东黎族自治县教育局",     -- 举办单位
          "data_status": 1,
          "capital_source": "财政补助(全额拨款)"   -- 经费来源
     },
     "result_code": "00000111",
     "mark": ""
}

上线后的数据格式

{
    "spider_name": "enterprise_captcha_spider",
    "task_type": "事业单位在线",
    "spider_start_time": "2021-08-19 20:02:25",
    "task_result": 1000,
    "task_params": {
        "province": "SAX",
        "search_key": "12610222766342952Y",
        "company_name": "宜君县法律援助中心",
        "credit_no": "12610222766342952Y"
    },
    "companyinfo_item": {
        "province": "陕西省",
        "legal_person": "张建军",
        "data_source": 1,
        "credit_no": "12610222766342952Y",
        "lastupdatetime": "2021-08-19 20:02:30",
        "province_code": 1010,
        "operation_enddate": "2023-03-09 00:00:00",
        "operation_startdate": "2018-03-09 00:00:00",
        "capital": "21万元",
        "company_type": "事业单位",
        "authority": "宜君县事业单位登记管理局",
        "company_status": "正常",
        "business_scope": "保障经济困难的公民获得必要的法律服务",
        "establish_date": "2018-03-09 00:00:00",
        "create_time": "2021-06-19 20:33:00",
        "company_name": "宜君县法律援助中心",
        "province_short": "SAX",
        "company_address": "宜君县宜阳北街",
        "organizer": "宜君县司法局",
        "data_status": 1,
        "capital_source": "全额拨款(全额拨款)",
        "org_code": "766342952",
        "company_name_digest": "3d5ac291be5de9a6dca29e209aaf5902",
        "have_changed": "no",
        "capital_src": "21万元",
        "company_major_type": 4,
        "company_minor_type": "4100",
        "legal_person_type": 0,
        "n_company_status": "正常",
        "area_code": "610000",
        "city_code": "610200",
        "city": "铜川市",
        "district_code": "610222",
        "district": "宜君县"
    },
    "result_code": "00000111",
    "mark": "",
    "deleted_company_name_digest": [
        
    ],
    "sync_status": "update",
    "online_spend_time": 1.1163170337677002
}

写bson文件后的格式

{
      "company_minor_type": "4100",
      "area_code": "610000",
      "company_address": "宜君县宜阳北街",
      "have_changed": "no",
      "city": "铜川市",
      "company_major_type": 4,
      "province": "陕西省",
      "operation_startdate": "2018-03-09 00:00:00",
      "company_status": "正常",
      "authority": "宜君县事业单位登记管理局",
      "company_name": "宜君县法律援助中心",
      "district_code": "610222",
      "create_time": "2021-06-19 20:33:00",
      "organizer": "宜君县司法局",
      "data_status": 1,
      "lastupdatetime": "2021-08-19 20:02:30",
      "province_short": "SAX",
      "capital_source": "全额拨款(全额拨款)",
      "n_company_status": "正常",
      "legal_person_type": 0,
      "business_scope": "保障经济困难的公民获得必要的法律服务",
      "credit_no": "12610222766342952Y",
      "district": "宜君县",
      "legal_person": "张建军",
      "company_name_digest": "3d5ac291be5de9a6dca29e209aaf5902",
      "capital": "21万元",
      "establish_date": "2018-03-09 00:00:00",
      "data_source": 1,
      "city_code": "610200",
      "operation_enddate": "2023-03-09 00:00:00",
      "company_type": "事业单位",
      "org_code": "766342952",
      "province_code": 1010,
      "capital_src": "21万元",
      "__metadata": {
        "task_type": "事业单位在线",
        "task_params": {
          "province": "SAX",
          "search_key": "12610222766342952Y",
          "company_name": "宜君县法律援助中心",
          "credit_no": "12610222766342952Y"
        },
        "deleted_company_name_digest": [],
        "source": "crawler_json",
        "update_status_mask": "00000111",
        "spider_start_time": "2021-08-19 20:02:25",
        "result_code": "00000111",
        "spider_name": "enterprise_captcha_spider",
        "online_spend_time": 4.955765962600708,
        "sync_status": "update",
        "data_type": "IC",
        "mark": "",
        "task_result": 1000
      }
    }

特殊主体库表结构

-- Table `special_enterprise`: one row per public institution (per the column
-- comments, "事业单位"), keyed by a surrogate auto-increment `id` and made
-- unique on `company_id`.
--
-- NOTE(review): `company_id` is char(32); presumably it holds the 32-character
-- digest of the institution name produced upstream -- confirm against the writer.
-- NOTE(review): the validity-period columns are stored as varchar(50), not
-- DATE/DATETIME, so range comparisons on them are string comparisons.
-- NOTE(review): in MySQL, CHARSET=utf8 is the 3-byte utf8mb3 encoding; consider
-- utf8mb4 if 4-byte characters can occur, after checking index key-length limits
-- on the target server version.
CREATE TABLE `special_enterprise` (
  -- Surrogate primary key.
  `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '自增主键',
  -- Province, English abbreviation.
  `province_short` varchar(5) DEFAULT NULL COMMENT '省份(英文缩写)',
  -- Institution name.
  `company_name` varchar(255) NOT NULL COMMENT '事业单位名称',
  -- Unique key of the entity; uniqueness is enforced by `idx_digest` below.
  `company_id` char(32) NOT NULL COMMENT '主体唯一键',
  -- Unified credit code.
  `credit_no` varchar(50) DEFAULT NULL COMMENT '统一信用代码',
  -- Registration type.
  `company_type` varchar(50) DEFAULT NULL COMMENT '类型',
  -- Legal representative / person in charge.
  `legal_person` varchar(255) DEFAULT NULL COMMENT '法定代表人/负责人',
  -- Contact address.
  `company_address` varchar(300) DEFAULT NULL COMMENT '联系地址',
  -- Start-up capital, stored as text.
  `capital` varchar(50) DEFAULT NULL COMMENT '开办资金(万元)',
  -- Status of the institution.
  `company_status` varchar(50) DEFAULT NULL COMMENT '单位状态',
  -- Registration authority.
  `authority` varchar(255) DEFAULT NULL COMMENT '登记管理机关',
  -- Organizing (sponsoring) unit.
  `organizer` varchar(255) DEFAULT NULL COMMENT '举办单位',
  -- Validity period start / end, stored as text.
  `operation_startdate` varchar(50) DEFAULT NULL COMMENT '有效期起始日期',
  `operation_enddate` varchar(50) DEFAULT NULL COMMENT '有效期截止日期',
  -- Purpose and business scope.
  `business_scope` text DEFAULT NULL COMMENT '宗旨和业务范围',
  -- Funding source.
  `capital_source` varchar(100) DEFAULT NULL COMMENT '经费来源',
  -- Soft-delete marker: 0 = valid, 10 = discarded/deleted.
  `use_flag` tinyint(4) DEFAULT '0' COMMENT '使用标记,0有效,10废弃删除',
  -- Insert time; defaults to the current timestamp.
  `create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '入库时间',
  -- Last-modified time; refreshed automatically on every UPDATE.
  `update_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`),
  -- Guarantees at most one row per `company_id`.
  UNIQUE KEY `idx_digest` (`company_id`),
  -- Secondary indexes for lookups by name, credit code, and insert/update time.
  KEY `idx_company_name` (`company_name`),
  KEY `idx_credit_no` (`credit_no`),
  KEY `idx_create_time` (`create_time`),
  KEY `idx_update_time` (`update_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Clone repository
  • README
  • basic_guidelines
  • basic_guidelines
    • basic_guidelines
    • dev_guide
    • project_build
    • 开发流程
  • best_practice
  • best_practice
    • AlterTable
    • RDS
    • azkaban
    • create_table
    • design
    • elasticsearch
    • elasticsearch
      • ES运维
    • logstash
View All Pages