Scraping Data with Python and Saving It to an Oracle Database


Environment: Windows 7, Python 3.6

Tool: PyCharm 2017

Install the driver with a single command: pip install cx_Oracle

Then import cx_Oracle:

import cx_Oracle

1. Connect to the database from Python. There are several ways to write this and plenty of examples online; one common form builds a DSN with makedsn, and an alternative connect-string form is sketched after the snippet below.

try:
    # Build a DSN for the local instance (SID ORABI) and open the connection
    tns = cx_Oracle.makedsn('localhost', '1521', 'ORABI')
    db = cx_Oracle.connect('username', 'password', tns)
    print('Connection succeeded...')
except Exception as e:
    print(e)
    print('Connection failed...')
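
As an alternative, cx_Oracle.connect() also accepts a single EZ Connect style string, and makedsn() can take a service_name keyword instead of a SID. A minimal sketch, assuming ORABI is also registered as a service name with the listener (adjust to your own environment):

import cx_Oracle

# EZ Connect string: host:port/service_name (assumes ORABI is a service name)
db = cx_Oracle.connect('username', 'password', 'localhost:1521/ORABI')

# Or build the DSN explicitly with a service name instead of a SID
dsn = cx_Oracle.makedsn('localhost', 1521, service_name='ORABI')
db = cx_Oracle.connect('username', 'password', dsn)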

2. Create a cursor and use it to run queries, create tables, insert, and delete.

The pattern is always the same: create the cursor, execute the SQL, commit, close the cursor, and finally close the connection; a generic skeleton follows.
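
A minimal sketch of that lifecycle, using try/finally so the cursor is always released; the table and column names here are placeholders, not part of the original project:

cr = db.cursor()
try:
    # any DML/DDL statement goes here; :1/:2 are Oracle bind placeholders
    cr.execute('update some_table set some_col = :1 where id = :2', ('new value', 1))
    db.commit()      # make the change permanent
finally:
    cr.close()       # release the cursor even if execute() raised
# db.close() once all database work is finished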

Create a table

cr = db.cursor()
sql = ("CREATE TABLE tb_products("
       "titles char(100),"
       "shops char(30),"
       "deals char(20),"
       "prices char(10),"
       "locations char(50))")
cr.execute(sql)
db.commit()
cr.close()
db.close()
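
One thing to be aware of: CHAR columns in Oracle are fixed-length and blank-padded, which can be awkward for scraped text of varying length. A variant of the same DDL using VARCHAR2, offered purely as a suggestion:

sql = ("CREATE TABLE tb_products("
       "titles varchar2(100),"
       "shops varchar2(30),"
       "deals varchar2(20),"
       "prices varchar2(10),"
       "locations varchar2(50))")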

Query the data

cr = db.cursor()
sql = 'select * from GB_USERINFO'
cr.execute(sql)
rows = cr.fetchall()    # fetch every row of the result set
for row in rows:
    print(row)
cr.close()
db.close()

Insert data

cr = db.cursor()
sql = ("insert into tb_products(TITLES,SHOPS,DEALS,PRICES,LOCATIONS) VALUES(:1,:2,:3,:4,:5)")

Note an Oracle-specific detail here: most examples online write to MySQL, where the placeholders after VALUES are %s. Passing that syntax to Oracle raises ORA-01036: illegal variable name/number, so the placeholders must be written as bind variables: :1, :2, and so on.

result holds the scraped values and is passed as the bind parameters for those placeholders:

cr.execute(sql, result)
db.commit()
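
Positional binds are not the only option: cx_Oracle also accepts named bind variables passed as a dict, and executemany() can insert a whole batch in one call. A minimal sketch; the column values shown are purely illustrative:

cr = db.cursor()

# Named bind variables: the dict keys match the :name placeholders
sql_named = ("insert into tb_products(TITLES,SHOPS,DEALS,PRICES,LOCATIONS) "
             "VALUES(:titles,:shops,:deals,:prices,:locations)")
cr.execute(sql_named, {'titles': 'iPad mini', 'shops': 'some shop',
                       'deals': '100', 'prices': '1999', 'locations': 'Beijing'})

# executemany() binds a list of tuples in a single round trip
sql_pos = ("insert into tb_products(TITLES,SHOPS,DEALS,PRICES,LOCATIONS) "
           "VALUES(:1,:2,:3,:4,:5)")
rows = [('title 1', 'shop 1', 'deals 1', 'price 1', 'location 1'),
        ('title 2', 'shop 2', 'deals 2', 'price 2', 'location 2')]
cr.executemany(sql_pos, rows)

db.commit()
cr.close()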

If the scraped data contains special characters, clean them up before inserting; otherwise the character-set conversion to the database may fail. A simple sanitising sketch follows.
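
A minimal cleaning helper, assuming that characters the database character set cannot represent can simply be dropped; the clean() name and the GBK encoding are assumptions, so adjust them to your own database character set:

def clean(text, encoding='gbk'):
    """Drop characters the target database encoding cannot represent.

    Hypothetical helper: replace 'gbk' with your database character set.
    """
    return text.encode(encoding, errors='ignore').decode(encoding)

result = tuple(clean(field) for field in result)  # sanitise before binding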

The full code is pasted below; the scraping part is based on 崔庆才's tutorial on 天善.

# coding=utf-8
import cx_Oracle
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from config import *
from urllib.parse import quote
from pyquery import PyQuery as pq


browser = webdriver.Chrome()
try:
    tns = cx_Oracle.makedsn('localhost', '1521', 'ORABI')
    db = cx_Oracle.connect('username', 'password', tns)
    print('Connection succeeded...')
except Exception as e:
    print(e)
    print('Connection failed...')
# browser = webdriver.PhantomJS(service_args=SERVICE_ARGS)
'''
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
browser = webdriver.Chrome(chrome_options=chrome_options)
'''
wait = WebDriverWait(browser, 10)


def index_page(page):
    """
    Scrape one search-result (index) page.
    :param page: page number
    """
    print('Crawling page', page)
    try:
        url = 'https://s.taobao.com/search?q=' + quote(KEYWORD)
        browser.get(url)
        if page > 1:
            # Type the target page number into the pager box and submit
            input = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager div.form > input')))
            submit = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '#mainsrp-pager div.form > span.btn.J_Submit')))
            input.clear()
            input.send_keys(page)  # note: send_keys() has a lowercase k
            submit.click()
        wait.until(
            EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'), str(page)))
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.m-itemlist .items .item')))
        get_product()
    except TimeoutException:
        index_page(page)



def get_product():
    """
    Extract the product data from the current page.
    """
    html = browser.page_source
    doc = pq(html)
    items = doc('#mainsrp-itemlist .items .item').items()
    for item in items:
        product = (
            item.find('.title').text(),
            item.find('.shop').text(),
            item.find('.deal-cnt').text(),
            item.find('.price').text().replace('¥', ''),
            item.find('.location').text()
        )
        print(product)
        save_to_oracle(product)



def save_to_oracle(result):
    """
    Write one scraped record to the database.
    :param result: tuple of (title, shop, deal count, price, location)
    :return:
    """
    cr = db.cursor()
    sql = ("insert into tb_products(TITLES,SHOPS,DEALS,PRICES,LOCATIONS) VALUES(:1,:2,:3,:4,:5)")
    try:
        cr.execute(sql, result)
        db.commit()
        print('Saved!')
    except Exception as e:
        print(e)
        print('Save failed...')


# Create the table
def create_table():
    """
    Create the table tb_products in the database.
    :return:
    """
    cr = db.cursor()
    sql = ("CREATE TABLE tb_products("
           "titles char(100),"
           "shops char(30),"
           "deals char(20),"
           "prices char(10),"
           "locations char(50))")
    try:
        cr.execute(sql)
        db.commit()
        print('Table created!')
    except Exception as e:
        print(e)
        print('Table creation failed...')



# Query
def selectsql():
    """
    Test query of the table data.
    :return:
    """
    cr = db.cursor()
    sql = 'select * from GB_USERINFO'
    cr.execute(sql)
    rows = cr.fetchall()
    for row in rows:
        print(row)
    cr.close()

def main():
    # create_table()
    for i in range(1, MAX_PAGE + 1):
        index_page(i)
    db.close()


if __name__ == '__main__':
    main()








The code in config.py:

MAX_PAGE = 100
KEYWORD = 'ipad'

