Python+Pandas+SQLite

Python环境下SQLite的使用

#!/usr/bin/env python
# -*-coding:utf-8 -*-

import os
import re
import linecache
import math
# import time
# import datetime
import shutil
import numpy as np
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
os.chdir(os.path.split(os.path.realpath(__file__))[0])

#删除数据库
dir = './test.db'
if os.path.exists(dir):
    os.remove(dir)

#新建数据库
con = sqlite3.connect('./test.db')
# con = sqlite3.connect(':memory:') #在内存中创建
cur = con.cursor()

#创建表
cur.execute('CREATE TABLE IF NOT EXISTS hoge (hoge1, hoge2)')
cur.execute('CREATE TABLE IF NOT EXISTS fuga (fuga1, fuga2, fuga3)')

#获取表名
cur.execute('SELECT name from sqlite_master where type="table";')
print(cur.fetchall())

#获取列
cur.execute('PRAGMA TABLE_INFO(hoge)')
print(cur.fetchall())

#写记录
cur.execute('CREATE TABLE hoge2(id INTEGER PRIMARY KEY AUTOINCREMENT, name STRING, age INTEGER)')
cur.execute('INSERT INTO hoge2(name, age) values("Taro", 12)')
cur.execute('INSERT INTO hoge2(name) values("Hanako")')
cur.execute('INSERT INTO hoge2(id, name, age) values(20, "Akiko", 2)')
cur.execute('INSERT INTO hoge2(name, age) values("Jiro", 10)')
cur.execute('INSERT INTO hoge2 values(3, "Julia", 15)')
cur.execute('SELECT * FROM hoge2')
print(cur.fetchall())

#使用?阻止SQL注入攻击
sql = 'INSERT INTO hoge2 VALUES(?, ?, ?)'
data = ((4, 'John', 30))
cur.execute(sql, data)
cur.execute('SELECT * FROM hoge2')
print(cur.fetchall())

#一次增加多行记录
data = [(5, "Taro", 18),
(None, "Bob", 19),
(None, "Masa", 16),
(None, "Jiro", 35),
(None, "Satoshi", 40)]
cur.executemany('INSERT INTO hoge2 VALUES(?,?,?);',data);
cur.execute('SELECT * FROM hoge2')
print(cur.fetchall())

#修改记录
cur.execute('update hoge2 set age=20 where name="Julia"')
cur.execute('SELECT * FROM hoge2')
print(cur.fetchall())

#删除记录
cur.execute('delete from hoge2 where age>14')
cur.execute('SELECT * FROM hoge2')
print(cur.fetchall())

#读取指定行
cur.execute('SELECT * FROM hoge2')
print(cur.fetchone()) #第一行
cur.execute('SELECT * FROM hoge2')
print(cur.fetchmany(2)) #前两行

#读取指定列
cur.execute('SELECT id, name FROM hoge2') #读取id, name列
print(cur.fetchall())

#读取符合条件的记录
cur.execute('SELECT * FROM hoge2 WHERE age <= 12')
print(cur.fetchall())
cur.execute('SELECT * FROM hoge2 WHERE name = "Hanako"')
print(cur.fetchall())
cur.execute('SELECT * FROM hoge2 WHERE name <> "Jiro"')
print(cur.fetchall())
cur.execute('SELECT * FROM hoge2 order by age asc') #按照年龄顺序排序
print(cur.fetchall())
cur.execute('SELECT * FROM hoge2 order by age desc') #按照年龄降序排序
print(cur.fetchall())

#增加一列
cur.execute('ALTER TABLE hoge ADD COLUMN hoge3')
cur.execute('PRAGMA TABLE_INFO(hoge)')
print(cur.fetchall())

#重命名列
cur.execute('ALTER TABLE hoge RENAME TO hoge_tmp')
cur.execute('CREATE TABLE hoge (hogehoge1, hogehoge2)')
cur.execute('INSERT INTO hoge (hogehoge1, hogehoge2) SELECT hoge1, hoge2 FROM hoge_tmp')
cur.execute('DROP TABLE hoge_tmp')
cur.execute('PRAGMA TABLE_INFO(hoge)')
print(cur.fetchall())

#删除列
cur.execute('ALTER TABLE hoge RENAME TO hoge_tmp')
cur.execute('CREATE TABLE hoge (hogehoge2)')
cur.execute('INSERT INTO hoge (hogehoge2) SELECT hoge2 FROM hoge_tmp')
cur.execute('DROP TABLE hoge_tmp')
cur.execute('PRAGMA TABLE_INFO(hoge)')
print(cur.fetchall())

#重命名表
cur.execute('ALTER TABLE hoge rename to hogehoge')
cur.execute('ALTER TABLE fuga rename to fugafuga')
cur.execute('SELECT name from sqlite_master where type="table";')
print(cur.fetchall())

#删除表
cur.execute('DROP TABLE hogehoge')
cur.execute('SELECT name from sqlite_master where type="table";')
print(cur.fetchall())

con.commit()
cur.close()
con.close()

注:(1)重命名列和删除列需要通过复制表来进行。(2)一次增加多行记录,使用executemany而非execute方法。


使用pandas.DataFrame读写数据库

df = pd.DataFrame({'a': np.arange(5),
                   'b': np.arange(5) + 1,
                   'c': np.arange(5) + 2})
print(df)

conn = sqlite3.connect(':memory:')
cur = conn.cursor()

df.to_sql('sample', conn, if_exists='replace')

cur.execute('SELECT * FROM sample')
print(cur.fetchall())

df2 = pd.read_sql('SELECT * FROM sample', conn).set_index('index')
print(df2)

cur.close()
conn.close()

Pandas与SQLite联用

#!/usr/bin/env python
# -*-coding:utf-8 -*-

import os
import re
import linecache
import math
# import time
# import datetime
import shutil
import numpy as np
import pandas as pd
import sqlite3 as sql
import matplotlib.pyplot as plt
os.chdir(os.path.split(os.path.realpath(__file__))[0])

#Pandas获取数据
# weather = pd.read_csv('https://github.com/alanjones2/dataviz/raw/master/londonweather.csv')
weather = pd.read_csv('londonweather.csv')
print(weather)

#创建数据库
dir = './weather.db'
if os.path.exists(dir):
    os.remove(dir)
conn = sql.connect('weather.db') 
weather.to_sql('weather', conn)

#查询数据库并返回结果
weather = pd.read_sql('SELECT * FROM weather', conn)
print(weather)

#获取2010年、1960年的天气数据
y2010 = pd.read_sql('SELECT * FROM weather WHERE Year == 2010', conn)
print(y2010)
y1960 = pd.read_sql('SELECT * FROM weather WHERE Year == 1960', conn)
print(y1960)

#绘制2010年、1960年的Tmax图
ax2010 = y2010.plot(y='Tmax') 
ax = y1960.plot(y='Tmax',color = 'red', ax=ax2010) 
ax.legend(['2010','1960'])
plt.show()

#获取Tmax大于25°C的记录
high = pd.read_sql('SELECT Year,Month,Tmax FROM weather WHERE Tmax > 25', conn)
print(high)

#对Tmax大于25°C的记录降序排序
high = pd.read_sql('SELECT Year,Month,Tmax FROM weather WHERE Tmax > 25 ORDER BY Tmax DESC', conn)
print(high)

#对Tmax大于25°C的记录绘制直方图
high.plot.hist(y='Month', xticks=high['Month'])
plt.show()

#绘制所有年份6月Tmax的条形图
july = pd.read_sql('SELECT Year,Month,Tmax FROM weather WHERE month == 6', conn)
july.plot.bar(x='Year', y='Tmax', figsize=(20,5));
plt.show()

参考

Python Pandas and SQLite

pythonでSQLite3を使う手順のメモ

Python sqlite3 – Tutorial and Programs

PythonでSQLite3を扱うための具体的なサンプルコード

Pythonで簡単にデータベースを扱う(SQLite3)

Python 数据库的Connection、Cursor两大对象

Python读取sqlite数据并导入DataFrame的两种方法