pandas sql
# 概述
pandas.io.sql 模块提供了一组查询包装器,以便于数据检索和减少对特定于 DB 的 API 的依赖性
# 主要方法
read_sql_table(table_name, con[, schema, …]) 将 SQL 数据库表读入 DataFrame
read_sql_query(sql, con[, index_col, …]) 将 SQL 查询读入 DataFrame
read_sql(sql, con[, index_col, …]) 将 SQL 查询或数据库表读入 DataFrame
DataFrame.to_sql(name, con[, schema, …]) 将存储在 DataFrame 中的记录写入 SQL 数据库
# 主要引擎
engine = create_engine('postgresql://scott:tiger@localhost:5432/mydatabase')
engine = create_engine('mysql+mysqldb://scott:tiger@localhost/foo')
engine = create_engine('oracle://scott:tiger@127.0.0.1:1521/sidname')
engine = create_engine('mssql+pyodbc://mydsn')
# sqlite://<nohostname>/<path>
# where <path> is relative:
engine = create_engine('sqlite:///foo.db')
# or absolute, starting with a slash:
engine = create_engine('sqlite:////absolute/path/to/foo.db')
2
3
4
5
6
7
8
9
10
11
12
13
14
# 创建链接数据库引擎
import pandas as pd
from pandas import DataFrame
from sqlalchemy import create_engine
engine = create_engine("mysql+pymysql://root:root@localhost:3306/test", encoding="utf8")
# create_engine()有个参数为echo,默认为False,如果指定为True,则会输出查询过程
2
3
4
5
# 读取
pandas.read_sql_table(table_name, con, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None)
data1 = pd.read_sql_table('user', con=engine)
data1
2
read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None,chunksize=None)
该方法可以执行增删改查操作
# 3.1 查询1
select_sql1 = "select * from user"
data2 = pd.read_sql_query(select_sql1, con=engine)
data2
2
3
4
# 查询2
select_sql2 = "select name,age from user where age > 20"
data3 = pd.read_sql_query(select_sql2, con=engine)
data3
2
3
4
# 3.2 增加
insert_sql = "insert into user (name, age, sex) values %(data)s"
params = {'data': ['赵六', '36', '男']}
pd.read_sql_query(insert_sql, con=engine, params=params)
2
3
4
# 3.3 修改
update_sql = "update user set age=50 where name=%(data)s"
params = {'data':'赵六'}
pd.read_sql_query(update_sql, con=engine, params=params)
2
3
4
# 3.4 删除
delete_sql = "delete from user where name=%(data)s"
params = {'data': '赵六'}
pd.read_sql_query(delete_sql, con=engine, params=params)
2
3
4
pandas.read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None)
综合前两个方法,只是为了兼容。用法类似
# 写入
to_sql(name, con, flavor='sqlite', schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None)
if_exists='replace',如果数据库中有 test_cjk 表,则替换。
if_exists='append',如果数据库中有 test_cjk 表,则在表后面添加。
if_exists='fail',如果数据库中有 test_cjk 表,则在写入失败。
data = [['Jack', 40, '男'],
['Bob', 45, '男'],
['Linda', 28, '女']]
columns = ['name', 'age', 'sex']
df = DataFrame(data, columns=columns)
df
2
3
4
5
6
res = df.to_sql('user', con=engine, if_exists='append', index=False)
res
2