第129集_SQLAlchemy高级用法

1. 关系映射进阶

1.1 一对一关系

一对一关系是指两个表之间只有一个匹配的记录。在SQLAlchemy中，可以使用uselist=False参数来定义一对一关系。

from sqlalchemy import create_engine, Column, Integer, String, ForeignKey
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class User(Base):
    __tablename__ = 'users'
    
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    # 一对一关系
    profile = relationship("Profile", uselist=False, back_populates="user")

class Profile(Base):
    __tablename__ = 'profiles'
    
    id = Column(Integer, primary_key=True)
    bio = Column(String(200))
    user_id = Column(Integer, ForeignKey('users.id'), unique=True)
    # 反向关系
    user = relationship("User", back_populates="profile")

1.2 一对多关系

一对多关系是最常见的关系类型，一个记录可以关联多个其他记录。

class Post(Base):
    __tablename__ = 'posts'
    
    id = Column(Integer, primary_key=True)
    title = Column(String(100))
    content = Column(String)
    user_id = Column(Integer, ForeignKey('users.id'))
    
    # 多对一关系
    author = relationship("User", back_populates="posts")

class User(Base):
    __tablename__ = 'users'
    
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    
    # 一对多关系
    posts = relationship("Post", back_populates="author")

1.3 多对多关系

多对多关系需要一个中间表来管理两个表之间的关联。

from sqlalchemy import Table

# 创建中间表
student_course = Table(
    'student_course',
    Base.metadata,
    Column('student_id', Integer, ForeignKey('students.id'), primary_key=True),
    Column('course_id', Integer, ForeignKey('courses.id'), primary_key=True)
)

class Student(Base):
    __tablename__ = 'students'
    
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    
    # 多对多关系
    courses = relationship("Course", secondary=student_course, back_populates="students")

class Course(Base):
    __tablename__ = 'courses'
    
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    
    # 多对多关系
    students = relationship("Student", secondary=student_course, back_populates="courses")

2. 高级查询技术

2.1 子查询

子查询允许在查询中嵌套另一个查询。

from sqlalchemy import select, func

# 子查询：获取每个用户的最新帖子
latest_posts = (
    select(
        Post.user_id,
        func.max(Post.id).label('latest_post_id')
    )
    .group_by(Post.user_id)
    .subquery()
)

# 查询最新帖子的详细信息
query = (
    select(Post)
    .join(latest_posts, Post.id == latest_posts.c.latest_post_id)
)

# 执行查询
with Session() as session:
    results = session.execute(query).scalars().all()
    for post in results:
        print(f"用户 {post.author.name} 的最新帖子: {post.title}")

2.2 连接查询

连接查询用于从多个表中获取相关数据。

# 内连接
query = select(User, Post).join(Post, User.id == Post.user_id)

# 左连接
query = select(User, Post).outerjoin(Post, User.id == Post.user_id)

# 执行查询
with Session() as session:
    results = session.execute(query).all()
    for user, post in results:
        if post:
            print(f"用户 {user.name} 有帖子: {post.title}")
        else:
            print(f"用户 {user.name} 没有帖子")

2.3 窗口函数

窗口函数用于在查询结果集中进行排名、分组统计等操作。

from sqlalchemy import over, desc

# 使用窗口函数获取每个用户的帖子排名
query = (
    select(
        Post,
        func.row_number().over(
            partition_by=Post.user_id,
            order_by=desc(Post.id)
        ).label('post_rank')
    )
)

# 执行查询
with Session() as session:
    results = session.execute(query).all()
    for post, rank in results:
        print(f"用户 {post.author.name} 的帖子 {post.title} 排名: {rank}")

3. 加载策略

3.1 延迟加载

延迟加载是SQLAlchemy的默认加载策略，只有在访问关系属性时才会执行查询。

with Session() as session:
    # 只查询用户，不查询帖子
    user = session.query(User).filter_by(name='Alice').first()
    print("已查询用户")
    
    # 这里才会执行查询帖子的SQL
    posts = user.posts
    print(f"用户有 {len(posts)} 个帖子")

3.2 预加载（Eager Loading）

预加载可以减少SQL查询次数，提高性能。

from sqlalchemy.orm import joinedload, selectinload

# 使用joinedload（左连接）预加载
with Session() as session:
    user = session.query(User).options(joinedload(User.posts)).filter_by(name='Alice').first()
    # 这里不会再执行额外查询
    print(f"用户有 {len(user.posts)} 个帖子")

# 使用selectinload（子查询）预加载
with Session() as session:
    users = session.query(User).options(selectinload(User.posts)).all()
    for user in users:
        print(f"用户 {user.name} 有 {len(user.posts)} 个帖子")

3.3 选择加载

选择加载允许根据条件选择不同的加载策略。

from sqlalchemy.orm import defaultload

# 默认延迟加载，特定关系预加载
with Session() as session:
    user = session.query(User).options(
        defaultload(User.posts).joinedload()
    ).filter_by(name='Alice').first()
    print(f"用户有 {len(user.posts)} 个帖子")

4. 迁移管理

4.1 Alembic介绍

Alembic是SQLAlchemy的官方迁移工具，用于管理数据库模式的变更。

4.2 安装Alembic

pip install alembic

4.3 初始化Alembic

alembic init alembic

4.4 配置Alembic

编辑alembic.ini文件，配置数据库连接：

# 示例：SQLite连接
[alembic]
script_location = alembic

# 配置数据库URL
# sqlite:///mydatabase.db
# mysql+pymysql://user:password@localhost/mydatabase
# postgresql+psycopg2://user:password@localhost/mydatabase

# 示例配置
[alembic]
script_location = alembic

sqlalchemy.url = sqlite:///sqlalchemy_orm_demo.db

4.5 创建迁移脚本

# 自动生成迁移脚本
 alembic revision --autogenerate -m "Add User and Post tables"

# 手动创建迁移脚本
alembic revision -m "Add Profile table"

4.6 执行迁移

# 升级到最新版本
alembic upgrade head

# 升级到特定版本
alembic upgrade 1234567890abc

# 降级一个版本
alembic downgrade -1

# 降级到基础版本
alembic downgrade base

5. 高级关系模式

5.1 自引用关系

自引用关系用于表示表内部的层次结构。

class Category(Base):
    __tablename__ = 'categories'
    
    id = Column(Integer, primary_key=True)
    name = Column(String(50))
    parent_id = Column(Integer, ForeignKey('categories.id'))
    
    # 自引用关系
    children = relationship("Category", backref="parent", remote_side=[id])

# 使用自引用关系
with Session() as session:
    # 创建分类
    electronics = Category(name='Electronics')
    computers = Category(name='Computers', parent=electronics)
    phones = Category(name='Phones', parent=electronics)
    
    session.add_all([electronics, computers, phones])
    session.commit()
    
    # 查询分类及其子分类
    root_categories = session.query(Category).filter_by(parent_id=None).all()
    for category in root_categories:
        print(f"分类: {category.name}")
        for child in category.children:
            print(f"  - {child.name}")

5.2 多态关系

多态关系允许不同类型的对象共享相同的关系。

from sqlalchemy.ext.declarative import declared_attr

class BaseModel(Base):
    __abstract__ = True
    
    id = Column(Integer, primary_key=True)
    type = Column(String(50))
    
    @declared_attr
    def __mapper_args__(cls):
        if cls.__name__ == 'BaseModel':
            return {}
        return {
            'polymorphic_identity': cls.__name__.lower(),
            'polymorphic_on': 'type'
        }

class Article(BaseModel):
    __tablename__ = 'articles'
    
    title = Column(String(100))
    content = Column(String)

class Image(BaseModel):
    __tablename__ = 'images'
    
    filename = Column(String(100))
    size = Column(Integer)

class Comment(Base):
    __tablename__ = 'comments'
    
    id = Column(Integer, primary_key=True)
    content = Column(String)
    target_id = Column(Integer)
    target_type = Column(String(50))
    
    # 多态关系
    target = relationship(
        BaseModel,
        primaryjoin="and_(Comment.target_id == BaseModel.id, Comment.target_type == BaseModel.type)"
    )

6. 自定义类型和扩展

6.1 自定义类型

可以创建自定义的SQLAlchemy类型来处理特殊数据。

import json
from sqlalchemy.types import TypeDecorator, TEXT

class JSONType(TypeDecorator):
    impl = TEXT
    
    def process_bind_param(self, value, dialect):
        if value is None:
            return None
        return json.dumps(value)
    
    def process_result_value(self, value, dialect):
        if value is None:
            return None
        return json.loads(value)

# 使用自定义JSON类型
class Product(Base):
    __tablename__ = 'products'
    
    id = Column(Integer, primary_key=True)
    name = Column(String(100))
    attributes = Column(JSONType)  # 使用自定义JSON类型

# 使用自定义类型
with Session() as session:
    product = Product(
        name='Smartphone',
        attributes={'color': 'black', 'storage': '128GB', 'camera': '48MP'}
    )
    session.add(product)
    session.commit()
    
    # 查询产品
    product = session.query(Product).filter_by(name='Smartphone').first()
    print(f"产品属性: {product.attributes}")
    print(f"颜色: {product.attributes['color']}")

6.2 事件监听

SQLAlchemy提供了事件系统，可以监听各种数据库事件。

from sqlalchemy import event
from sqlalchemy.orm import Session

# 监听before_insert事件
@event.listens_for(User, 'before_insert')
def before_user_insert(mapper, connection, target):
    print(f"即将插入用户: {target.name}")
    # 可以在这里修改数据
    target.name = target.name.capitalize()

# 监听after_update事件
@event.listens_for(User, 'after_update')
def after_user_update(mapper, connection, target):
    print(f"用户已更新: {target.name}")

# 监听session的before_commit事件
@event.listens_for(Session, 'before_commit')
def before_commit(session):
    print(f"会话即将提交，有 {len(session.dirty)} 个脏对象")

7. 性能优化

7.1 使用索引

为经常查询的列添加索引可以提高查询性能。

from sqlalchemy import Index

class User(Base):
    __tablename__ = 'users'
    
    id = Column(Integer, primary_key=True)
    username = Column(String(50), unique=True)
    email = Column(String(100), unique=True)
    age = Column(Integer)
    
    # 添加复合索引
    __table_args__ = (
        Index('idx_user_age_name', 'age', 'username'),
    )

7.2 批量操作

使用批量操作可以减少数据库交互次数。

# 批量插入
with Session() as session:
    users = [
        User(name='User1', email='user1@example.com'),
        User(name='User2', email='user2@example.com'),
        User(name='User3', email='user3@example.com')
    ]
    # 使用add_all进行批量插入
    session.add_all(users)
    session.commit()

# 批量更新
with Session() as session:
    # 批量更新所有年龄大于30的用户
    result = session.query(User).filter(User.age > 30).update({'age': User.age + 1})
    session.commit()
    print(f"更新了 {result} 个用户")

# 批量删除
with Session() as session:
    # 批量删除所有年龄小于18的用户
    result = session.query(User).filter(User.age < 18).delete()
    session.commit()
    print(f"删除了 {result} 个用户")

7.3 使用原生SQL

对于复杂查询，可以使用原生SQL提高性能。

# 使用原生SQL查询
with engine.connect() as conn:
    result = conn.execute("SELECT * FROM users WHERE age > 30")
    for row in result:
        print(row)

# 使用ORM执行原生SQL
with Session() as session:
    users = session.execute(
        "SELECT * FROM users WHERE age > :age",
        {'age': 30}
    ).scalars().all()
    for user in users:
        print(user)

8. 总结

本集我们学习了SQLAlchemy的高级用法，包括：

关系映射进阶：一对一、一对多、多对多关系的定义和使用
高级查询技术：子查询、连接查询、窗口函数
加载策略：延迟加载、预加载、选择加载
迁移管理：使用Alembic管理数据库模式变更
高级关系模式：自引用关系、多态关系
自定义类型和扩展：创建自定义类型、事件监听
性能优化：索引、批量操作、原生SQL

这些高级特性使SQLAlchemy成为一个功能强大的ORM框架，可以满足复杂应用的需求。在下一集中，我们将学习数据库综合项目，将所学知识应用到实际项目中。