Skip to content

Python 高级特性

深入学习装饰器、生成器、上下文管理器等 Python 高级特性

📖 学习目标

  • 掌握装饰器的原理和使用
  • 理解生成器和迭代器
  • 学会使用上下文管理器
  • 熟练使用列表推导式和生成器表达式
  • 理解 Python 的类型系统
  • 掌握魔术方法的使用

预计学习时间:3-4 天
难度:⭐⭐⭐

1. 装饰器(Decorators)

装饰器是 Python 的一个强大特性,用于在不修改原函数的情况下扩展其功能。

1.1 函数装饰器基础

python
# 最简单的装饰器
def my_decorator(func):
    """Wrap ``func`` so a message is printed before and after each call.

    Returns the inner ``wrapper``. No metadata is copied from ``func``, so the
    decorated function reports ``wrapper`` as its ``__name__``.
    """
    def wrapper(*args, **kwargs):
        print("Before function")
        # Forward every argument untouched and keep the return value.
        result = func(*args, **kwargs)
        print("After function")
        return result
    return wrapper

# 使用装饰器
@my_decorator
def say_hello(name):
    print(f"Hello, {name}!")
    return f"Greeted {name}"

# 等价于:
# say_hello = my_decorator(say_hello)

result = say_hello("Alice")
# 输出:
# Before function
# Hello, Alice!
# After function

1.2 保留函数元数据

python
from functools import wraps

def my_decorator(func):
    """Return a pass-through wrapper that keeps the metadata of ``func``."""
    @wraps(func)  # copy __name__, __doc__, etc. from func onto wrapper
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper

@my_decorator
def greet(name):
    """问候函数"""
    return f"Hello, {name}"

print(greet.__name__)  # 'greet' 而非 'wrapper'
print(greet.__doc__)   # '问候函数'

1.3 带参数的装饰器

python
def repeat(times):
    """Build a decorator that calls the wrapped function ``times`` times.

    The decorated function returns a list with the result of every call,
    in call order.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            return [func(*args, **kwargs) for _ in range(times)]
        return wrapper
    return decorator

@repeat(times=3)
def say_hello():
    print("Hello!")
    return "Done"

result = say_hello()
# 输出 Hello! 三次
# result = ['Done', 'Done', 'Done']

1.4 类装饰器

python
class CountCalls:
    """Class-based decorator that counts calls to the wrapped function.

    Attributes:
        func: the wrapped callable.
        count: how many times the decorated object has been called.
    """

    def __init__(self, func):
        import functools  # local import keeps this snippet self-contained

        # Copy __name__, __doc__, ... onto the instance so the decorated
        # object still identifies itself as the original function.
        functools.update_wrapper(self, func)
        self.func = func
        self.count = 0

    def __call__(self, *args, **kwargs):
        """Increment the counter, report the call, then delegate to ``func``."""
        self.count += 1
        print(f"Call {self.count} of {self.func.__name__}")
        return self.func(*args, **kwargs)

@CountCalls
def process_data(data):
    return data * 2

process_data(10)  # Call 1 of process_data
process_data(20)  # Call 2 of process_data
print(process_data.count)  # 2

1.5 Dify 中的装饰器实例

python
# 来自 Dify 源码的真实例子

# 1. 登录验证装饰器
from flask import request
from functools import wraps

def login_required(func):
    """Reject the request unless it carries an ``Authorization`` header.

    When the header is missing or empty the wrapper returns
    ``({'error': 'Unauthorized'}, 401)`` without calling ``func``;
    otherwise it forwards all arguments to ``func``.
    """
    @wraps(func)
    def decorated(*args, **kwargs):
        token = request.headers.get('Authorization')
        if not token:
            return {'error': 'Unauthorized'}, 401
        # Placeholder: token validation is not implemented here, so any
        # non-empty header value is accepted.
        return func(*args, **kwargs)
    return decorated

@login_required
def get_user_data():
    return {'data': 'user info'}

# 2. 性能监控装饰器
import time

def timing(func):
    """Print how long each call to ``func`` takes and return its result.

    The duration is measured with ``time.perf_counter()``, a monotonic
    high-resolution clock, so system clock adjustments cannot produce
    negative or skewed timings. Nothing is printed when ``func`` raises.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"{func.__name__} took {elapsed:.2f}s")
        return result
    return wrapper

@timing
def process_document(doc):
    # 处理文档...
    pass

# 3. 缓存装饰器
from functools import lru_cache

@lru_cache(maxsize=128)
def expensive_computation(n):
    """Return the sum of the integers ``0`` .. ``n - 1``.

    Results are memoized by the built-in ``lru_cache``, which keeps at most
    128 entries.
    """
    return sum(range(n))

2. 生成器(Generators)

生成器是一种特殊的迭代器,可以按需生成值,节省内存。

2.1 生成器函数

python
# 普通函数 vs 生成器函数
def normal_range(n):
    """Eager version: build the whole list ``[0, 1, ..., n - 1]`` up front."""
    return list(range(n))

def generator_range(n):
    """Lazy version: produce ``0 .. n - 1`` one value at a time."""
    for i in range(n):
        yield i  # yield (not return) makes this a generator function

# 使用生成器
gen = generator_range(5)
print(next(gen))  # 0
print(next(gen))  # 1

# 遍历生成器
for num in generator_range(5):
    print(num)

# 内存对比
import sys
normal = normal_range(1000000)
print(sys.getsizeof(normal))  # 很大,约 8MB

gen = generator_range(1000000)
print(sys.getsizeof(gen))     # 很小,约 200 bytes

2.2 生成器表达式

python
# 列表推导式
squares_list = [x**2 for x in range(10)]  # 立即生成列表

# 生成器表达式(惰性求值)
squares_gen = (x**2 for x in range(10))   # 按需生成

# 生成器表达式在处理大数据时更高效
# 计算大文件的总行数(用 with 打开文件,确保读取结束后文件被关闭)
# with open('huge_file.txt') as f:
#     total_lines = sum(1 for _ in f)

2.3 生成器的高级用法

python
# 1. 生成器委托(yield from)
def generator1():
    """Yield 1, then 2."""
    yield 1
    yield 2

def generator2():
    """Yield 3, then 4."""
    yield 3
    yield 4

def combined_generator():
    """Yield everything from ``generator1()``, then from ``generator2()``."""
    # ``yield from`` delegates to the sub-generator until it is exhausted.
    yield from generator1()
    yield from generator2()

list(combined_generator())  # [1, 2, 3, 4]

# 2. 双向通信(send 方法)
def echo_generator():
    """Coroutine-style generator that prints every value sent to it.

    Each ``yield`` produces ``None`` and the loop never ends; advance it once
    with ``next()`` before calling ``send()``.
    """
    while True:
        received = yield  # pauses here until the caller sends a value
        print(f"Received: {received}")

gen = echo_generator()
next(gen)  # 启动生成器
gen.send("Hello")   # Received: Hello
gen.send("World")   # Received: World

# 3. 协程式的生成器
def moving_average():
    """Coroutine that yields the running mean of all values sent so far.

    Prime it with ``next()`` (which yields ``None``); every ``send(value)``
    afterwards returns the updated average.
    """
    running_sum, samples = 0.0, 0
    mean = None
    while True:
        sample = yield mean
        running_sum += sample
        samples += 1
        mean = running_sum / samples

ma = moving_average()
next(ma)  # 启动
print(ma.send(10))  # 10.0
print(ma.send(20))  # 15.0
print(ma.send(30))  # 20.0

2.4 Dify 中的生成器应用

python
# Dify 中处理流式响应的生成器
def stream_response(messages):
    """Yield one processed chunk per message, in input order.

    ``process_message`` is not defined in this snippet — presumably supplied
    by the surrounding application; confirm before use.
    """
    for message in messages:
        # Turn the message into a response chunk.
        chunk = process_message(message)
        yield chunk  # hand chunks to the caller one at a time

# 文档分块生成器
def chunk_document(document, chunk_size=500):
    """Yield consecutive slices of the document's text.

    Args:
        document: object exposing ``get_text()``; the result is sliced, so it
            is expected to be a string (or another sliceable sequence).
        chunk_size: maximum length of each chunk; must be positive.

    Yields:
        Slices of at most ``chunk_size`` items; only the last may be shorter.

    Raises:
        ValueError: if ``chunk_size`` is not positive. Because this is a
            generator, the check runs when iteration starts.
    """
    # A negative step would make range() empty and silently drop all content.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    text = document.get_text()
    for start in range(0, len(text), chunk_size):
        yield text[start:start + chunk_size]

# 批量处理生成器
def batch_processor(items, batch_size=10):
    """Group an iterable into lists of at most ``batch_size`` items.

    Args:
        items: any iterable; it is consumed lazily.
        batch_size: number of items per batch; must be at least 1.

    Yields:
        Lists of ``batch_size`` items; the final list may be shorter.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1. Because this is a
            generator, the check runs when iteration starts.
    """
    # With a non-positive size the length test below could never match and
    # every item would end up in one oversized trailing batch.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []  # start a fresh list; the yielded one belongs to the caller
    if batch:  # emit the leftover partial batch
        yield batch

3. 迭代器(Iterators)

3.1 迭代器协议

python
# 自定义迭代器
class Counter:
    """Iterator over the integers ``start`` .. ``end - 1``.

    The object is its own iterator, so it can be consumed only once.
    """

    def __init__(self, start, end):
        self.current, self.end = start, end

    def __iter__(self):
        """Return ``self``, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the current value and advance; stop once ``end`` is reached."""
        if self.current < self.end:
            value = self.current
            self.current = value + 1
            return value
        raise StopIteration

# 使用自定义迭代器
counter = Counter(0, 5)
for num in counter:
    print(num)  # 0, 1, 2, 3, 4

# 手动迭代
counter = Counter(0, 3)
print(next(counter))  # 0
print(next(counter))  # 1
print(next(counter))  # 2
# print(next(counter))  # StopIteration

3.2 内置迭代工具

python
# Import exactly the tools used below; a wildcard import would flood the
# namespace and hide where each name comes from.
from itertools import chain, count, cycle, groupby, islice

# 1. count - endless arithmetic counter
counter = count(start=1, step=2)
print(next(counter))  # 1
print(next(counter))  # 3

# 2. cycle - repeat an iterable forever
colors = cycle(['red', 'green', 'blue'])
for i, color in enumerate(colors):
    if i >= 6:  # cycle never ends on its own, so stop explicitly
        break
    print(color)  # red, green, blue, red, green, blue

# 3. chain - concatenate several iterables
list(chain([1, 2], [3, 4], [5, 6]))  # [1, 2, 3, 4, 5, 6]

# 4. islice - slice an iterator
list(islice(count(), 5))  # [0, 1, 2, 3, 4]

# 5. groupby - group consecutive items that share a key
# groupby only merges *adjacent* items, so the input must already be sorted
# (or at least clustered) by the grouping key, as ``data`` is here.
data = [
    {'type': 'A', 'value': 1},
    {'type': 'A', 'value': 2},
    {'type': 'B', 'value': 3},
]
for key, group in groupby(data, key=lambda x: x['type']):
    print(key, list(group))

4. 上下文管理器(Context Managers)

4.1 with 语句

python
# 传统方式打开文件
file = open('data.txt', 'r')
try:
    content = file.read()
finally:
    file.close()

# 使用上下文管理器
with open('data.txt', 'r') as file:
    content = file.read()
# 文件自动关闭,即使发生异常

# 多个上下文管理器
with open('input.txt') as infile, open('output.txt', 'w') as outfile:
    content = infile.read()
    outfile.write(content.upper())

4.2 自定义上下文管理器

python
# 方式1:使用类
class DatabaseConnection:
    """Class-based context manager that simulates a database connection.

    The "connection" is only a descriptive string, not a real handle.
    """
    def __init__(self, db_name):
        self.db_name = db_name
        self.connection = None
    
    def __enter__(self):
        """Called on entering the ``with`` block; returns the connection string."""
        print(f"Connecting to {self.db_name}")
        self.connection = f"Connection to {self.db_name}"
        return self.connection
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Called on leaving the ``with`` block, whether or not it raised."""
        print(f"Closing connection to {self.db_name}")
        self.connection = None
        # Returning True would suppress the exception; False lets it propagate.
        return False

# 使用
with DatabaseConnection("mydb") as conn:
    print(f"Using {conn}")
# 自动关闭连接

# 方式2:使用 contextlib
from contextlib import contextmanager

@contextmanager
def database_connection(db_name):
    """Generator-based context manager that simulates a database connection.

    Yields a descriptive connection string; the closing message is printed
    on exit even when the ``with`` block raises.
    """
    print(f"Connecting to {db_name}")
    connection = f"Connection to {db_name}"
    try:
        yield connection  # the with-block body runs while suspended here
    finally:
        print(f"Closing connection to {db_name}")

with database_connection("mydb") as conn:
    print(f"Using {conn}")

4.3 Dify 中的上下文管理器

python
# Dify 中的实际应用

# 1. 数据库会话管理
from contextlib import contextmanager
from sqlalchemy.orm import Session

@contextmanager
def get_db_session():
    """Yield a session, committing on success and rolling back on error.

    The session is closed on exit in every case.
    """
    # NOTE(review): Session() is created without arguments — presumably an
    # engine binding or sessionmaker is configured elsewhere; confirm.
    session = Session()
    try:
        yield session
        session.commit()  # reached only when the with-block finished cleanly
    except Exception:
        session.rollback()
        raise  # re-raise so the caller still sees the original error
    finally:
        session.close()

# 使用
with get_db_session() as session:
    user = session.query(User).filter_by(id=1).first()

# 2. 临时修改配置
@contextmanager
def temporary_config(key, value):
    """Set a configuration entry to ``value`` for the duration of the block.

    The previous value is read before the change and written back in a
    ``finally`` clause, so it is restored even if the block raises.
    """
    # ``config`` is not defined in this snippet — presumably a module-level
    # settings object exposing get()/set(); confirm.
    old_value = config.get(key)
    config.set(key, value)
    try:
        yield
    finally:
        # NOTE(review): if ``key`` did not exist beforehand, this stores
        # whatever get() returned rather than removing the key — confirm
        # that is the intended behaviour.
        config.set(key, old_value)

with temporary_config('debug', True):
    # 临时启用调试模式
    do_something()
# 自动恢复原配置

# 3. 性能监控
@contextmanager
def monitor_performance(operation_name):
    """Print how long the enclosed ``with`` block took to run.

    Args:
        operation_name: label used as the prefix of the printed report.

    The duration is measured with ``time.perf_counter()``, a monotonic
    high-resolution clock, so system clock adjustments cannot distort it.
    """
    import time
    start = time.perf_counter()
    try:
        yield
    finally:
        # Runs on normal exit and on exceptions, so a report is always printed.
        duration = time.perf_counter() - start
        print(f"{operation_name} took {duration:.2f}s")

with monitor_performance("Document processing"):
    process_document()

5. 列表推导式及其他推导式(Comprehensions)

5.1 列表推导式

python
# 基本形式
squares = [x**2 for x in range(10)]

# 带条件
evens = [x for x in range(20) if x % 2 == 0]

# 嵌套循环
pairs = [(x, y) for x in range(3) for y in range(3)]
# [(0,0), (0,1), (0,2), (1,0), (1,1), (1,2), (2,0), (2,1), (2,2)]

# 复杂表达式
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = [num for row in matrix for num in row]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]

# if-else 在推导式中
result = [x if x > 0 else 0 for x in [-1, 2, -3, 4]]
# [0, 2, 0, 4]

5.2 字典推导式

python
# 创建字典
squares = {x: x**2 for x in range(5)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# 反转字典
original = {'a': 1, 'b': 2, 'c': 3}
reversed_dict = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b', 3: 'c'}

# 筛选字典
users = {
    'alice': {'age': 25, 'active': True},
    'bob': {'age': 30, 'active': False},
    'charlie': {'age': 35, 'active': True}
}
active_users = {
    name: info for name, info in users.items()
    if info['active']
}

5.3 集合推导式

python
# 创建集合
unique_squares = {x**2 for x in [-2, -1, 0, 1, 2]}
# {0, 1, 4}

# 字符串处理
text = "Hello World"
unique_chars = {char.lower() for char in text if char.isalpha()}
# {'d', 'e', 'h', 'l', 'o', 'r', 'w'}

5.4 Dify 中的推导式应用

python
# 1. 提取模型ID
models = [
    {'id': 'gpt-4', 'provider': 'openai'},
    {'id': 'claude-3', 'provider': 'anthropic'},
]
model_ids = [model['id'] for model in models]

# 2. 构建配置字典
config_keys = ['api_key', 'endpoint', 'timeout']
default_values = [None, 'https://api.example.com', 30]
config = {k: v for k, v in zip(config_keys, default_values)}

# 3. 过滤有效文档
documents = [doc for doc in all_documents if doc.is_valid()]

# 4. 转换数据格式
messages = [
    {'role': 'user', 'content': msg.content}
    for msg in conversation.messages
    if msg.role in ['user', 'assistant']
]

6. 类型提示(Type Hints)

6.1 基础类型提示

python
from typing import List, Dict, Tuple, Optional, Union, Any

# 函数参数和返回值类型
def greet(name: str, age: int) -> str:
    """Return a sentence stating ``name`` and ``age``."""
    return f"{name} is {age} years old"

# 集合类型
def process_items(items: List[str]) -> Dict[str, int]:
    """Map every string in ``items`` to its length."""
    return {item: len(item) for item in items}

# Optional 表示可以是 None
def find_user(user_id: int) -> Optional[Dict[str, Any]]:
    """Look up a user (stub: this example always returns ``None``)."""
    # The annotation allows either a dict or None.
    return None

# Union 表示多种可能的类型
def parse_value(value: Union[str, int, float]) -> float:
    """Convert ``value`` to ``float`` using the built-in ``float()``."""
    return float(value)

# 元组类型
def get_coordinates() -> Tuple[float, float]:
    """Return the fixed coordinate pair ``(10.5, 20.3)``."""
    return (10.5, 20.3)

6.2 高级类型提示

python
from typing import TypeVar, Generic, Callable, Protocol

# 泛型类型
T = TypeVar('T')

def first_element(items: List[T]) -> Optional[T]:
    """Return the first item of ``items``, or ``None`` when it is empty."""
    if not items:
        return None
    return items[0]

# 泛型类
class Stack(Generic[T]):
    """Generic LIFO stack whose elements all share the type ``T``."""
    def __init__(self) -> None:
        self.items: List[T] = []
    
    def push(self, item: T) -> None:
        """Place ``item`` on top of the stack."""
        self.items.append(item)
    
    def pop(self) -> T:
        """Remove and return the top item; raises ``IndexError`` when empty."""
        return self.items.pop()

# 可调用类型
def apply_function(func: Callable[[int, int], int], a: int, b: int) -> int:
    """Call ``func`` with ``a`` and ``b`` and return its result."""
    return func(a, b)

# Protocol(结构子类型)
class Drawable(Protocol):
    """Structural type: any object with a matching ``draw()`` method conforms."""
    def draw(self) -> None: ...

def render(obj: Drawable) -> None:
    """Draw ``obj`` by calling its ``draw()`` method."""
    obj.draw()

6.3 Dify 中的类型提示

python
# Dify 实际代码示例
from typing import Any, Dict, Generator, List, Optional, Union

from pydantic import BaseModel, Field

class ModelConfig(BaseModel):
    """Model configuration."""
    # NOTE(review): on Pydantic v2 a field named ``model_name`` presumably
    # triggers the protected-namespace ("model_") warning — confirm against
    # the Pydantic version in use.
    provider: str = Field(..., description="模型供应商")
    model_name: str = Field(..., description="模型名称")
    api_key: Optional[str] = Field(None, description="API密钥")
    # default_factory gives every instance its own fresh dict.
    parameters: Dict[str, Any] = Field(default_factory=dict)

class Message(BaseModel):
    """A single message with a role, its content, and optional metadata."""
    role: str
    content: str
    metadata: Optional[Dict[str, Any]] = None

def invoke_model(
    config: ModelConfig,
    messages: List[Message],
    stream: bool = False
) -> Union[str, Generator[str, None, None]]:
    """Invoke the model (stub: the body is not implemented and returns ``None``).

    The return annotation allows a string or a generator of strings —
    presumably the latter when ``stream`` is true; confirm once implemented.
    """
    # NOTE: ``Union`` and ``Generator`` must be imported from ``typing`` for
    # this signature to evaluate.
    pass

7. 魔术方法(Magic Methods)

7.1 常用魔术方法

python
class Vector:
    """2-D vector used to demonstrate common magic methods.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable and cannot be used as dict keys or set members.
    """

    def __init__(self, x, y):
        """Store the two components."""
        self.x = x
        self.y = y

    def __repr__(self):
        """Developer-facing representation, e.g. ``Vector(1, 2)``."""
        return f"Vector({self.x}, {self.y})"

    def __str__(self):
        """User-facing representation, e.g. ``(1, 2)``; used by ``print``."""
        return f"({self.x}, {self.y})"

    @staticmethod
    def _has_components(obj):
        """Return True when ``obj`` exposes both ``x`` and ``y``."""
        return hasattr(obj, "x") and hasattr(obj, "y")

    def __add__(self, other):
        """Component-wise addition: ``v1 + v2``."""
        # Returning NotImplemented lets Python raise a clear TypeError
        # (or try the other operand) instead of leaking AttributeError.
        if not self._has_components(other):
            return NotImplemented
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        """Scalar multiplication: ``v * 2``."""
        # Vector * Vector is not defined; keep it a TypeError.
        if isinstance(scalar, Vector):
            return NotImplemented
        return Vector(self.x * scalar, self.y * scalar)

    def __rmul__(self, scalar):
        """Reflected scalar multiplication so that ``2 * v`` also works."""
        return self.__mul__(scalar)

    def __eq__(self, other):
        """Equality: ``v1 == v2``; objects without x/y compare unequal."""
        if not self._has_components(other):
            return NotImplemented
        return self.x == other.x and self.y == other.y

    def __len__(self):
        """Return the magnitude truncated to an int.

        Python falls back to ``__len__`` for truth testing, so a vector
        whose magnitude is below 1 is falsy.
        """
        return int((self.x**2 + self.y**2)**0.5)

    def __getitem__(self, index):
        """Index access: ``v[0]`` is x, ``v[1]`` is y; anything else raises."""
        if index == 0:
            return self.x
        elif index == 1:
            return self.y
        raise IndexError("Index out of range")

    def __call__(self):
        """Calling the instance, ``v()``, returns the ``(x, y)`` tuple."""
        return (self.x, self.y)

# 使用
v1 = Vector(1, 2)
v2 = Vector(3, 4)

print(v1 + v2)      # (4, 6)  -- print() uses __str__; repr() gives Vector(4, 6)
print(v1 * 2)       # (2, 4)
print(v1 == v2)     # False
print(v1[0])        # 1
print(v1())         # (1, 2)

7.2 容器魔术方法

python
class CustomList:
    """Minimal list-like container demonstrating the container protocol."""

    def __init__(self, items=None):
        """Start from a copy of ``items`` (any iterable), or empty if omitted.

        Copying keeps the container independent of the caller's object, for
        empty and non-empty inputs alike.
        """
        self.items = [] if items is None else list(items)

    def __repr__(self):
        """Developer-facing representation, e.g. ``CustomList([1, 2])``."""
        return f"CustomList({self.items!r})"

    def __len__(self):
        """Number of stored items: ``len(lst)``."""
        return len(self.items)

    def __getitem__(self, index):
        """Read access: ``lst[i]`` (indexes and slices go to the inner list)."""
        return self.items[index]

    def __setitem__(self, index, value):
        """Write access: ``lst[i] = value``."""
        self.items[index] = value

    def __delitem__(self, index):
        """Deletion: ``del lst[i]``."""
        del self.items[index]

    def __contains__(self, item):
        """Membership test: ``item in lst``."""
        return item in self.items

    def __iter__(self):
        """Iterate over the stored items."""
        return iter(self.items)

# 使用
lst = CustomList([1, 2, 3])
print(len(lst))      # 3
print(lst[0])        # 1
print(2 in lst)      # True
for item in lst:
    print(item)

8. 实践项目

项目 1:分析 Dify 装饰器

python
# 在 Dify 源码中找到并分析以下装饰器:
# 1. @setup_required
# 2. @validate_args
# 3. @cloud_edition_billing_resource_check

# 参考文件:
# - api/controllers/console/auth/login.py
# - api/libs/login.py

项目 2:实现自定义装饰器

python
# 实现以下装饰器:
# 1. 重试装饰器(失败时自动重试)
# 2. 超时装饰器(超时则抛出异常)
# 3. 日志装饰器(记录函数调用和参数)

import time
import functools

def retry(max_attempts=3, delay=1, exceptions=(Exception,)):
    """Build a decorator that retries the wrapped function on failure.

    Args:
        max_attempts: total number of calls to make; must be at least 1.
        delay: seconds to sleep between attempts (not after the last one).
        exceptions: exception class or tuple of classes that trigger a
            retry; anything else propagates immediately.

    Returns:
        A decorator. The decorated function returns the first successful
        result, or re-raises the last error once attempts are exhausted.

    Raises:
        ValueError: if ``max_attempts`` is smaller than 1. Otherwise the
            wrapped function would never run and ``None`` would be returned.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be at least 1, got {max_attempts}")

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempt == max_attempts:
                        raise  # out of attempts: surface the last error unchanged
                    time.sleep(delay)
        return wrapper
    return decorator

@retry(max_attempts=3, delay=2)
def fetch_data():
    """Simulate a flaky network call that fails roughly 70% of the time."""
    # Simulate an unreliable network request.
    import random
    if random.random() < 0.7:
        raise Exception("Network error")
    return "Data"

项目 3:创建上下文管理器

python
# 创建一个性能监控上下文管理器
# 要求:
# 1. 记录代码块的执行时间
# 2. 记录内存使用情况
# 3. 如果执行时间超过阈值,发出警告

import time
import psutil
from contextlib import contextmanager

@contextmanager
def performance_monitor(name, threshold=1.0):
    """Exercise placeholder for a performance-monitoring context manager.

    The body is intentionally unimplemented. As written it contains no
    ``yield``, so it cannot be used in a ``with`` statement until implemented.
    """
    # Implementation goes here...
    pass

9. 思考题

  1. 装饰器和 AOP(面向切面编程)有什么关系?
  2. 生成器和迭代器的区别是什么?什么时候用生成器?
  3. 为什么要使用上下文管理器?它解决了什么问题?
  4. 列表推导式和 map/filter 哪个更 Pythonic?
  5. 类型提示(Type Hints)在运行时会影响程序性能吗?

10. 检查清单

  • [ ] 能够编写和使用装饰器
  • [ ] 理解生成器的工作原理
  • [ ] 能够创建自定义上下文管理器
  • [ ] 熟练使用各种推导式
  • [ ] 理解并使用类型提示
  • [ ] 掌握常用的魔术方法

11. 下一步

12. 参考资源