Python 高级特性
深入学习装饰器、生成器、上下文管理器等 Python 高级特性
📖 学习目标
- 掌握装饰器的原理和使用
- 理解生成器和迭代器
- 学会使用上下文管理器
- 熟练使用列表推导式和生成器表达式
- 理解 Python 的类型系统
- 掌握魔术方法的使用
预计学习时间:3-4 天
难度:⭐⭐⭐
1. 装饰器(Decorators)
装饰器是 Python 的一个强大特性,用于在不修改原函数的情况下扩展其功能。
1.1 函数装饰器基础
python
# 最简单的装饰器
def my_decorator(func):
    """Wrap ``func`` so a message is printed before and after each call.

    The returned function forwards every positional and keyword argument to
    ``func`` and passes its return value through unchanged.
    """
    def wrapper(*args, **kwargs):
        print("Before function")
        result = func(*args, **kwargs)
        # Only reached when func returns normally; an exception skips it.
        print("After function")
        return result
    return wrapper
# 使用装饰器
@my_decorator
def say_hello(name):
    """Print a greeting for ``name`` and return a short confirmation string."""
    print(f"Hello, {name}!")
    return f"Greeted {name}"
# 等价于:
# say_hello = my_decorator(say_hello)
result = say_hello("Alice")
# 输出:
# Before function
# Hello, Alice!
# After function
1.2 保留函数元数据
python
from functools import wraps
def my_decorator(func):
    """Return a pass-through wrapper that keeps ``func``'s metadata.

    ``functools.wraps`` copies attributes such as ``__name__`` and ``__doc__``
    from ``func`` onto the wrapper.
    """
    @wraps(func)  # keep the metadata of the original function
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper
# The docstring below is intentionally left as written: the example prints
# greet.__doc__ right after this definition and shows that exact text.
@my_decorator
def greet(name):
    """问候函数"""
    return f"Hello, {name}"
print(greet.__name__) # 'greet' 而非 'wrapper'
print(greet.__doc__) # '问候函数'
1.3 带参数的装饰器
python
def repeat(times):
    """Build a decorator that calls the wrapped function ``times`` times.

    Each call receives the same arguments. The wrapper returns a list with
    the return value of every call, in call order (empty when ``times`` is 0).
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            return [func(*args, **kwargs) for _ in range(times)]
        return wrapper
    return decorator
@repeat(times=3)
def say_hello():
    """Print a greeting and return ``"Done"``."""
    print("Hello!")
    return "Done"
result = say_hello()
# 输出 Hello! 三次
# result = ['Done', 'Done', 'Done']
1.4 类装饰器
python
class CountCalls:
    """Class-based decorator that counts how often a function is called."""

    def __init__(self, func):
        self.func = func
        self.count = 0  # number of calls made so far

    def __call__(self, *args, **kwargs):
        """Increment the counter, report the call, then delegate to ``func``."""
        self.count += 1
        print(f"Call {self.count} of {self.func.__name__}")
        return self.func(*args, **kwargs)
@CountCalls
def process_data(data):
    """Return ``data`` multiplied by two."""
    return data * 2
process_data(10) # Call 1 of process_data
process_data(20) # Call 2 of process_data
print(process_data.count) # 2
1.5 Dify 中的装饰器实例
python
# 来自 Dify 源码的真实例子
# 1. 登录验证装饰器
from flask import request
from functools import wraps
def login_required(func):
    """Reject calls made without an ``Authorization`` request header.

    The header is read from Flask's ``request``. When it is missing or empty
    the wrapped function is not called and ``({'error': 'Unauthorized'}, 401)``
    is returned instead. Actual token validation is left as a placeholder.
    """
    @wraps(func)
    def decorated(*args, **kwargs):
        token = request.headers.get('Authorization')
        if not token:
            return {'error': 'Unauthorized'}, 401
        # Validate the token here...
        return func(*args, **kwargs)
    return decorated
@login_required
def get_user_data():
    """Return a sample payload; only reached when the login check passes."""
    return {'data': 'user info'}
# 2. 性能监控装饰器
import time
def timing(func):
    """Print how long each call to ``func`` takes, in seconds.

    The wrapped function's return value is passed through unchanged. The
    timing line is printed only when ``func`` returns normally.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump when the system clock
        # is adjusted.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f"{func.__name__} took {end - start:.2f}s")
        return result
    return wrapper
@timing
def process_document(doc):
    """Placeholder for document processing; currently does nothing."""
    # Process the document...
    pass
# 3. 缓存装饰器
from functools import lru_cache
@lru_cache(maxsize=128)
def expensive_computation(n):
"""使用内置的 LRU 缓存"""
return sum(range(n))
2. 生成器(Generators)
生成器是一种特殊的迭代器,可以按需生成值,节省内存。
2.1 生成器函数
python
# 普通函数 vs 生成器函数
def normal_range(n):
    """Regular function: build and return the full list ``[0, ..., n - 1]``."""
    # Written as an explicit loop on purpose, to mirror generator_range.
    result = []
    for i in range(n):
        result.append(i)
    return result
def generator_range(n):
    """Generator function: produce ``0 .. n - 1`` one value at a time."""
    for i in range(n):
        yield i  # yield instead of return: execution pauses here per value
# 使用生成器
gen = generator_range(5)
print(next(gen)) # 0
print(next(gen)) # 1
# 遍历生成器
for num in generator_range(5):
print(num)
# 内存对比
import sys
normal = normal_range(1000000)
print(sys.getsizeof(normal)) # 很大,约 8MB
gen = generator_range(1000000)
print(sys.getsizeof(gen)) # 很小,约 200 bytes
2.2 生成器表达式
python
# 列表推导式
squares_list = [x**2 for x in range(10)] # 立即生成列表
# 生成器表达式(惰性求值)
squares_gen = (x**2 for x in range(10)) # 按需生成
# 生成器表达式在处理大数据时更高效
# 计算大文件的总行数
# total_lines = sum(1 for line in open('huge_file.txt'))
2.3 生成器的高级用法
python
# 1. 生成器委托(yield from)
def generator1():
    """Yield 1, then 2."""
    yield 1
    yield 2


def generator2():
    """Yield 3, then 4."""
    yield 3
    yield 4


def combined_generator():
    """Delegate to ``generator1`` and then ``generator2`` via ``yield from``."""
    yield from generator1()
    yield from generator2()
list(combined_generator()) # [1, 2, 3, 4]
# 2. 双向通信(send 方法)
def echo_generator():
    """Print every value sent into the generator; never finishes on its own.

    Must be primed with ``next()`` once so that it is paused at the ``yield``
    before the first ``send()``.
    """
    while True:
        received = yield  # evaluates to whatever the caller passes to send()
        print(f"Received: {received}")
gen = echo_generator()
next(gen) # 启动生成器
gen.send("Hello") # Received: Hello
gen.send("World") # Received: World
# 3. 协程式的生成器
def moving_average():
    """Coroutine-style generator yielding the running mean of sent values.

    The first ``next()`` yields ``None`` (no data yet); each ``send(value)``
    afterwards returns the mean of all values sent so far.
    """
    total = 0.0
    count = 0
    average = None
    while True:
        value = yield average
        total += value
        count += 1
        average = total / count
ma = moving_average()
next(ma) # 启动
print(ma.send(10)) # 10.0
print(ma.send(20)) # 15.0
print(ma.send(30)) # 20.0
2.4 Dify 中的生成器应用
python
# Dify 中处理流式响应的生成器
def stream_response(messages):
    """Yield one processed chunk per message, in input order.

    Each message is passed to ``process_message`` (defined elsewhere) and the
    result is yielded immediately, so consumers can start before the whole
    input has been handled.
    """
    for message in messages:
        # Process the message
        chunk = process_message(message)
        yield chunk  # hand back response chunks one at a time
# 文档分块生成器
def chunk_document(document, chunk_size=500):
    """Yield consecutive slices of the document text.

    ``document`` only needs a ``get_text()`` method. Every chunk has
    ``chunk_size`` items except possibly the last, which holds the remainder.
    """
    text = document.get_text()
    for i in range(0, len(text), chunk_size):
        yield text[i:i + chunk_size]
# 批量处理生成器
def batch_processor(items, batch_size=10):
"""批量处理数据"""
batch = []
for item in items:
batch.append(item)
if len(batch) == batch_size:
yield batch
batch = []
if batch: # 处理剩余项
yield batch
3. 迭代器(Iterators)
3.1 迭代器协议
python
# 自定义迭代器
class Counter:
    """Iterator over the integers from ``start`` up to, excluding, ``end``."""

    def __init__(self, start, end):
        self.current = start  # next value to hand out
        self.end = end

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the next value, or raise ``StopIteration`` when exhausted."""
        if self.current >= self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value
# 使用自定义迭代器
counter = Counter(0, 5)
for num in counter:
print(num) # 0, 1, 2, 3, 4
# 手动迭代
counter = Counter(0, 3)
print(next(counter)) # 0
print(next(counter)) # 1
print(next(counter)) # 2
# print(next(counter)) # StopIteration
3.2 内置迭代工具
python
# Import the tools by name: a wildcard import hides where names come from
# and can silently shadow existing ones.
from itertools import chain, count, cycle, groupby, islice

# 1. count - endless counter
counter = count(start=1, step=2)
print(next(counter))  # 1
print(next(counter))  # 3

# 2. cycle - repeat an iterable forever
colors = cycle(['red', 'green', 'blue'])
for i, color in enumerate(colors):
    if i >= 6:  # cycle never stops on its own, so break explicitly
        break
    print(color)  # red, green, blue, red, green, blue

# 3. chain - concatenate several iterables
list(chain([1, 2], [3, 4], [5, 6]))  # [1, 2, 3, 4, 5, 6]

# 4. islice - slice an iterator
list(islice(count(), 5))  # [0, 1, 2, 3, 4]

# 5. groupby - grouping
# groupby only merges *consecutive* items with equal keys, so the input has
# to be ordered by the grouping key already (as it is here).
data = [
    {'type': 'A', 'value': 1},
    {'type': 'A', 'value': 2},
    {'type': 'B', 'value': 3},
]
for key, group in groupby(data, key=lambda x: x['type']):
print(key, list(group))
4. 上下文管理器(Context Managers)
4.1 with 语句
python
# 传统方式打开文件
file = open('data.txt', 'r')
try:
content = file.read()
finally:
file.close()
# 使用上下文管理器
with open('data.txt', 'r') as file:
content = file.read()
# 文件自动关闭,即使发生异常
# 多个上下文管理器
with open('input.txt') as infile, open('output.txt', 'w') as outfile:
content = infile.read()
outfile.write(content.upper())
4.2 自定义上下文管理器
python
# 方式1:使用类
class DatabaseConnection:
    """Class-based context manager simulating a database connection.

    The "connection" is just a descriptive string; the class exists to show
    when ``__enter__`` and ``__exit__`` are invoked.
    """

    def __init__(self, db_name):
        self.db_name = db_name
        self.connection = None

    def __enter__(self):
        """Called on entering the ``with`` block; returns the connection."""
        print(f"Connecting to {self.db_name}")
        self.connection = f"Connection to {self.db_name}"
        return self.connection

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Called on leaving the ``with`` block, with or without an exception."""
        print(f"Closing connection to {self.db_name}")
        self.connection = None
        # Returning True would mark the exception as handled; False lets it
        # propagate to the caller.
        return False
# 使用
with DatabaseConnection("mydb") as conn:
print(f"Using {conn}")
# 自动关闭连接
# 方式2:使用 contextlib
from contextlib import contextmanager
@contextmanager
def database_connection(db_name):
    """Generator-based context manager simulating a database connection.

    Code before ``yield`` runs on entry, the yielded value is bound by
    ``as``, and the ``finally`` clause runs on exit even if the body raised.
    """
    print(f"Connecting to {db_name}")
    connection = f"Connection to {db_name}"
    try:
        yield connection
    finally:
        print(f"Closing connection to {db_name}")
with database_connection("mydb") as conn:
print(f"Using {conn}")
4.3 Dify 中的上下文管理器
python
# Dify 中的实际应用
# 1. 数据库会话管理
from contextlib import contextmanager
from sqlalchemy.orm import Session
@contextmanager
def get_db_session():
    """Context manager around a database session.

    Commits when the ``with`` body finishes normally, rolls back and
    re-raises when it raises, and closes the session in both cases.
    """
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
# 使用
with get_db_session() as session:
user = session.query(User).filter_by(id=1).first()
# 2. 临时修改配置
@contextmanager
def temporary_config(key, value):
    """Set ``config[key]`` to ``value`` for the duration of the ``with`` block.

    The previous value (as returned by ``config.get``) is written back on
    exit, even if the body raises. ``config`` is an object defined elsewhere.
    """
    old_value = config.get(key)
    config.set(key, value)
    try:
        yield
    finally:
        config.set(key, old_value)
with temporary_config('debug', True):
# 临时启用调试模式
do_something()
# 自动恢复原配置
# 3. 性能监控
@contextmanager
def monitor_performance(operation_name):
    """Print how long the ``with`` body took, labelled ``operation_name``.

    The report is printed from a ``finally`` clause, so it also appears when
    the body raises.
    """
    import time

    # perf_counter is monotonic and intended for interval measurement;
    # time.time() can jump if the system clock changes.
    start = time.perf_counter()
    try:
        yield
    finally:
        duration = time.perf_counter() - start
        print(f"{operation_name} took {duration:.2f}s")
with monitor_performance("Document processing"):
process_document()
5. 列表推导式和推导表达式
5.1 列表推导式
python
# 基本形式
squares = [x**2 for x in range(10)]
# 带条件
evens = [x for x in range(20) if x % 2 == 0]
# 嵌套循环
pairs = [(x, y) for x in range(3) for y in range(3)]
# [(0,0), (0,1), (0,2), (1,0), (1,1), (1,2), (2,0), (2,1), (2,2)]
# 复杂表达式
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = [num for row in matrix for num in row]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]
# if-else 在推导式中
result = [x if x > 0 else 0 for x in [-1, 2, -3, 4]]
# [0, 2, 0, 4]
5.2 字典推导式
python
# 创建字典
squares = {x: x**2 for x in range(5)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
# 反转字典
original = {'a': 1, 'b': 2, 'c': 3}
reversed_dict = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b', 3: 'c'}
# 筛选字典
users = {
'alice': {'age': 25, 'active': True},
'bob': {'age': 30, 'active': False},
'charlie': {'age': 35, 'active': True}
}
active_users = {
name: info for name, info in users.items()
if info['active']
}
5.3 集合推导式
python
# 创建集合
unique_squares = {x**2 for x in [-2, -1, 0, 1, 2]}
# {0, 1, 4}
# 字符串处理
text = "Hello World"
unique_chars = {char.lower() for char in text if char.isalpha()}
# {'d', 'e', 'h', 'l', 'o', 'r', 'w'}
5.4 Dify 中的推导式应用
python
# 1. 提取模型ID
models = [
{'id': 'gpt-4', 'provider': 'openai'},
{'id': 'claude-3', 'provider': 'anthropic'},
]
model_ids = [model['id'] for model in models]
# 2. 构建配置字典
config_keys = ['api_key', 'endpoint', 'timeout']
default_values = [None, 'https://api.example.com', 30]
config = {k: v for k, v in zip(config_keys, default_values)}
# 3. 过滤有效文档
documents = [doc for doc in all_documents if doc.is_valid()]
# 4. 转换数据格式
messages = [
{'role': 'user', 'content': msg.content}
for msg in conversation.messages
if msg.role in ['user', 'assistant']
]
6. 类型提示(Type Hints)
6.1 基础类型提示
python
from typing import List, Dict, Tuple, Optional, Union, Any
# 函数参数和返回值类型
def greet(name: str, age: int) -> str:
    """Return a sentence stating ``name`` and ``age``."""
    return f"{name} is {age} years old"
# 集合类型
def process_items(items: List[str]) -> Dict[str, int]:
    """Map each string in ``items`` to its length."""
    return {item: len(item) for item in items}
# Optional 表示可以是 None
def find_user(user_id: int) -> Optional[Dict[str, Any]]:
    """Illustrate an ``Optional`` return type; this stub always returns None."""
    # A real lookup would return either a dict or None.
    return None
# Union 表示多种可能的类型
def parse_value(value: Union[str, int, float]) -> float:
    """Convert ``value`` to ``float`` using the built-in ``float()``."""
    return float(value)
# 元组类型
def get_coordinates() -> Tuple[float, float]:
return (10.5, 20.3)
6.2 高级类型提示
python
from typing import TypeVar, Generic, Callable, Protocol
# 泛型类型
T = TypeVar('T')
def first_element(items: List[T]) -> Optional[T]:
    """Return the first item of ``items``, or ``None`` when it is empty."""
    return items[0] if items else None
# 泛型类
class Stack(Generic[T]):
    """Minimal LIFO stack, generic over its element type ``T``."""

    def __init__(self) -> None:
        self.items: List[T] = []

    def push(self, item: T) -> None:
        """Put ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self) -> T:
        """Remove and return the top item; ``list.pop`` raises IndexError if empty."""
        return self.items.pop()
# 可调用类型
def apply_function(func: Callable[[int, int], int], a: int, b: int) -> int:
    """Call ``func(a, b)`` and return its result."""
    return func(a, b)
# Protocol(结构子类型)
class Drawable(Protocol):
    """Structural type: any object with a ``draw()`` method matches."""

    def draw(self) -> None: ...
def render(obj: Drawable) -> None:
obj.draw()
6.3 Dify 中的类型提示
python
# Dify 实际代码示例
from typing import Any, Dict, Generator, List, Optional, Union
from pydantic import BaseModel, Field
# Model configuration schema.
# NOTE(review): the class docstring is left untranslated because pydantic may
# surface it as the schema description; confirm before changing it.
class ModelConfig(BaseModel):
    """模型配置"""
    # Required: Field(...) declares no default.
    provider: str = Field(..., description="模型供应商")
    model_name: str = Field(..., description="模型名称")
    # Optional; defaults to None.
    api_key: Optional[str] = Field(None, description="API密钥")
    # default_factory gives every instance its own fresh dict.
    parameters: Dict[str, Any] = Field(default_factory=dict)
# A single chat message.
# NOTE(review): the class docstring is left untranslated because pydantic may
# surface it as the schema description; confirm before changing it.
class Message(BaseModel):
    """消息"""
    role: str
    content: str
    # Optional extra data; defaults to None.
    metadata: Optional[Dict[str, Any]] = None
def invoke_model(
config: ModelConfig,
messages: List[Message],
stream: bool = False
) -> Union[str, Generator[str, None, None]]:
"""调用模型"""
pass
7. 魔术方法(Magic Methods)
7.1 常用魔术方法
python
class Vector:
    """Two-component vector used to demonstrate magic methods."""

    def __init__(self, x, y):
        """Store the two components."""
        self.x = x
        self.y = y

    def __repr__(self):
        """Developer-facing representation, e.g. ``Vector(1, 2)``."""
        return f"Vector({self.x}, {self.y})"

    def __str__(self):
        """User-facing representation, e.g. ``(1, 2)``; used by ``print``."""
        return f"({self.x}, {self.y})"

    def __add__(self, other):
        """Component-wise addition: ``v1 + v2``."""
        if not isinstance(other, Vector):
            # Let Python try the other operand or raise a proper TypeError
            # instead of failing with AttributeError on other.x.
            return NotImplemented
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        """Scalar multiplication: ``v * 2``."""
        return Vector(self.x * scalar, self.y * scalar)

    # Also support the scalar on the left-hand side: ``2 * v``.
    __rmul__ = __mul__

    def __eq__(self, other):
        """Equality: ``v1 == v2``; non-Vector operands compare unequal."""
        if not isinstance(other, Vector):
            return NotImplemented
        return self.x == other.x and self.y == other.y

    def __len__(self):
        """Return the Euclidean magnitude truncated to ``int``.

        NOTE: this is the example's own definition of "length"; it is not
        the number of components, and a vector shorter than 1 is falsy.
        """
        return int((self.x**2 + self.y**2)**0.5)

    def __getitem__(self, index):
        """Index access: ``v[0]`` is x, ``v[1]`` is y, anything else raises."""
        if index == 0:
            return self.x
        elif index == 1:
            return self.y
        raise IndexError("Index out of range")

    def __call__(self):
        """Calling the instance, ``v()``, returns ``(x, y)`` as a tuple."""
        return (self.x, self.y)
# 使用
v1 = Vector(1, 2)
v2 = Vector(3, 4)
print(v1 + v2) # (4, 6)(print 调用 __str__;repr(v1 + v2) 才是 Vector(4, 6))
print(v1 * 2) # (2, 4)
print(v1 == v2) # False
print(v1[0]) # 1
print(v1()) # (1, 2)
7.2 容器魔术方法
python
class CustomList:
    """List-like container demonstrating the container magic methods."""

    def __init__(self, items=None):
        # Copy the input so that later __setitem__/__delitem__ calls do not
        # modify the caller's list; any iterable is accepted.
        self.items = list(items) if items is not None else []

    def __len__(self):
        """Number of stored items: ``len(lst)``."""
        return len(self.items)

    def __getitem__(self, index):
        """Read access: ``lst[i]``."""
        return self.items[index]

    def __setitem__(self, index, value):
        """Write access: ``lst[i] = value``."""
        self.items[index] = value

    def __delitem__(self, index):
        """Deletion: ``del lst[i]``."""
        del self.items[index]

    def __contains__(self, item):
        """Membership test: ``item in lst``."""
        return item in self.items

    def __iter__(self):
        """Iteration: ``for item in lst``."""
        return iter(self.items)
# 使用
lst = CustomList([1, 2, 3])
print(len(lst)) # 3
print(lst[0]) # 1
print(2 in lst) # True
for item in lst:
print(item)
8. 实践项目
项目 1:分析 Dify 装饰器
python
# 在 Dify 源码中找到并分析以下装饰器:
# 1. @setup_required
# 2. @validate_args
# 3. @cloud_edition_billing_resource_check
# 参考文件:
# - api/controllers/console/auth/login.py
# - api/libs/login.py
项目 2:实现自定义装饰器
python
# 实现以下装饰器:
# 1. 重试装饰器(失败时自动重试)
# 2. 超时装饰器(超时则抛出异常)
# 3. 日志装饰器(记录函数调用和参数)
import time
import functools
def retry(max_attempts=3, delay=1):
    """Build a decorator that retries the wrapped function on failure.

    Args:
        max_attempts: Total number of calls to try; must be at least 1.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        A decorator. The decorated function returns the first successful
        result; if every attempt raises, the last exception propagates.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1. Without this check
            the function would never be called and ``None`` would be returned
            silently.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_attempts:
                        raise  # out of attempts: surface the last error
                    time.sleep(delay)
        return wrapper
    return decorator
@retry(max_attempts=3, delay=2)
def fetch_data():
# 模拟不稳定的网络请求
import random
if random.random() < 0.7:
raise Exception("Network error")
return "Data"
项目 3:创建上下文管理器
python
# 创建一个性能监控上下文管理器
# 要求:
# 1. 记录代码块的执行时间
# 2. 记录内存使用情况
# 3. 如果执行时间超过阈值,发出警告
import time
import psutil
from contextlib import contextmanager
@contextmanager
def performance_monitor(name, threshold=1.0):
"""性能监控上下文管理器"""
# 实现代码...
pass
9. 思考题
- 装饰器和 AOP(面向切面编程)有什么关系?
- 生成器和迭代器的区别是什么?什么时候用生成器?
- 为什么要使用上下文管理器?它解决了什么问题?
- 列表推导式和 map/filter 哪个更 Pythonic?
- 类型提示(Type Hints)在运行时会影响程序性能吗?
10. 检查清单
- [ ] 能够编写和使用装饰器
- [ ] 理解生成器的工作原理
- [ ] 能够创建自定义上下文管理器
- [ ] 熟练使用各种推导式
- [ ] 理解并使用类型提示
- [ ] 掌握常用的魔术方法