深度学习有用的库以及介绍

在阅读他人源代码以及自己写代码时，可能会用到的一些有用的库和常用写法。

python自带的库

itertools

用于迭代器的工具。参考：itertools — 为高效循环而创建迭代器的函数（Python 3.12.2 文档）

import itertools

# chain: concatenate several iterables into a single stream.
print(list(itertools.chain([1, 23], [3, 1])))
# count: endless arithmetic progression (start=10, step=2); the loop has to
# break out on its own.
for i in itertools.count(10, 2):
    if i > 20:
        break
    print(i, end=' ')
# cycle: repeat the sequence forever; typing "q" at the prompt ends the loop.
for item in itertools.cycle(['a', 'b', 'c']):
    if input('Press enter to continue (q to quit): ').strip().lower() == 'q':
        break
    print(item)
# repeat: yield the same object a fixed number of times.
print(list(itertools.repeat('hello', 5)))
# chain accepts any number of iterables.
print(list(itertools.chain([1, 2, 3], [4, 5, 6], [7, 8])))
# compress: keep the items whose selector is truthy.
print(list(itertools.compress('ABCDEF', [1, 0, 1, 0, 1, 1])))
# filterfalse: keep the items for which the predicate is false (odd numbers here).
print(list(itertools.filterfalse(lambda x: x % 2 == 0, range(10))))
# combinations: all 2-element subsets, in input order.
print(list(itertools.combinations('ABCD', 2)))

functools

用于处理函数的工具。参考：10.2. functools — Higher-order functions and operations on callable objects（Python 3.6.3 documentation）

from functools import partial


def power(base, exponent):
    """Return ``base`` raised to the power ``exponent``."""
    return pow(base, exponent)


# partial() pre-binds the ``exponent`` keyword, so only ``base`` is left to pass.
square = partial(power, exponent=2)
cube = partial(power, exponent=3)

print(square(2))  # Output: 4
print(cube(2))    # Output: 8
from functools import lru_cache


@lru_cache(maxsize=128)
def fibonacci(n):
    """Return the n-th Fibonacci number, with fibonacci(0) == 0 and fibonacci(1) == 1.

    Results are memoized by ``lru_cache``, so each distinct ``n`` is computed once
    while it stays in the cache.
    """
    # Values below 2 are returned unchanged; everything else is the sum of the
    # two preceding terms.
    return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)


print(fibonacci(30))  # Fast, because intermediate results come from the cache
from functools import reduce

# Fold the list from the left with addition: ((1 + 2) + 3) + 4.
result = reduce(lambda total, item: total + item, [1, 2, 3, 4])
print(result)  # Output: 10
from functools import wraps


def my_decorator(f):
    """Return a wrapper around ``f`` that prints a notice before each call."""

    @wraps(f)  # copies f's __name__, __doc__ and other metadata onto the wrapper
    def announce_then_call(*args, **kwargs):
        print("Calling decorated function")
        outcome = f(*args, **kwargs)
        return outcome

    return announce_then_call


@my_decorator
def example():
    """Docstring for example."""
    print("Called example function")


example()
# Calling decorated function
# Called example function
print(example.__name__)  # Output: example
print(example.__doc__)   # Output: Docstring for example.

collections

集合工具。参考：collections — 容器数据类型（Python 3.12.6 文档）

from collections import namedtuple

# Field names given as one whitespace-separated string: the fields are x and y.
Point = namedtuple('Point', 'x y')
# Positional and keyword arguments can be mixed when building an instance.
p = Point(11, y=22)
print(p.x, p.y)  # Output: 11 22
print(p)         # Output: Point(x=11, y=22)

from collections import deque

# A deque can be extended and shrunk at both ends.
d = deque([1, 2, 3])
d.append(4)  # add on the right
d.appendleft(0)  # add on the left
print(d)  # Output: deque([0, 1, 2, 3, 4])

d.pop()  # remove the rightmost item (4)
d.popleft()  # remove the leftmost item (0)
print(d)  # Output: deque([1, 2, 3])

from collections import Counter

# Count how often each character occurs in the string.
c = Counter('abracadabra')
print(c)  # Output: Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

# Commonly used methods
print(c.most_common(2))  # Output: [('a', 5), ('b', 2)]
print(list(c.elements()))  # Output: ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'r', 'r', 'c', 'd']
from collections import OrderedDict

# Keys are kept in insertion order and can be reordered explicitly.
d = OrderedDict()
d['a'] = 1
d['b'] = 2
d['c'] = 3
# Output on Python < 3.12: OrderedDict([('a', 1), ('b', 2), ('c', 3)])
# Output on Python 3.12+:  OrderedDict({'a': 1, 'b': 2, 'c': 3})
print(d)

# Move a key to the end
d.move_to_end('b', last=True)
# Output on Python < 3.12: OrderedDict([('a', 1), ('c', 3), ('b', 2)])
# Output on Python 3.12+:  OrderedDict({'a': 1, 'c': 3, 'b': 2})
print(d)
from collections import defaultdict

# A missing key is created on first access using the factory; int() gives 0.
d = defaultdict(int)
d['a'] += 1
d['b'] += 2
print(d)  # Output: defaultdict(<class 'int'>, {'a': 1, 'b': 2})

# Use a list as the default value
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
print(d)  # Output: defaultdict(<class 'list'>, {'a': [1, 2]})
from collections import ChainMap

dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
# Lookups try dict1 first and fall back to dict2.
chain = ChainMap(dict1, dict2)
print(chain['a'])  # Output: 1
print(chain['b'])  # Output: 2 (the first dict is searched first)
print(chain['c'])  # Output: 4

深度学习库

timm

timm (huggingface.co) 提供了许多现成的图像深度学习模型。

from pprint import pprint

import timm
import torch

# Instantiate a ResNet-34 by name. No ``pretrained`` flag is passed here
# (NOTE(review): timm then presumably uses randomly initialised weights — confirm).
model = timm.create_model('resnet34')
# One dummy input: batch of 1, 3 channels, 224x224 pixels.
x = torch.randn(1, 3, 224, 224)
# Print the output shape; a bare expression shows nothing outside a notebook.
print(model(x).shape)

# List the names of the models for which pretrained weights are available.
model_names = timm.list_models(pretrained=True)
pprint(model_names)

torchvision也提供了很多模型

einops✨✨✨

arogozhnikov/einops: Flexible and powerful tensor operations for readable and reliable code (for pytorch, jax, TF and others) (github.com)

# NOTE: `input_tensor` is a placeholder for an existing tensor; it is not
# defined in this snippet, and each call below expects a different rank.
# NOTE: importing `reduce` from einops shadows `functools.reduce` if both are
# imported into the same namespace.
from einops import rearrange, reduce, repeat
# rearrange elements according to the pattern
output_tensor = rearrange(input_tensor, 't b c -> b c t')
# combine rearrangement and reduction
output_tensor = reduce(input_tensor, 'b c (h h2) (w w2) -> b h w c', 'mean', h2=2, w2=2)
# copy along a new axis
output_tensor = repeat(input_tensor, 'h w -> h w c', c=3)

使用einops原因

y = x.view(x.shape[0], -1)
y = rearrange(x, 'b c h w -> b (c h w)')

虽然这两行在某些上下文中执行相同的任务,但第二行提供有关输入和输出的信息。换句话说,einops关注的是接口:输入和输出是什么,而不是如何计算输出

einx✨✨✨

fferflo/einx: Universal Tensor Operations in Einstein-Inspired Notation for Python.

einx是一个Python库，它提供了一个通用接口来在Numpy， PyTorch， Jax和Tensorflow等框架中制定张量操作

提供一组类似numpy命名的基本张量操作：einx.{sum|max|where|add|dot|flip|get_at|…}
使用einx符号来表示基本运算的向量化。Einx表示法受到einops的启发，但引入了一些新的概念，如[]括号表示法和完全可组合性，允许将其用作张量操作的通用语言

# Schematic overview of einx operations, not a runnable script: `x` and `y`
# stand for tensors of a supported backend (`y` is never created here), and
# each call assumes operands whose shapes match its pattern.
import einx
x = {np.asarray|torch.as_tensor|jnp.asarray|...}(...) # Create some tensor

einx.sum("a [b]", x)                              # Sum-reduction along second axis
einx.flip("... (g [c])", x, c=2)                  # Flip pairs of values along the last axis
einx.mean("b [...] c", x)                         # Spatial mean-pooling
einx.multiply("a..., b... -> (a b)...", x, y)     # Kronecker product
einx.sum("b (s [ds])... c", x, ds=(2, 2))         # Sum-pooling with 2x2 kernel
einx.add("a, b -> a b", x, y)                     # Outer sum
einx.dot("a [b], [b] c -> a c", x, y)             # Matmul

einx.get_at("b [h w] c, b i [2] -> b i c", x, y)  # Gather values at coordinates

einx.rearrange("b (q + k) -> b q, b k", x, q=2)   # Split
einx.rearrange("b c, 1 -> b (c + 1)", x, [42])    # Append number to each channel

                                                  # Apply custom operations:
einx.vmap("b [s...] c -> b c", x, op=np.mean)     # Spatial mean-pooling
einx.vmap("a [b], [b] c -> a c", x, y, op=np.dot) # Matmul

from torch import einsum

torch.einsum — PyTorch 2.4 documentation

沿着使用基于einsum约定的符号指定的维度,对输入操作数元素的乘积求和

Einsum允许计算许多常见的多维线性代数数组操作,通过基于einsum约定的速记格式表示它们,由方程给出.一般的思想是用一些下标标记输入操作数的每个维度,并定义哪些下标是输出的一部分.然后通过将操作数元素沿着下标不属于输出的维度的乘积相加来计算输出

# trace: the repeated index with no output subscripts sums the diagonal,
# giving a 0-dimensional tensor
torch.einsum('ii', torch.randn(4, 4))

# diagonal: keeping the index in the output yields the diagonal, shape (4,)
torch.einsum('ii->i', torch.randn(4, 4))

# outer product: result has shape (5, 4)
x = torch.randn(5)
y = torch.randn(4)
torch.einsum('i,j->ij', x, y)

# batch matrix multiplication: j is summed over, result has shape (3, 2, 4)
As = torch.randn(3, 2, 5)
Bs = torch.randn(3, 5, 4)
torch.einsum('bij,bjk->bik', As, Bs)

transformers

🤗 Transformers (huggingface.co) 下载SOTA模型,能根据任务或者直接指定模型下载

from transformers import pipeline

# Only the task is named; no model is passed, so the choice of checkpoint is
# left to the library (NOTE(review): presumably a task default — confirm).
transcriber = pipeline(task="automatic-speech-recognition")

# Build a Swin Transformer V2 model from a configuration object instead of
# loading a pretrained checkpoint.
from transformers import Swinv2Config, Swinv2Model

# Initializing a Swinv2 microsoft/swinv2-tiny-patch4-window8-256 style configuration
configuration = Swinv2Config()

# Initializing a model (with random weights) from the microsoft/swinv2-tiny-patch4-window8-256 style configuration
model = Swinv2Model(configuration)

# Accessing the model configuration
configuration = model.config

accelerate

Accelerate (huggingface.co)

Accelerate是一个库，通过添加四行代码，可以使相同的PyTorch代码在任何分布式配置中运行!简而言之，大规模的训练和推理变得简单、高效和适应性强。

+ from accelerate import Accelerator
+ accelerator = Accelerator()

+ model, optimizer, training_dataloader, scheduler = accelerator.prepare(
+     model, optimizer, training_dataloader, scheduler
+ )

  for batch in training_dataloader:
      optimizer.zero_grad()
      inputs, targets = batch
      inputs = inputs.to(device)
      targets = targets.to(device)
      outputs = model(inputs)
      loss = loss_function(outputs, targets)
+     accelerator.backward(loss)
      optimizer.step()
      scheduler.step()

一些常用代码

import random

import numpy as np
import torch

# Seed every random number generator used here (Python, NumPy, PyTorch CPU
# and CUDA) so that repeated runs draw the same random numbers.
seed = 2024
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)      # current GPU
    torch.cuda.manual_seed_all(seed)  # every visible GPU
# Turn off cuDNN benchmarking and ask for deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

import math

import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR


def get_cosine_schedule_with_warmup(
    optimizer: Optimizer,
    num_warmup_steps: int,
    num_training_steps: int,
    num_cycles: float = 0.5,
    last_epoch: int = -1,
):
    """Build a ``LambdaLR`` scheduler with linear warmup followed by cosine decay.

    The scheduler multiplies the optimizer's initial learning rate by a factor
    that rises linearly from 0 to 1 over the warmup steps and then follows a
    cosine curve over the remaining steps. The factor is never negative.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        num_warmup_steps: Number of steps in the linear warmup phase.
        num_training_steps: Total number of training steps.
        num_cycles: Number of cosine waves after warmup. The default of 0.5
            is a half cosine, going from the full rate down to 0.
        last_epoch: Index of the last epoch when resuming training.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` applying this schedule.
    """

    def lr_lambda(current_step):
        if current_step >= num_warmup_steps:
            # Decay phase: fraction of the post-warmup steps already done.
            steps_done = float(current_step - num_warmup_steps)
            steps_total = float(max(1, num_training_steps - num_warmup_steps))
            progress = steps_done / steps_total
            cosine = math.cos(math.pi * float(num_cycles) * 2.0 * progress)
            # Clamp at zero so the multiplier is never negative.
            return max(0.0, 0.5 * (1.0 + cosine))
        # Warmup phase: linear ramp; max(1, ...) avoids division by zero.
        return float(current_step) / float(max(1, num_warmup_steps))

    return LambdaLR(optimizer, lr_lambda, last_epoch)

Sekyoro的博客小屋