实用模块-Python篇

算法竞赛中常以极端数据和数据量检验代码质量,一般写法很容易TLE或MLE,而Python标准库提供了很多功能强大的函数,我们可以用以下方式优化代码的时空复杂度:

输入

快速读取

import sys
data_1 = sys.stdin.read() #一次性读取所有数据,返回字符串
data_2 = sys.stdin.readline() #读取一行,返回以'\n'结尾的字符串
data_3 = sys.stdin.readlines() #读取所有数据,按行分割,返回以'\n'结尾的字符串列表

列表推导式写法:

1	`data = [line for line in sys.stdin] #与data_3完全等价`

记忆递归

functools模块的lru_cache装饰器可实现记忆缓存功能,免去函数递归调用时的大量重复计算
以简单的阶乘和斐波那契数列为例:

from functools import lru_cache

@lru_cache(maxsize = None)
def fact(n):
    if n == 1:
        return 1
    else:
        return n*fact(n-1)

@lru_cache(maxsize = None)
def fib(n):
    if n <= 2:
        return 1
    else:
        return f(n-1) + f(n-2)

排列组合

itertools模块中的函数能高效实现为可迭代对象生成全排列/组合功能:

import itertools

data = [1,2,3]
perm_data = itertools.permutations(data,r = len(data)) #返回可迭代对象,可遍历取出所有排列,元组形式,r参数可指定长度

comb_data = itertools.combinations(data,2) #元组形式返回所有无重复组合

comb_with_rep_data = itertools.combinations_with_replacement(data,a) #元组形式返回所有有重复组合

无限迭代

itertools模块中含有多个无限迭代器:

import itertools

#无限自增
for i in itertools.count(1,2): #(起点,步长)
    print(i)  #1 3 5 7

#循环
for i in itertools.cycle([1,2,3,4]):
    print(i)  #1 2 3 4 1 2 3 4...

#重复
for i in itertools.repeat(1,4):
    print(i) #1,1,1,1

数学操作

math库作为Python数学专用标准库,集成了大量数学操作函数:

import math

math.inf #无穷大值
math.pi #圆周率
math.sqrt(x) #x的平方根
math.pow(a,b) #a的b次幂
math.ceil(x) #x向上取整
math.floor(x) #x向下取整
math.gcd(m,n) #最大公约数
math.factorial(n) #n的阶乘

✅ 注意：math.gcd() 只返回非负数，且只支持两个数。

collections库

collections库提供了多个高效容器,在算法题中常用于计数、建图、队列和分组统计等场景。

Counter可以快速统计可迭代对象中每个元素出现的次数,访问不存在的键返回0而非KeyError:

from collections import Counter

nums = [1, 2, 2, 3, 3, 3]
cnt = Counter(nums)

print(cnt[1])            # 1
print(cnt[3])            # 3
print(cnt[4])            # 0
print(cnt.most_common(2))# [ (3,3), (2,2) ]

常用操作:

from collections import Counter

s = "abacaba"
cnt = Counter(s)
cnt["a"] += 1
cnt["b"] -= 1

elements = list(cnt.elements())

两个Counter之间支持加减运算:

from collections import Counter

a = Counter("aab")
b = Counter("abb")

print(a + b)  # Counter({'a':3, 'b':3})
print(a - b)  # Counter({'a':1})
print(a & b)  # Counter({'a':1, 'b':1})
print(a | b)  # Counter({'a':2, 'b':2})

✅ 判断两个字符串是否为字母异位词,直接用Counter(a) == Counter(b)即可。

defaultdict可以为不存在的键自动创建默认值,常用于邻接表建图和分组:

from collections import defaultdict

graph = defaultdict(list)
edges = [(1, 2), (1, 3), (2, 4)]

for u, v in edges:
    graph[u].append(v)
    graph[v].append(u)

print(graph[1])  # [2, 3]
print(graph[5])  # []

用于计数:

from collections import defaultdict

cnt = defaultdict(int)
nums = [1, 2, 2, 3, 3, 3]
for x in nums:
    cnt[x] += 1

print(cnt)  # defaultdict(<class 'int'>, {1: 1, 2: 2, 3: 3})

用于分组:

from collections import defaultdict

groups = defaultdict(list)
words = ["apple", "ant", "banana", "boat"]

for word in words:
    groups[word[0]].append(word)

print(groups)

deque是双端队列,两端增删均为O(1),适合BFS、滑动窗口等场景:

from collections import deque

q = deque()
q.append(1)
q.append(2)
q.appendleft(0)

print(q.popleft())  # 0
print(q.pop())      # 2

BFS广度优先搜索模板:

from collections import deque

def bfs(start, graph):
    q = deque([start])
    vis = {start}

    while q:
        u = q.popleft()
        for v in graph[u]:
            if v not in vis:
                vis.add(v)
                q.append(v)

带层数统计的BFS:

from collections import deque

def bfs_level(start, graph):
    q = deque([start])
    dist = {start: 0}

    while q:
        u = q.popleft()
        for v in graph[u]:
            if v not in dist:
                dist[v] = dist[u] + 1
                q.append(v)
    return dist

固定长度队列（自动淘汰旧元素）:

from collections import deque

q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
q.append(4)  # 1被自动挤出

print(q)  # deque([2, 3, 4])

✅ list在头部插入和删除是O(n),deque两端操作均为O(1),队列类问题优先考虑deque。

OrderedDict在需要调整键值位置时比普通dict更方便:

from collections import OrderedDict

od = OrderedDict()
od["a"] = 1
od["b"] = 2
od["c"] = 3

od.move_to_end("a")            # 将"a"移到末尾
print(od)                      # OrderedDict([('b',2), ('c',3), ('a',1)])

od.popitem(last=True)          # LIFO弹出
od.popitem(last=False)         # FIFO弹出

namedtuple可以创建轻量级的不可变数据对象,支持下标和属性两种访问方式:

from collections import namedtuple

Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)

print(p.x)   # 3
print(p[0])  # 3

Edge = namedtuple("Edge", ["u", "v", "w"])
e = Edge(1, 2, 5)
print(e.u, e.v, e.w)  # 1 2 5

heapq库

heapq实现了最小堆（优先队列）,可在O(log n)时间内插入和弹出最小值:

import heapq

heap = []
heapq.heappush(heap, 5)
heapq.heappush(heap, 1)
heapq.heappush(heap, 3)
heapq.heappush(heap, 7)

print(heapq.heappop(heap))  # 1
print(heapq.heappop(heap))  # 3

查看堆顶元素:

import heapq

heap = [3, 1, 5, 2]
heapq.heapify(heap)          # O(n)原地建堆

print(heap[0])               # 1

Python只有最小堆,要模拟最大堆可以把值取负存入:

import heapq

nums = [3, 1, 5, 2, 4]
max_heap = []

for x in nums:
    heapq.heappush(max_heap, -x)

print(-heapq.heappop(max_heap))  # 5
print(-heapq.heappop(max_heap))  # 4

取最大/最小的K个元素:

import heapq

nums = [3, 1, 5, 2, 4, 7, 6]

print(heapq.nlargest(3, nums))  # [7, 6, 5]
print(heapq.nsmallest(3, nums)) # [1, 2, 3]

堆排序:

import heapq

def heap_sort(arr):
    heap = arr[:]
    heapq.heapify(heap)
    return [heapq.heappop(heap) for _ in range(len(heap))]

print(heap_sort([3, 1, 4, 1, 5, 9]))  # [1, 1, 3, 4, 5, 9]

合并多个有序序列:

import heapq

a = [1, 4, 7]
b = [2, 5, 8]
c = [3, 6, 9]

merged = heapq.merge(a, b, c)
print(list(merged))  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

✅ heapq默认是最小堆,需要最大堆时取负数入堆;nlargest/nsmallest适合K较小时的TopK问题。