第14章:几何验证与修复
在实际 GIS 应用中,几何数据经常由于采集误差、格式转换或编辑操作而产生无效几何体。无效几何体会导致空间运算(如交集、缓冲区)产生错误结果甚至程序崩溃。Shapely 提供了完整的几何验证与修复工具,本章将详细介绍 OGC 有效性规则、常见的无效几何场景,以及自动和手动修复策略。
OGC 几何有效性规则
有效性规则概述
OGC(Open Geospatial Consortium)定义了一套严格的几何有效性规则,不同几何类型有不同的要求:
# OGC 有效性规则汇总
rules = """
几何类型 有效性规则
─────────────────────────────────────────────────────
Point - 坐标不为 NaN
- 至少有 2 维坐标 (x, y)
LineString - 至少 2 个不同的点
- 不能自相交(is_simple 检查)
LinearRing - 首尾点相同(闭合)
- 至少 4 个坐标(含闭合点)
- 不能自相交
Polygon - 外环闭合且有效
- 外环方向一致
- 孔洞在外环内部
- 孔洞不互相重叠
- 孔洞不与外环重叠(可接触)
- 不自相交
MultiPolygon - 每个多边形独立有效
- 多边形之间不重叠(可接触)
MultiLineString - 每条线独立有效
MultiPoint - 允许重复点
"""
print(rules)
多边形有效性规则详解
from shapely import Polygon, is_valid, is_valid_reason
# 规则1:外环不能自相交
valid_poly = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
print(f"正常多边形有效: {is_valid(valid_poly)}")
# 规则2:孔洞必须在外环内
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
inner = [(11, 11), (12, 11), (12, 12), (11, 12)] # 孔洞在外环外
poly_bad_hole = Polygon(outer, [inner])
print(f"孔洞在外: {is_valid(poly_bad_hole)}")
print(f"原因: {is_valid_reason(poly_bad_hole)}")
# 规则3:孔洞不能重叠
hole1 = [(2, 2), (5, 2), (5, 5), (2, 5)]
hole2 = [(3, 3), (6, 3), (6, 6), (3, 6)] # 与 hole1 重叠
poly_overlap_holes = Polygon(outer, [hole1, hole2])
print(f"孔洞重叠: {is_valid(poly_overlap_holes)}")
print(f"原因: {is_valid_reason(poly_overlap_holes)}")
线串的简单性
from shapely import LineString, is_valid, is_simple
# 简单线:不自相交
simple_line = LineString([(0, 0), (5, 5), (10, 0)])
print(f"简单线 is_valid: {is_valid(simple_line)}")
print(f"简单线 is_simple: {is_simple(simple_line)}")
# 自相交的线
self_intersecting = LineString([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"\n自相交线 is_valid: {is_valid(self_intersecting)}")
print(f"自相交线 is_simple: {is_simple(self_intersecting)}")
# 注意:LineString 的 is_valid 通常为 True
# 自相交通过 is_simple 来检查
is_valid:检查有效性
基本用法
from shapely import is_valid, Point, LineString, Polygon
# 点
print(f"Point(0, 0) 有效: {is_valid(Point(0, 0))}")
# 正常线
print(f"LineString 有效: {is_valid(LineString([(0, 0), (1, 1)]))}")
# 正常多边形
print(f"Polygon 有效: {is_valid(Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]))}")
# 蝴蝶结(自相交多边形)
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"蝴蝶结 有效: {is_valid(bowtie)}")
批量验证
from shapely import is_valid, Polygon
import numpy as np
# 创建一组多边形(部分无效)
polygons = np.array([
Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), # 有效
Polygon([(0, 0), (10, 10), (10, 0), (0, 10)]), # 无效(蝴蝶结)
Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]), # 有效
Polygon([(0, 0), (1, 0), (0, 1), (1, 1), (0, 0)]), # 无效(自相交)
])
# 批量检查
validity = is_valid(polygons)
print(f"有效性数组: {validity}")
# 筛选无效几何
invalid_mask = ~validity
invalid_geoms = polygons[invalid_mask]
print(f"\n无效几何体数量: {len(invalid_geoms)}")
for geom in invalid_geoms:
print(f" {geom.wkt[:60]}...")
is_valid_reason:详细无效原因
基本用法
from shapely import is_valid_reason, Polygon
# 有效几何体
valid = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
print(f"有效几何: {is_valid_reason(valid)}")
# "Valid Geometry"
# 蝴蝶结
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"蝴蝶结: {is_valid_reason(bowtie)}")
# "Self-intersection[5 5]"
常见无效原因
from shapely import is_valid, is_valid_reason, Polygon, LineString
# 1. Self-intersection(自相交)
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(f"1. 自相交:")
print(f" {is_valid_reason(bowtie)}")
# 2. Ring Self-intersection(环自相交)
ring_self = Polygon([(0, 0), (5, 0), (5, 5), (3, -1), (0, 5)])
print(f"\n2. 环自相交:")
print(f" {is_valid_reason(ring_self)}")
# 3. Too few points(点太少)
# 通过 WKT 构造无效几何
from shapely import from_wkt
try:
degen = from_wkt("POLYGON ((0 0, 1 1, 0 0))")
if degen is not None:
print(f"\n3. 点太少:")
print(f" {is_valid_reason(degen)}")
except Exception as e:
print(f"\n3. 构造失败: {e}")
# 4. Hole outside shell(孔洞在外环外)
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
hole_outside = [(20, 20), (21, 20), (21, 21), (20, 21)]
poly_hole_out = Polygon(outer, [hole_outside])
print(f"\n4. 孔洞在外环外:")
print(f" {is_valid_reason(poly_hole_out)}")
# 5. Nested holes(嵌套孔洞)
hole_a = [(1, 1), (9, 1), (9, 9), (1, 9)]
hole_b = [(2, 2), (8, 2), (8, 8), (2, 8)] # 在 hole_a 内
poly_nested = Polygon(outer, [hole_a, hole_b])
print(f"\n5. 嵌套孔洞:")
print(f" {is_valid_reason(poly_nested)}")
批量获取无效原因
from shapely import is_valid, is_valid_reason, Polygon
import numpy as np
polygons = np.array([
Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
Polygon([(0, 0), (2, 2), (2, 0), (0, 2)]),
Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
])
reasons = is_valid_reason(polygons)
for i, (poly, reason) in enumerate(zip(polygons, reasons)):
status = "✓" if "Valid" in reason else "✗"
print(f"[{status}] 多边形{i}: {reason}")
is_simple:简单性检查
线的简单性
from shapely import is_simple, LineString
# 简单线(不自相交)
simple = LineString([(0, 0), (1, 1), (2, 0)])
print(f"简单线: is_simple = {is_simple(simple)}")
# 非简单线(自相交 / 8 字形)
not_simple = LineString([(0, 0), (2, 2), (2, 0), (0, 2)])
print(f"自相交线: is_simple = {is_simple(not_simple)}")
# 端点重合的线(闭合线)
closed = LineString([(0, 0), (1, 0), (1, 1), (0, 0)])
print(f"闭合线: is_simple = {is_simple(closed)}")
多几何体的简单性
from shapely import is_simple, MultiPoint, MultiLineString
# MultiPoint 总是简单的(即使有重复点)
mp = MultiPoint([(0, 0), (0, 0), (1, 1)])
print(f"MultiPoint(含重复点): is_simple = {is_simple(mp)}")
# MultiLineString
ml = MultiLineString([
[(0, 0), (1, 1)],
[(0, 1), (1, 0)] # 与第一条交叉
])
print(f"交叉的 MultiLineString: is_simple = {is_simple(ml)}")
make_valid:自动修复无效几何
基本用法
from shapely import make_valid, is_valid, Polygon
# 蝴蝶结多边形(自相交)
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(f"修复前: is_valid = {is_valid(bowtie)}")
print(f"修复前: {bowtie.wkt}")
# 自动修复
fixed = make_valid(bowtie)
print(f"\n修复后: is_valid = {is_valid(fixed)}")
print(f"修复后类型: {fixed.geom_type}")
print(f"修复后: {fixed.wkt}")
修复各种无效几何
from shapely import make_valid, is_valid, is_valid_reason, Polygon
# 场景1:蝴蝶结(自相交)
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
fixed1 = make_valid(bowtie)
print(f"蝴蝶结修复:")
print(f" 类型: {bowtie.geom_type} → {fixed1.geom_type}")
print(f" 面积: {bowtie.area:.1f} → {fixed1.area:.1f}")
# 场景2:孔洞与外环重叠
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
hole = [(-2, -2), (5, -2), (5, 5), (-2, 5)] # 超出外环
poly_bad = Polygon(outer, [hole])
fixed2 = make_valid(poly_bad)
print(f"\n孔洞越界修复:")
print(f" 原因: {is_valid_reason(poly_bad)}")
print(f" 修复后类型: {fixed2.geom_type}")
print(f" 修复后有效: {is_valid(fixed2)}")
# 场景3:尖刺多边形
spike = Polygon([
(0, 0), (5, 0), (5, 10), # 正常部分
(5.01, 5), (5, 5), # 尖刺
(0, 10)
])
fixed3 = make_valid(spike)
print(f"\n尖刺多边形修复:")
print(f" 修复后有效: {is_valid(fixed3)}")
print(f" 修复后类型: {fixed3.geom_type}")
修复前后对比
from shapely import make_valid, is_valid, Polygon
# 创建一个复杂的无效多边形
invalid_poly = Polygon([
(0, 0), (10, 0), (10, 10), (5, 3),
(0, 10), (5, 7), (0, 0)
])
print("修复前后对比:")
print(f" 有效性: {is_valid(invalid_poly)}")
print(f" 面积: {invalid_poly.area:.2f}")
print(f" 类型: {invalid_poly.geom_type}")
fixed = make_valid(invalid_poly)
print(f"\n 修复后有效: {is_valid(fixed)}")
print(f" 修复后面积: {fixed.area:.2f}")
print(f" 修复后类型: {fixed.geom_type}")
# 如果修复结果是 MultiPolygon,遍历各部分
if fixed.geom_type == 'MultiPolygon':
for i, part in enumerate(fixed.geoms):
print(f" 部分{i}: 面积={part.area:.2f}")
elif fixed.geom_type == 'GeometryCollection':
for i, part in enumerate(fixed.geoms):
print(f" 部分{i}: {part.geom_type}, 面积={part.area:.2f}")
常见无效几何场景
蝴蝶结多边形(自相交)
from shapely import Polygon, is_valid, is_valid_reason, make_valid
# 蝴蝶结:两个三角形共用一个交点
bowtie = Polygon([
(0, 0), (4, 4), (4, 0), (0, 4), (0, 0)
])
print("蝴蝶结多边形:")
print(f" 有效: {is_valid(bowtie)}")
print(f" 原因: {is_valid_reason(bowtie)}")
# 修复:分割为两个三角形
fixed = make_valid(bowtie)
print(f" 修复结果: {fixed.geom_type}")
if hasattr(fixed, 'geoms'):
for i, g in enumerate(fixed.geoms):
if g.area > 0:
print(f" 部分{i}: {g.geom_type}, 面积={g.area:.2f}")
尖刺多边形
from shapely import Polygon, is_valid, make_valid
# 带有零宽度尖刺的多边形
spike_poly = Polygon([
(0, 0), (10, 0), (10, 10),
(5, 10), (5, 15), (5, 10), # 尖刺:向上延伸再返回
(0, 10), (0, 0)
])
print("尖刺多边形:")
print(f" 有效: {is_valid(spike_poly)}")
fixed = make_valid(spike_poly)
print(f" 修复后: {fixed.geom_type}")
print(f" 修复后面积: {fixed.area:.2f}")
孔洞超出外环
from shapely import Polygon, is_valid, is_valid_reason, make_valid
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
# 孔洞部分超出外环
bad_hole = [(-2, 3), (5, 3), (5, 7), (-2, 7)]
poly = Polygon(outer, [bad_hole])
print("孔洞超出外环:")
print(f" 有效: {is_valid(poly)}")
print(f" 原因: {is_valid_reason(poly)}")
fixed = make_valid(poly)
print(f" 修复后类型: {fixed.geom_type}")
print(f" 修复后有效: {is_valid(fixed)}")
重复顶点
from shapely import Polygon, is_valid, LineString
# 重复顶点通常不导致无效,但可能引起问题
poly_dup = Polygon([
(0, 0), (5, 0), (5, 0), # 重复点
(5, 5), (0, 5), (0, 0)
])
print("含重复顶点的多边形:")
print(f" 有效: {is_valid(poly_dup)}")
print(f" 顶点数: {len(poly_dup.exterior.coords)}")
# 去除重复顶点
from shapely import simplify
simplified = simplify(poly_dup, tolerance=0)
print(f" 简化后顶点数: {len(simplified.exterior.coords)}")
退化几何(零面积多边形)
from shapely import Polygon, LineString, is_valid
# 退化多边形(所有点共线)
degenerate = Polygon([(0, 0), (5, 0), (10, 0), (5, 0)])
print("退化多边形(共线):")
print(f" 有效: {is_valid(degenerate)}")
print(f" 面积: {degenerate.area}")
print(f" 空: {degenerate.is_empty}")
# 零面积多边形
zero_area = Polygon([(0, 0), (1, 0), (1, 0), (0, 0)])
print(f"\n零面积多边形:")
print(f" 有效: {is_valid(zero_area)}")
print(f" 面积: {zero_area.area}")
手动修复策略
buffer(0) 技巧
from shapely import Polygon, is_valid
# buffer(0) 是最经典的修复技巧
# 原理:缓冲区操作会重新构建几何体的拓扑
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"修复前: {is_valid(bowtie)}")
# buffer(0) 修复
fixed = bowtie.buffer(0)
print(f"buffer(0) 后: {is_valid(fixed)}")
print(f"类型: {fixed.geom_type}")
print(f"面积: {fixed.area:.2f}")
unary_union 修复
from shapely import Polygon, is_valid, unary_union
# unary_union 也可以修复某些无效几何
invalid = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
# 对单个几何体做 unary_union
fixed = unary_union([invalid])
print(f"unary_union 修复:")
print(f" 有效: {is_valid(fixed)}")
print(f" 类型: {fixed.geom_type}")
simplify 清理
from shapely import Polygon, is_valid, simplify
# 使用 simplify 去除退化部分
messy_poly = Polygon([
(0, 0), (5, 0), (5.001, 0.001), (5, 0), # 微小尖刺
(10, 0), (10, 10), (0, 10)
])
print(f"混乱多边形有效: {is_valid(messy_poly)}")
# 使用小容差简化
cleaned = simplify(messy_poly, tolerance=0.01)
print(f"简化后有效: {is_valid(cleaned)}")
print(f"简化后: {cleaned.wkt}")
综合修复函数
from shapely import is_valid, is_valid_reason, make_valid, simplify
def robust_fix(geometry, tolerance=1e-8):
"""
健壮的几何修复函数,按照优先级尝试多种修复方法
"""
# 已经有效,直接返回
if is_valid(geometry):
return geometry
reason = is_valid_reason(geometry)
print(f" 无效原因: {reason}")
# 方法1: make_valid(推荐,GEOS 内置)
try:
fixed = make_valid(geometry)
if is_valid(fixed) and not fixed.is_empty:
print(" 使用 make_valid 修复成功")
return fixed
except Exception:
pass
# 方法2: buffer(0)
try:
fixed = geometry.buffer(0)
if is_valid(fixed) and not fixed.is_empty:
print(" 使用 buffer(0) 修复成功")
return fixed
except Exception:
pass
# 方法3: simplify + buffer(0)
try:
fixed = simplify(geometry, tolerance=tolerance).buffer(0)
if is_valid(fixed) and not fixed.is_empty:
print(" 使用 simplify + buffer(0) 修复成功")
return fixed
except Exception:
pass
print(" ⚠ 所有修复方法均失败")
return geometry
# 测试
test_cases = [
("蝴蝶结", Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])),
("正常多边形", Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])),
]
for name, poly in test_cases:
print(f"\n{name}:")
result = robust_fix(poly)
print(f" 结果类型: {result.geom_type}")
print(f" 有效: {is_valid(result)}")
预防无效几何的最佳实践
输入验证
from shapely import is_valid, is_valid_reason, Polygon
def create_safe_polygon(coords, holes=None):
"""安全地创建多边形,带验证和自动修复"""
from shapely import make_valid
# 确保坐标闭合
if coords[0] != coords[-1]:
coords = list(coords) + [coords[0]]
# 确保至少 4 个坐标
if len(coords) < 4:
raise ValueError(f"多边形至少需要4个坐标,实际: {len(coords)}")
# 创建多边形
poly = Polygon(coords, holes)
# 验证并修复
if not is_valid(poly):
reason = is_valid_reason(poly)
print(f"⚠ 创建了无效多边形: {reason}")
poly = make_valid(poly)
print(f" 已自动修复")
return poly
# 使用
poly1 = create_safe_polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
print(f"正常: {poly1.geom_type}, 面积={poly1.area}")
poly2 = create_safe_polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"修复后: {poly2.geom_type}, 面积={poly2.area}")
坐标精度控制
from shapely import Polygon, set_precision, is_valid
# 浮点精度问题可能导致无效几何
# 使用 set_precision 统一精度
coords = [
(0.0000000001, 0.0000000001),
(10.0000000002, 0.0000000003),
(10.0000000001, 10.0000000002),
(0.0000000003, 10.0000000001),
]
poly = Polygon(coords)
print(f"高精度多边形: {is_valid(poly)}")
# 降低精度到合理范围
snapped = set_precision(poly, grid_size=0.001)
print(f"降低精度后: {is_valid(snapped)}")
print(f"简化后坐标: {list(snapped.exterior.coords)}")
批量验证与修复
批量处理流程
from shapely import is_valid, is_valid_reason, make_valid, Polygon
import numpy as np
# 模拟大批量数据
test_polygons = [
Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]), # 有效
Polygon([(0, 0), (5, 5), (5, 0), (0, 5)]), # 无效
Polygon([(10, 10), (20, 10), (20, 20), (10, 20)]), # 有效
Polygon([(0, 0), (10, 10), (10, 0), (0, 10)]), # 无效
Polygon([(0, 0), (3, 0), (3, 3), (0, 3)]), # 有效
]
# 批量验证
validity = np.array([is_valid(p) for p in test_polygons])
print(f"有效数量: {np.sum(validity)} / {len(test_polygons)}")
# 批量修复
fixed_polygons = []
for i, (poly, valid) in enumerate(zip(test_polygons, validity)):
if valid:
fixed_polygons.append(poly)
else:
reason = is_valid_reason(poly)
fixed = make_valid(poly)
fixed_polygons.append(fixed)
print(f" 修复第{i}个: {reason}")
# 验证修复结果
all_valid = all(is_valid(p) for p in fixed_polygons)
print(f"\n修复后全部有效: {all_valid}")
使用向量化操作
from shapely import is_valid, make_valid, Polygon
import numpy as np
# 向量化的批量验证
polygons = np.array([
Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
Polygon([(0, 0), (2, 2), (2, 0), (0, 2)]),
Polygon([(0, 0), (3, 0), (3, 3), (0, 3)]),
])
# 向量化 is_valid
validity = is_valid(polygons)
print(f"有效性: {validity}")
# 向量化 make_valid
fixed = make_valid(polygons)
print(f"修复后有效性: {is_valid(fixed)}")
生成验证报告
from shapely import is_valid, is_valid_reason, make_valid, Polygon
from collections import Counter
def validation_report(geometries, names=None):
"""生成几何验证报告"""
if names is None:
names = [f"geom_{i}" for i in range(len(geometries))]
total = len(geometries)
valid_count = 0
invalid_reasons = Counter()
results = []
for name, geom in zip(names, geometries):
if is_valid(geom):
valid_count += 1
results.append((name, True, "Valid", geom))
else:
reason = is_valid_reason(geom)
# 提取主要错误类型
error_type = reason.split("[")[0].strip()
invalid_reasons[error_type] += 1
fixed = make_valid(geom)
results.append((name, False, reason, fixed))
# 打印报告
print("=" * 60)
print("几何验证报告")
print("=" * 60)
print(f"总数: {total}")
print(f"有效: {valid_count} ({valid_count/total*100:.1f}%)")
print(f"无效: {total - valid_count} ({(total-valid_count)/total*100:.1f}%)")
if invalid_reasons:
print(f"\n无效原因统计:")
for reason, count in invalid_reasons.most_common():
print(f" {reason}: {count} 个")
return results
# 示例
polygons = [
Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
Polygon([(0, 0), (2, 2), (2, 0), (0, 2)]),
Polygon([(0, 0), (3, 0), (3, 3), (0, 3)]),
Polygon([(0, 0), (4, 4), (4, 0), (0, 4)]),
Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
]
names = [f"多边形_{i}" for i in range(len(polygons))]
results = validation_report(polygons, names)
小结
本章详细介绍了几何验证与修复的完整工作流:
验证工具:
is_valid(geometry):快速检查有效性is_valid_reason(geometry):获取详细的无效原因is_simple(geometry):检查线的简单性
修复工具:
make_valid(geometry):GEOS 内置的自动修复(推荐)buffer(0):经典的拓扑重建修复unary_union:通过合并操作修复simplify:去除微小退化部分
常见无效场景:
- 蝴蝶结(自相交多边形)
- 尖刺多边形
- 孔洞超出外环
- 重复顶点
- 退化几何
最佳实践:
- 在数据入口处验证几何有效性
- 使用
make_valid作为首选修复方法 - 批量处理使用向量化操作
- 控制坐标精度避免浮点问题
- 建立验证报告机制跟踪数据质量