znlgis 博客

GIS开发与技术分享

第14章:几何验证与修复

在实际 GIS 应用中,几何数据经常由于采集误差、格式转换或编辑操作而产生无效几何体。无效几何体会导致空间运算(如交集、缓冲区)产生错误结果甚至程序崩溃。Shapely 提供了完整的几何验证与修复工具,本章将详细介绍 OGC 有效性规则、常见的无效几何场景,以及自动和手动修复策略。


OGC 几何有效性规则

有效性规则概述

OGC(Open Geospatial Consortium)定义了一套严格的几何有效性规则,不同几何类型有不同的要求:

# OGC 有效性规则汇总
rules = """
几何类型        有效性规则
─────────────────────────────────────────────────────
Point           - 坐标不为 NaN
                - 至少有 2 维坐标 (x, y)

LineString      - 至少 2 个不同的点
                - 不能自相交(is_simple 检查)

LinearRing      - 首尾点相同(闭合)
                - 至少 4 个坐标(含闭合点)
                - 不能自相交

Polygon         - 外环闭合且有效
                - 外环方向一致
                - 孔洞在外环内部
                - 孔洞不互相重叠
                - 孔洞不与外环重叠(可接触)
                - 不自相交

MultiPolygon    - 每个多边形独立有效
                - 多边形之间不重叠(可接触)

MultiLineString - 每条线独立有效

MultiPoint      - 允许重复点
"""
print(rules)

多边形有效性规则详解

from shapely import Polygon, is_valid, is_valid_reason

# 规则1:外环不能自相交
valid_poly = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
print(f"正常多边形有效: {is_valid(valid_poly)}")

# 规则2:孔洞必须在外环内
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
inner = [(11, 11), (12, 11), (12, 12), (11, 12)]  # 孔洞在外环外
poly_bad_hole = Polygon(outer, [inner])
print(f"孔洞在外: {is_valid(poly_bad_hole)}")
print(f"原因: {is_valid_reason(poly_bad_hole)}")

# 规则3:孔洞不能重叠
hole1 = [(2, 2), (5, 2), (5, 5), (2, 5)]
hole2 = [(3, 3), (6, 3), (6, 6), (3, 6)]  # 与 hole1 重叠
poly_overlap_holes = Polygon(outer, [hole1, hole2])
print(f"孔洞重叠: {is_valid(poly_overlap_holes)}")
print(f"原因: {is_valid_reason(poly_overlap_holes)}")

线串的简单性

from shapely import LineString, is_valid, is_simple

# 简单线:不自相交
simple_line = LineString([(0, 0), (5, 5), (10, 0)])
print(f"简单线 is_valid: {is_valid(simple_line)}")
print(f"简单线 is_simple: {is_simple(simple_line)}")

# 自相交的线
self_intersecting = LineString([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"\n自相交线 is_valid: {is_valid(self_intersecting)}")
print(f"自相交线 is_simple: {is_simple(self_intersecting)}")
# 注意:LineString 的 is_valid 通常为 True
# 自相交通过 is_simple 来检查

is_valid:检查有效性

基本用法

from shapely import is_valid, Point, LineString, Polygon

# 点
print(f"Point(0, 0) 有效: {is_valid(Point(0, 0))}")

# 正常线
print(f"LineString 有效: {is_valid(LineString([(0, 0), (1, 1)]))}")

# 正常多边形
print(f"Polygon 有效: {is_valid(Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]))}")

# 蝴蝶结(自相交多边形)
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"蝴蝶结 有效: {is_valid(bowtie)}")

批量验证

from shapely import is_valid, Polygon
import numpy as np

# 创建一组多边形(部分无效)
polygons = np.array([
    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),           # 有效
    Polygon([(0, 0), (10, 10), (10, 0), (0, 10)]),        # 无效(蝴蝶结)
    Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),            # 有效
    Polygon([(0, 0), (1, 0), (0, 1), (1, 1), (0, 0)]),    # 无效(自相交)
])

# 批量检查
validity = is_valid(polygons)
print(f"有效性数组: {validity}")

# 筛选无效几何
invalid_mask = ~validity
invalid_geoms = polygons[invalid_mask]
print(f"\n无效几何体数量: {len(invalid_geoms)}")
for geom in invalid_geoms:
    print(f"  {geom.wkt[:60]}...")

is_valid_reason:详细无效原因

基本用法

from shapely import is_valid_reason, Polygon

# 有效几何体
valid = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
print(f"有效几何: {is_valid_reason(valid)}")
# "Valid Geometry"

# 蝴蝶结
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"蝴蝶结: {is_valid_reason(bowtie)}")
# "Self-intersection[5 5]"

常见无效原因

from shapely import is_valid, is_valid_reason, Polygon, LineString

# 1. Self-intersection(自相交)
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(f"1. 自相交:")
print(f"   {is_valid_reason(bowtie)}")

# 2. Ring Self-intersection(环自相交)
ring_self = Polygon([(0, 0), (5, 0), (5, 5), (3, -1), (0, 5)])
print(f"\n2. 环自相交:")
print(f"   {is_valid_reason(ring_self)}")

# 3. Too few points(点太少)
# 通过 WKT 构造无效几何
from shapely import from_wkt
try:
    degen = from_wkt("POLYGON ((0 0, 1 1, 0 0))")
    if degen is not None:
        print(f"\n3. 点太少:")
        print(f"   {is_valid_reason(degen)}")
except Exception as e:
    print(f"\n3. 构造失败: {e}")

# 4. Hole outside shell(孔洞在外环外)
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
hole_outside = [(20, 20), (21, 20), (21, 21), (20, 21)]
poly_hole_out = Polygon(outer, [hole_outside])
print(f"\n4. 孔洞在外环外:")
print(f"   {is_valid_reason(poly_hole_out)}")

# 5. Nested holes(嵌套孔洞)
hole_a = [(1, 1), (9, 1), (9, 9), (1, 9)]
hole_b = [(2, 2), (8, 2), (8, 8), (2, 8)]  # 在 hole_a 内
poly_nested = Polygon(outer, [hole_a, hole_b])
print(f"\n5. 嵌套孔洞:")
print(f"   {is_valid_reason(poly_nested)}")

批量获取无效原因

from shapely import is_valid, is_valid_reason, Polygon
import numpy as np

polygons = np.array([
    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    Polygon([(0, 0), (2, 2), (2, 0), (0, 2)]),
    Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
])

reasons = is_valid_reason(polygons)
for i, (poly, reason) in enumerate(zip(polygons, reasons)):
    status = "✓" if "Valid" in reason else "✗"
    print(f"[{status}] 多边形{i}: {reason}")

is_simple:简单性检查

线的简单性

from shapely import is_simple, LineString

# 简单线(不自相交)
simple = LineString([(0, 0), (1, 1), (2, 0)])
print(f"简单线: is_simple = {is_simple(simple)}")

# 非简单线(自相交 / 8 字形)
not_simple = LineString([(0, 0), (2, 2), (2, 0), (0, 2)])
print(f"自相交线: is_simple = {is_simple(not_simple)}")

# 端点重合的线(闭合线)
closed = LineString([(0, 0), (1, 0), (1, 1), (0, 0)])
print(f"闭合线: is_simple = {is_simple(closed)}")

多几何体的简单性

from shapely import is_simple, MultiPoint, MultiLineString

# MultiPoint 总是简单的(即使有重复点)
mp = MultiPoint([(0, 0), (0, 0), (1, 1)])
print(f"MultiPoint(含重复点): is_simple = {is_simple(mp)}")

# MultiLineString
ml = MultiLineString([
    [(0, 0), (1, 1)],
    [(0, 1), (1, 0)]  # 与第一条交叉
])
print(f"交叉的 MultiLineString: is_simple = {is_simple(ml)}")

make_valid:自动修复无效几何

基本用法

from shapely import make_valid, is_valid, Polygon

# 蝴蝶结多边形(自相交)
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(f"修复前: is_valid = {is_valid(bowtie)}")
print(f"修复前: {bowtie.wkt}")

# 自动修复
fixed = make_valid(bowtie)
print(f"\n修复后: is_valid = {is_valid(fixed)}")
print(f"修复后类型: {fixed.geom_type}")
print(f"修复后: {fixed.wkt}")

修复各种无效几何

from shapely import make_valid, is_valid, is_valid_reason, Polygon

# 场景1:蝴蝶结(自相交)
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
fixed1 = make_valid(bowtie)
print(f"蝴蝶结修复:")
print(f"  类型: {bowtie.geom_type}{fixed1.geom_type}")
print(f"  面积: {bowtie.area:.1f}{fixed1.area:.1f}")

# 场景2:孔洞与外环重叠
outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
hole = [(-2, -2), (5, -2), (5, 5), (-2, 5)]  # 超出外环
poly_bad = Polygon(outer, [hole])
fixed2 = make_valid(poly_bad)
print(f"\n孔洞越界修复:")
print(f"  原因: {is_valid_reason(poly_bad)}")
print(f"  修复后类型: {fixed2.geom_type}")
print(f"  修复后有效: {is_valid(fixed2)}")

# 场景3:尖刺多边形
spike = Polygon([
    (0, 0), (5, 0), (5, 10),  # 正常部分
    (5.01, 5), (5, 5),        # 尖刺
    (0, 10)
])
fixed3 = make_valid(spike)
print(f"\n尖刺多边形修复:")
print(f"  修复后有效: {is_valid(fixed3)}")
print(f"  修复后类型: {fixed3.geom_type}")

修复前后对比

from shapely import make_valid, is_valid, Polygon

# 创建一个复杂的无效多边形
invalid_poly = Polygon([
    (0, 0), (10, 0), (10, 10), (5, 3),
    (0, 10), (5, 7), (0, 0)
])

print("修复前后对比:")
print(f"  有效性: {is_valid(invalid_poly)}")
print(f"  面积: {invalid_poly.area:.2f}")
print(f"  类型: {invalid_poly.geom_type}")

fixed = make_valid(invalid_poly)
print(f"\n  修复后有效: {is_valid(fixed)}")
print(f"  修复后面积: {fixed.area:.2f}")
print(f"  修复后类型: {fixed.geom_type}")

# 如果修复结果是 MultiPolygon,遍历各部分
if fixed.geom_type == 'MultiPolygon':
    for i, part in enumerate(fixed.geoms):
        print(f"  部分{i}: 面积={part.area:.2f}")
elif fixed.geom_type == 'GeometryCollection':
    for i, part in enumerate(fixed.geoms):
        print(f"  部分{i}: {part.geom_type}, 面积={part.area:.2f}")

常见无效几何场景

蝴蝶结多边形(自相交)

from shapely import Polygon, is_valid, is_valid_reason, make_valid

# 蝴蝶结:两个三角形共用一个交点
bowtie = Polygon([
    (0, 0), (4, 4), (4, 0), (0, 4), (0, 0)
])

print("蝴蝶结多边形:")
print(f"  有效: {is_valid(bowtie)}")
print(f"  原因: {is_valid_reason(bowtie)}")

# 修复:分割为两个三角形
fixed = make_valid(bowtie)
print(f"  修复结果: {fixed.geom_type}")
if hasattr(fixed, 'geoms'):
    for i, g in enumerate(fixed.geoms):
        if g.area > 0:
            print(f"    部分{i}: {g.geom_type}, 面积={g.area:.2f}")

尖刺多边形

from shapely import Polygon, is_valid, make_valid

# 带有零宽度尖刺的多边形
spike_poly = Polygon([
    (0, 0), (10, 0), (10, 10),
    (5, 10), (5, 15), (5, 10),  # 尖刺:向上延伸再返回
    (0, 10), (0, 0)
])

print("尖刺多边形:")
print(f"  有效: {is_valid(spike_poly)}")

fixed = make_valid(spike_poly)
print(f"  修复后: {fixed.geom_type}")
print(f"  修复后面积: {fixed.area:.2f}")

孔洞超出外环

from shapely import Polygon, is_valid, is_valid_reason, make_valid

outer = [(0, 0), (10, 0), (10, 10), (0, 10)]
# 孔洞部分超出外环
bad_hole = [(-2, 3), (5, 3), (5, 7), (-2, 7)]

poly = Polygon(outer, [bad_hole])
print("孔洞超出外环:")
print(f"  有效: {is_valid(poly)}")
print(f"  原因: {is_valid_reason(poly)}")

fixed = make_valid(poly)
print(f"  修复后类型: {fixed.geom_type}")
print(f"  修复后有效: {is_valid(fixed)}")

重复顶点

from shapely import Polygon, is_valid, LineString

# 重复顶点通常不导致无效,但可能引起问题
poly_dup = Polygon([
    (0, 0), (5, 0), (5, 0),  # 重复点
    (5, 5), (0, 5), (0, 0)
])
print("含重复顶点的多边形:")
print(f"  有效: {is_valid(poly_dup)}")
print(f"  顶点数: {len(poly_dup.exterior.coords)}")

# 去除重复顶点
from shapely import simplify
simplified = simplify(poly_dup, tolerance=0)
print(f"  简化后顶点数: {len(simplified.exterior.coords)}")

退化几何(零面积多边形)

from shapely import Polygon, LineString, is_valid

# 退化多边形(所有点共线)
degenerate = Polygon([(0, 0), (5, 0), (10, 0), (5, 0)])
print("退化多边形(共线):")
print(f"  有效: {is_valid(degenerate)}")
print(f"  面积: {degenerate.area}")
print(f"  空: {degenerate.is_empty}")

# 零面积多边形
zero_area = Polygon([(0, 0), (1, 0), (1, 0), (0, 0)])
print(f"\n零面积多边形:")
print(f"  有效: {is_valid(zero_area)}")
print(f"  面积: {zero_area.area}")

手动修复策略

buffer(0) 技巧

from shapely import Polygon, is_valid

# buffer(0) 是最经典的修复技巧
# 原理:缓冲区操作会重新构建几何体的拓扑

bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"修复前: {is_valid(bowtie)}")

# buffer(0) 修复
fixed = bowtie.buffer(0)
print(f"buffer(0) 后: {is_valid(fixed)}")
print(f"类型: {fixed.geom_type}")
print(f"面积: {fixed.area:.2f}")

unary_union 修复

from shapely import Polygon, is_valid, unary_union

# unary_union 也可以修复某些无效几何
invalid = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])

# 对单个几何体做 unary_union
fixed = unary_union([invalid])
print(f"unary_union 修复:")
print(f"  有效: {is_valid(fixed)}")
print(f"  类型: {fixed.geom_type}")

simplify 清理

from shapely import Polygon, is_valid, simplify

# 使用 simplify 去除退化部分
messy_poly = Polygon([
    (0, 0), (5, 0), (5.001, 0.001), (5, 0),  # 微小尖刺
    (10, 0), (10, 10), (0, 10)
])
print(f"混乱多边形有效: {is_valid(messy_poly)}")

# 使用小容差简化
cleaned = simplify(messy_poly, tolerance=0.01)
print(f"简化后有效: {is_valid(cleaned)}")
print(f"简化后: {cleaned.wkt}")

综合修复函数

from shapely import is_valid, is_valid_reason, make_valid, simplify

def robust_fix(geometry, tolerance=1e-8):
    """
    健壮的几何修复函数,按照优先级尝试多种修复方法
    """
    # 已经有效,直接返回
    if is_valid(geometry):
        return geometry

    reason = is_valid_reason(geometry)
    print(f"  无效原因: {reason}")

    # 方法1: make_valid(推荐,GEOS 内置)
    try:
        fixed = make_valid(geometry)
        if is_valid(fixed) and not fixed.is_empty:
            print("  使用 make_valid 修复成功")
            return fixed
    except Exception:
        pass

    # 方法2: buffer(0)
    try:
        fixed = geometry.buffer(0)
        if is_valid(fixed) and not fixed.is_empty:
            print("  使用 buffer(0) 修复成功")
            return fixed
    except Exception:
        pass

    # 方法3: simplify + buffer(0)
    try:
        fixed = simplify(geometry, tolerance=tolerance).buffer(0)
        if is_valid(fixed) and not fixed.is_empty:
            print("  使用 simplify + buffer(0) 修复成功")
            return fixed
    except Exception:
        pass

    print("  ⚠ 所有修复方法均失败")
    return geometry

# 测试
test_cases = [
    ("蝴蝶结", Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])),
    ("正常多边形", Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])),
]

for name, poly in test_cases:
    print(f"\n{name}:")
    result = robust_fix(poly)
    print(f"  结果类型: {result.geom_type}")
    print(f"  有效: {is_valid(result)}")

预防无效几何的最佳实践

输入验证

from shapely import is_valid, is_valid_reason, Polygon

def create_safe_polygon(coords, holes=None):
    """安全地创建多边形,带验证和自动修复"""
    from shapely import make_valid

    # 确保坐标闭合
    if coords[0] != coords[-1]:
        coords = list(coords) + [coords[0]]

    # 确保至少 4 个坐标
    if len(coords) < 4:
        raise ValueError(f"多边形至少需要4个坐标,实际: {len(coords)}")

    # 创建多边形
    poly = Polygon(coords, holes)

    # 验证并修复
    if not is_valid(poly):
        reason = is_valid_reason(poly)
        print(f"⚠ 创建了无效多边形: {reason}")
        poly = make_valid(poly)
        print(f"  已自动修复")

    return poly

# 使用
poly1 = create_safe_polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
print(f"正常: {poly1.geom_type}, 面积={poly1.area}")

poly2 = create_safe_polygon([(0, 0), (10, 10), (10, 0), (0, 10)])
print(f"修复后: {poly2.geom_type}, 面积={poly2.area}")

坐标精度控制

from shapely import Polygon, set_precision, is_valid

# 浮点精度问题可能导致无效几何
# 使用 set_precision 统一精度

coords = [
    (0.0000000001, 0.0000000001),
    (10.0000000002, 0.0000000003),
    (10.0000000001, 10.0000000002),
    (0.0000000003, 10.0000000001),
]

poly = Polygon(coords)
print(f"高精度多边形: {is_valid(poly)}")

# 降低精度到合理范围
snapped = set_precision(poly, grid_size=0.001)
print(f"降低精度后: {is_valid(snapped)}")
print(f"简化后坐标: {list(snapped.exterior.coords)}")

批量验证与修复

批量处理流程

from shapely import is_valid, is_valid_reason, make_valid, Polygon
import numpy as np

# 模拟大批量数据
test_polygons = [
    Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),         # 有效
    Polygon([(0, 0), (5, 5), (5, 0), (0, 5)]),          # 无效
    Polygon([(10, 10), (20, 10), (20, 20), (10, 20)]),   # 有效
    Polygon([(0, 0), (10, 10), (10, 0), (0, 10)]),       # 无效
    Polygon([(0, 0), (3, 0), (3, 3), (0, 3)]),           # 有效
]

# 批量验证
validity = np.array([is_valid(p) for p in test_polygons])
print(f"有效数量: {np.sum(validity)} / {len(test_polygons)}")

# 批量修复
fixed_polygons = []
for i, (poly, valid) in enumerate(zip(test_polygons, validity)):
    if valid:
        fixed_polygons.append(poly)
    else:
        reason = is_valid_reason(poly)
        fixed = make_valid(poly)
        fixed_polygons.append(fixed)
        print(f"  修复第{i}个: {reason}")

# 验证修复结果
all_valid = all(is_valid(p) for p in fixed_polygons)
print(f"\n修复后全部有效: {all_valid}")

使用向量化操作

from shapely import is_valid, make_valid, Polygon
import numpy as np

# 向量化的批量验证
polygons = np.array([
    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    Polygon([(0, 0), (2, 2), (2, 0), (0, 2)]),
    Polygon([(0, 0), (3, 0), (3, 3), (0, 3)]),
])

# 向量化 is_valid
validity = is_valid(polygons)
print(f"有效性: {validity}")

# 向量化 make_valid
fixed = make_valid(polygons)
print(f"修复后有效性: {is_valid(fixed)}")

生成验证报告

from shapely import is_valid, is_valid_reason, make_valid, Polygon
from collections import Counter

def validation_report(geometries, names=None):
    """生成几何验证报告"""
    if names is None:
        names = [f"geom_{i}" for i in range(len(geometries))]

    total = len(geometries)
    valid_count = 0
    invalid_reasons = Counter()
    results = []

    for name, geom in zip(names, geometries):
        if is_valid(geom):
            valid_count += 1
            results.append((name, True, "Valid", geom))
        else:
            reason = is_valid_reason(geom)
            # 提取主要错误类型
            error_type = reason.split("[")[0].strip()
            invalid_reasons[error_type] += 1
            fixed = make_valid(geom)
            results.append((name, False, reason, fixed))

    # 打印报告
    print("=" * 60)
    print("几何验证报告")
    print("=" * 60)
    print(f"总数: {total}")
    print(f"有效: {valid_count} ({valid_count/total*100:.1f}%)")
    print(f"无效: {total - valid_count} ({(total-valid_count)/total*100:.1f}%)")

    if invalid_reasons:
        print(f"\n无效原因统计:")
        for reason, count in invalid_reasons.most_common():
            print(f"  {reason}: {count} 个")

    return results

# 示例
polygons = [
    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    Polygon([(0, 0), (2, 2), (2, 0), (0, 2)]),
    Polygon([(0, 0), (3, 0), (3, 3), (0, 3)]),
    Polygon([(0, 0), (4, 4), (4, 0), (0, 4)]),
    Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
]
names = [f"多边形_{i}" for i in range(len(polygons))]

results = validation_report(polygons, names)

小结

本章详细介绍了几何验证与修复的完整工作流:

验证工具:

  • is_valid(geometry):快速检查有效性
  • is_valid_reason(geometry):获取详细的无效原因
  • is_simple(geometry):检查线的简单性

修复工具:

  • make_valid(geometry):GEOS 内置的自动修复(推荐)
  • buffer(0):经典的拓扑重建修复
  • unary_union:通过合并操作修复
  • simplify:去除微小退化部分

常见无效场景:

  • 蝴蝶结(自相交多边形)
  • 尖刺多边形
  • 孔洞超出外环
  • 重复顶点
  • 退化几何

最佳实践:

  1. 在数据入口处验证几何有效性
  2. 使用 make_valid 作为首选修复方法
  3. 批量处理使用向量化操作
  4. 控制坐标精度避免浮点问题
  5. 建立验证报告机制跟踪数据质量