字符串地址解析 省市区
解析字符串中存在的省市区信息
效果
标准地址
如:广东省广州市番禺区沙湾镇…
标准地址可以直接使用正则表达式解析
/// <summary>
/// Splits an address string into administrative-division parts using one regular expression.
/// Intended for reasonably standard addresses such as "广东省广州市番禺区沙湾镇".
/// </summary>
/// <param name="address">Address text to split.</param>
/// <returns>
/// A dictionary with the keys "province", "city", "country" and "community".
/// A part the pattern did not capture is stored as an empty string.
/// </returns>
public static Dictionary<string, string> SplitAddressByRegex(this string address)
{
    // Every group is optional, so the pattern also matches addresses that omit some levels.
    // The trailing "village" group swallows the remainder and is intentionally not returned.
    const string pattern =
        "(?<province>[^省]+自治区|.*?省|.*?行政区)?" +
        "(?<city>[^市]+自治州|.*?地区|.*?行政单位|.+盟|市辖区|.*?市)?" +
        "(?<country>[^县]+县|.+区|.+市|.+旗|.+海域|.+岛)?" +
        "(?<community>[^区]+区|.+镇)?(?<village>.*)";

    Match match = Regex.Match(address, pattern, RegexOptions.IgnoreCase);
    GroupCollection groups = match.Groups;

    return new Dictionary<string, string>
    {
        { "province", groups["province"].Value },
        { "city", groups["city"].Value },
        { "country", groups["country"].Value },
        { "community", groups["community"].Value },
    };
}
使用:
// Split with the regex, then round-trip through JSON to map the dictionary keys onto the DTO properties.
var addressObj = address.SplitAddressByRegex().ToJson().ToObject<SplitDistrictDTO>();
特殊类型的地址
先将特殊类型的地址预处理为较标准的地址格式
然后进行正则、数据库匹配等方式进一步精确查询
公司微信订单地址
特殊处理,处理格式非常不规范的情况
微信订单 示例, 中华路51室, 上海, China
/// <summary>
/// Pre-processes the irregular address format seen on WeChat orders, where the text ends with
/// a country segment, e.g. "中华路51室, 上海, China", and extracts the segment that precedes "China".
/// </summary>
/// <param name="address">Raw order address; may be null or blank.</param>
/// <returns>
/// A DTO whose City is the trimmed second-to-last comma-separated segment, or an empty DTO
/// when the address does not end with "China" or has fewer than two commas.
/// </returns>
private SplitDistrictDTO SpecialHandlingWeChat(string address)
{
    SplitDistrictDTO districtDTO = new SplitDistrictDTO();
    if (string.IsNullOrWhiteSpace(address))
        return districtDTO;

    // Normalize full-width commas (U+FF0C) to ASCII commas so one separator covers both spellings,
    // and drop surrounding whitespace so a trailing space does not hide the "China" suffix.
    address = address.Replace('\uFF0C', ',').Trim();

    // Ordinal, case-insensitive comparison: the result must not depend on the current culture.
    if (!address.EndsWith("china", System.StringComparison.OrdinalIgnoreCase))
        return districtDTO;

    // Cut off the trailing ", China" segment.
    int lastIndex = address.LastIndexOf(',');
    if (lastIndex < 1)
        return districtDTO;
    address = address[..lastIndex];

    // The comma that is now last precedes the city segment.
    lastIndex = address.LastIndexOf(',');
    if (lastIndex < 1)
        return districtDTO;

    // The segment may lack the "市" suffix; trim it so the space after the comma
    // does not become the first character of the city name.
    districtDTO.City = address[(lastIndex + 1)..].Trim();
    return districtDTO;
}
通过省或城市名称获取
/// <summary>
/// Looks up address rows by a province or city name embedded in an irregular address.
/// Example: "回民区振华一楼,, 呼和浩特市" with type "市" resolves to 呼和浩特市.
/// </summary>
/// <param name="address">Text that contains the name.</param>
/// <param name="type">Suffix that marks the name: "省" or "自治区" search the Province column, any other value searches the City column.</param>
/// <param name="len">Number of characters to take in front of the suffix on the first attempt; defaults to two.</param>
/// <returns>The matching rows, or an empty list when nothing matched or the name could not be extracted.</returns>
private List<AddressCity> GetAddressByCityOrProvince(string address, string type, int len = 2)
{
    bool searchProvince = type == "省" || type == "自治区";

    // Each pass widens the window in front of the suffix by one character.
    for (int take = len; take <= address.Length; take++)
    {
        int suffixIndex = address.IndexOf(type);
        bool suffixFound = suffixIndex > 0;

        // When the suffix is absent (or leads the text) the whole input is used as the name.
        string name = address;
        if (suffixFound)
        {
            // Not enough characters in front of the suffix.
            if (suffixIndex < take)
                break;

            name = address.Substring(suffixIndex - take, take);

            // A name must begin with a Chinese character.
            if (!name[0].IsChineseChar())
                break;
        }

        List<AddressCity> found = searchProvince
            ? _addressCityService.Db.Queryable<AddressCity>().Where(p => p.Province.StartsWith(name)).ToList()
            : _addressCityService.Db.Queryable<AddressCity>().Where(p => p.City.StartsWith(name)).ToList();

        // Only an extracted name is worth retrying with a wider window.
        if (found.Count > 0 || !suffixFound)
            return found;
    }

    return new List<AddressCity>();
}
数据库反查
通过省、直辖市、直辖区得到省级信息
/// <summary>
/// Resolves province-level rows from a province, municipality or city name,
/// consulting the in-memory cache before the database.
/// </summary>
/// <param name="province">Name to resolve; names shorter than two characters yield an empty list instead of an exception.</param>
/// <returns>The matching rows, or an empty list when nothing matched.</returns>
private List<AddressCity> GetAddressByProvince(string province)
{
    if (province.Length < 2)
        return new List<AddressCity>();

    // Cache first: exact province name, then its first two characters
    // (covers inputs such as "北京" for "北京市").
    List<AddressCity> cached = _addressCache.FindAll(p => p.Province == province);
    if (cached.Count == 0)
        cached = _addressCache.FindAll(p => p.Province.StartsWith(province[..2]));
    if (cached.Count > 0)
        return cached;

    List<AddressCity> rows = _addressCityService.Db.Queryable<AddressCity>()
        .Where(p => p.Province == province)
        .ToList();

    // Fallbacks, tried in order until one returns rows. Province-level suffixes come first;
    // the city-level ones cover addresses that skip the province. A city name shared by
    // two provinces can produce a wrong hit here (unlikely, but possible).
    string[] suffixes = { "省", "自治区", "市", "自治州" };
    foreach (string suffix in suffixes)
    {
        if (rows.Count > 0)
            break;
        rows = GetAddressByCityOrProvince(province, suffix);
    }

    if (rows.Count > 0)
        _addressCache.AddRange(rows);
    return rows;
}
在省级地址的基础上反查市级单位
/// <summary>
/// Resolves the city name for a parsed city string, preferring rows of the already resolved province.
/// </summary>
/// <param name="addressList">Rows of the resolved province.</param>
/// <param name="city">Parsed city text; values shorter than two characters yield an empty string instead of an exception.</param>
/// <returns>The City value of the first matching row, or an empty string when nothing matched.</returns>
private string GetCity(List<AddressCity> addressList, string city)
{
    if (city.Length < 2)
        return string.Empty;

    List<AddressCity> matches = addressList.FindAll(p => p.City == city);

    // The text may name a county-level city, which is stored in the Area column.
    if (matches.Count == 0)
        matches = addressList.FindAll(p => p.Area == city);

    // Last resort: look the name up in the database.
    if (matches.Count == 0)
        matches = GetAddressByCityOrProvince(city, "市");

    if (matches.Count == 0)
        return string.Empty;
    return matches[0].City;
}
使用到的实体
DistrictDTO
/// <summary>
/// Purpose: administrative divisions of an address - province, city, district and community.
/// V0.01 2024/8/16 10:19:14 xliu  initial version
/// </summary>
public class DistrictDTO
{
/// <summary>
/// Province (省).
/// </summary>
public string Province { get; set; }
/// <summary>
/// City (市).
/// </summary>
public string City { get; set; }
/// <summary>
/// District (区).
/// NOTE(review): the name reads like a misspelling of "County"; presumably it has to stay
/// because it is matched against the "country" key when the DTO is filled from JSON - confirm.
/// </summary>
public string Country { get; set; }
/// <summary>
/// Community (社区).
/// </summary>
public string Community { get; set; }
}
SplitDistrictDTO
/// <summary>
/// Result of splitting one address: the administrative divisions inherited from
/// DistrictDTO plus an identifier and the address text.
/// </summary>
public class SplitDistrictDTO : DistrictDTO
{
/// <summary>
/// Identifier of the record.
/// NOTE(review): presumably the id of the order or row the address belongs to - confirm with callers.
/// </summary>
public string ID { get; set; }
/// <summary>
/// Full address text.
/// </summary>
public string FullAddress { get; set; }
}
使用到的扩展
ToJson
/// <summary>
/// One options instance per (IgnoreDefaults, UpperFirst) combination, indexed by
/// (IgnoreDefaults ? 2 : 0) + (UpperFirst ? 1 : 0). System.Text.Json caches its type metadata
/// on the options instance, so building a new instance per call repeats that work every time.
/// NOTE(review): sharing instances assumes the custom converters keep no per-call state - confirm.
/// </summary>
private static readonly JsonSerializerOptions[] s_toJsonOptions =
{
    CreateToJsonOptions(false, false),
    CreateToJsonOptions(false, true),
    CreateToJsonOptions(true, false),
    CreateToJsonOptions(true, true),
};

/// <summary>
/// Builds the serializer options used by ToJson for one flag combination.
/// </summary>
private static JsonSerializerOptions CreateToJsonOptions(bool ignoreDefaults, bool upperFirst)
{
    JsonSerializerOptions options = new JsonSerializerOptions
    {
        Encoder = JavaScriptEncoder.Create(UnicodeRanges.All),
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        AllowTrailingCommas = true
    };
    // Date/time formats
    options.Converters.Add(new DateTimeJsonConverter());
    options.Converters.Add(new DateTimeNullableConverter());
    // Boolean format
    options.Converters.Add(new BooleanJsonConverter());
    // Numbers and strings
    options.Converters.Add(new IntJsonConverter());
    options.Converters.Add(new LongJsonConverter());
    options.Converters.Add(new DoubleJsonConverter());
    options.Converters.Add(new StringNullableJsonConver());
    if (upperFirst)
        options.PropertyNamingPolicy = new UpperFirstCaseNamingPolicy();
    options.PropertyNameCaseInsensitive = true;
    if (ignoreDefaults)
        options.DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull;
    // Compact single-line output; true would pretty-print across lines.
    options.WriteIndented = false;
    return options;
}

/// <summary>
/// Serializes an object to a compact JSON string.
/// </summary>
/// <param name="obj">Object to serialize.</param>
/// <param name="IgnoreDefaults">When true, properties whose value is null are omitted. Non-null default values such as 0 or false are still written.</param>
/// <param name="UpperFirst">When true, property names are produced by UpperFirstCaseNamingPolicy; otherwise they are camelCase.</param>
/// <returns>The JSON text.</returns>
public static string ToJson(this object obj,bool IgnoreDefaults = false, bool UpperFirst = true)
{
    int slot = (IgnoreDefaults ? 2 : 0) + (UpperFirst ? 1 : 0);
    return JsonSerializer.Serialize(obj, s_toJsonOptions[slot]);
}
ToObject
/// <summary>
/// Options shared by every ToObject call. System.Text.Json caches its type metadata on the
/// options instance, so building a new instance per call repeats that work every time.
/// NOTE(review): sharing the instance assumes the custom converters keep no per-call state - confirm.
/// </summary>
private static readonly JsonSerializerOptions s_toObjectOptions = CreateToObjectOptions();

/// <summary>
/// Builds the serializer options used by ToObject.
/// </summary>
private static JsonSerializerOptions CreateToObjectOptions()
{
    JsonSerializerOptions options = new JsonSerializerOptions
    {
        Encoder = JavaScriptEncoder.Create(UnicodeRanges.All),
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        AllowTrailingCommas = true
    };
    // Date/time formats
    options.Converters.Add(new DateTimeJsonConverter());
    options.Converters.Add(new DateTimeNullableConverter());
    // Boolean format
    options.Converters.Add(new BooleanJsonConverter());
    // Numbers and strings
    options.Converters.Add(new IntJsonConverter());
    options.Converters.Add(new LongJsonConverter());
    options.Converters.Add(new DoubleJsonConverter());
    options.Converters.Add(new StringNullableJsonConver());
    // Property names are matched without regard to case.
    options.PropertyNameCaseInsensitive = true;
    return options;
}

/// <summary>
/// Deserializes a JSON string into an object.
/// </summary>
/// <typeparam name="T">Target type.</typeparam>
/// <param name="json">JSON text; null yields the default value of <typeparamref name="T"/>.</param>
/// <returns>The deserialized object, or default when <paramref name="json"/> is null.</returns>
public static T ToObject<T>(this string json)
{
    return json == null ? default : JsonSerializer.Deserialize<T>(json, s_toObjectOptions);
}
完整代码
namespace apm.Services.Extend
{
    /// <summary>
    /// Resolves a free-form Chinese address into province / city / district by combining
    /// string pre-processing, a regular-expression split and look-ups against AddressCity rows.
    /// V0.01 9/3/2024 16:23:12 xliu  initial version
    /// </summary>
    public class ProvinceCityAreaService : IProvinceCityAreaService
    {
        private readonly IAddressCityService _addressCityService;

        /// <summary>
        /// Rows already fetched by this instance, kept to avoid repeating database queries.
        /// Deliberately not backed by Redis: that would add system and maintenance complexity for little gain.
        /// </summary>
        private readonly List<AddressCity> _addressCache = new List<AddressCity>();

        public ProvinceCityAreaService(IAddressCityService addressCityService)
        {
            _addressCityService = addressCityService;
        }

        /// <summary>
        /// Parses an address and corrects the parsed province, city and district against the database.
        /// </summary>
        /// <param name="address">Raw address text.</param>
        /// <returns>The resolved divisions, or null when the address is blank or no province could be resolved.</returns>
        public SplitDistrictDTO FillProvinceCityArea(string address)
        {
            // Nothing to parse; null is this method's "could not resolve" result.
            if (string.IsNullOrWhiteSpace(address))
                return null;

            List<AddressCity> provinces = new List<AddressCity>();
            // Try the irregular WeChat-order format first.
            SplitDistrictDTO district = SpecialHandlingWeChat(address);
            district.FullAddress = address;
            if (district.City.IsNotEmptyOrNull() && district.City[0].IsChineseChar())
                provinces = GetAddress(district);
            // Second attempt: treat the text as a roughly standard address.
            if (!provinces.Any())
            {
                district = address.SplitAddressByRegex().ToJson().ToObject<SplitDistrictDTO>();
                district.FullAddress = address;
                provinces = GetAddress(district);
            }
            district.FullAddress = ""; // cleared to keep the returned payload small
            if (!provinces.Any())
                return null;
            // Keep a copy of the raw parse result before it is overwritten with database values.
            var oldDistrict = district.CreateDeepCopy();
            // Replace the parsed province with the database spelling.
            district.Province = provinces[0].Province;
            // City: check the municipality case first, where the city equals the province.
            if (provinces[0].City == "市辖区" || provinces[0].City == district.Province)
            {
                district.City = district.Province;
            }
            else
            {
                // No city parsed: strip the province from the text and treat what is left as the city.
                if (district.City.IsNullOrEmpty())
                {
                    string province = district.Province.Replace("省", "").Replace("自治区", "").Replace("市", "");
                    if (oldDistrict.Province.IsNotEmptyOrNull())
                    {
                        if (oldDistrict.Province.Contains(province))
                            district.City = oldDistrict.Province.Replace(province, "");
                        province = oldDistrict.Province;
                    }
                    // Skip when the branch above already produced a city, to avoid stripping twice.
                    if (district.City.IsNullOrEmpty())
                        district.City = address.Replace(province, "");
                }
                district.City = GetCity(provinces, district.City);
            }
            // District / county: the least reliable level, returned for reference only.
            if (district.Country.IsNotEmptyOrNull())
                district.Country = GetCountry(provinces, district.Country);
            // Accuracy could be raised further with database-to-address reverse matching or configurable special rules.
            return district;
        }

        /// <summary>
        /// Resolves province-level rows for a parsed address, falling back from province to city
        /// to the first two characters of the full address.
        /// </summary>
        /// <param name="district">Parsed address; its Province is set to City when no province was parsed.</param>
        /// <returns>The matching rows, or an empty list.</returns>
        private List<AddressCity> GetAddress(SplitDistrictDTO district)
        {
            // Municipalities have no separate province level.
            if (district.Province.IsNullOrEmpty())
                district.Province = district.City;
            List<AddressCity> provinces;
            if (district.Province.IsNullOrEmpty())
            {
                // Nothing parsed: treat the first two characters as the province.
                // Shorter text is passed through unchanged and rejected by GetAddressByProvince
                // instead of throwing on the range operator.
                string fullAddress = district.FullAddress ?? string.Empty;
                string head = fullAddress.Length > 2 ? fullAddress[..2] : fullAddress;
                provinces = GetAddressByProvince(head);
            }
            else
            {
                provinces = GetAddressByProvince(district.Province);
            }
            // Retry with the city when the province gave nothing.
            if (provinces.Count == 0 && district.City.IsNotEmptyOrNull())
                provinces = GetAddressByProvince(district.City);
            return provinces;
        }

        /// <summary>
        /// Resolves province-level rows from a province, municipality or city name,
        /// consulting the in-memory cache before the database.
        /// </summary>
        /// <param name="province">Name to resolve; names shorter than two characters yield an empty list instead of an exception.</param>
        /// <returns>The matching rows, or an empty list when nothing matched.</returns>
        private List<AddressCity> GetAddressByProvince(string province)
        {
            if (province.Length < 2)
                return new List<AddressCity>();

            // Cache first: exact province name, then its first two characters
            // (covers inputs such as "北京" for "北京市").
            var cache = _addressCache.Where(p => p.Province == province).ToList();
            if (cache.Count == 0)
                cache = _addressCache.Where(p => p.Province.StartsWith(province[..2])).ToList();
            if (cache.Count > 0)
                return cache;

            var provinces = _addressCityService.Db.Queryable<AddressCity>()
                .Where(p => p.Province == province).ToList();

            // Fallbacks, tried in order until one returns rows. Province-level suffixes come first;
            // the city-level ones cover addresses that skip the province. A city name shared by
            // two provinces can produce a wrong hit here (unlikely, but possible).
            string[] suffixes = { "省", "自治区", "市", "自治州" };
            foreach (string suffix in suffixes)
            {
                if (provinces.Count > 0)
                    break;
                provinces = GetAddressByCityOrProvince(province, suffix);
            }

            if (provinces.Count > 0)
                _addressCache.AddRange(provinces);
            return provinces;
        }

        /// <summary>
        /// Resolves the city name for a parsed city string, preferring rows of the already resolved province.
        /// </summary>
        /// <param name="addressList">Rows of the resolved province.</param>
        /// <param name="city">Parsed city text; values shorter than two characters yield an empty string instead of an exception.</param>
        /// <returns>The City value of the first matching row, or an empty string when nothing matched.</returns>
        private string GetCity(List<AddressCity> addressList, string city)
        {
            if (city.Length < 2)
                return string.Empty;
            var citys = addressList.Where(p => p.City == city).ToList();
            // The text may name a county-level city, which is stored in the Area column.
            if (citys.Count == 0)
                citys = addressList.Where(p => p.Area == city).ToList();
            // Last resort: look the name up in the database.
            if (citys.Count == 0)
                citys = GetAddressByCityOrProvince(city, "市");
            return citys.Any() ? citys[0].City : string.Empty;
        }

        /// <summary>
        /// Resolves the district / county name within the rows of the resolved province.
        /// </summary>
        /// <param name="addressList">Rows of the resolved province.</param>
        /// <param name="country">Parsed district text.</param>
        /// <returns>The Area value of the first matching row, or an empty string when nothing matched.</returns>
        private string GetCountry(List<AddressCity> addressList, string country)
        {
            var areas = addressList.Where(p => p.Area == country).ToList();
            // The prefix fallback needs two characters; shorter input can only match exactly.
            if (areas.Count == 0 && country.Length >= 2)
            {
                string prefix = country[..2];
                areas = addressList.Where(p => p.Area.StartsWith(prefix)).ToList();
            }
            return areas.Any() ? areas[0].Area : string.Empty;
        }

        /// <summary>
        /// Pre-processes the irregular address format seen on WeChat orders, where the text ends with
        /// a country segment, e.g. "中华路51室,, 上海, China", and extracts the segment that precedes "China".
        /// </summary>
        /// <param name="address">Raw order address; may be null or blank.</param>
        /// <returns>
        /// A DTO whose City is the trimmed second-to-last comma-separated segment, or an empty DTO
        /// when the address does not end with "China" or has fewer than two commas.
        /// </returns>
        private SplitDistrictDTO SpecialHandlingWeChat(string address)
        {
            SplitDistrictDTO districtDTO = new SplitDistrictDTO();
            if (string.IsNullOrWhiteSpace(address))
                return districtDTO;

            // Normalize full-width commas (U+FF0C) to ASCII commas so one separator covers both spellings,
            // and drop surrounding whitespace so a trailing space does not hide the "China" suffix.
            address = address.Replace('\uFF0C', ',').Trim();

            // Ordinal, case-insensitive comparison: the result must not depend on the current culture.
            if (!address.EndsWith("china", System.StringComparison.OrdinalIgnoreCase))
                return districtDTO;

            // Cut off the trailing ", China" segment.
            int lastIndex = address.LastIndexOf(',');
            if (lastIndex < 1)
                return districtDTO;
            address = address[..lastIndex];

            // The comma that is now last precedes the city segment.
            lastIndex = address.LastIndexOf(',');
            if (lastIndex < 1)
                return districtDTO;

            // The segment may lack the "市" suffix; trim it so the space after the comma
            // does not become the first character of the city name.
            districtDTO.City = address[(lastIndex + 1)..].Trim();
            return districtDTO;
        }

        /// <summary>
        /// Looks up address rows by a province or city name embedded in an irregular address.
        /// Example: "回民区振华一楼,, 呼和浩特市" with type "市" resolves to 呼和浩特市.
        /// </summary>
        /// <param name="address">Text that contains the name.</param>
        /// <param name="type">Suffix that marks the name: "省" or "自治区" search the Province column, any other value searches the City column.</param>
        /// <param name="len">Number of characters to take in front of the suffix on the first attempt; defaults to two.</param>
        /// <returns>The matching rows, or an empty list when nothing matched or the name could not be extracted.</returns>
        private List<AddressCity> GetAddressByCityOrProvince(string address, string type, int len = 2)
        {
            bool searchProvince = type == "省" || type == "自治区";

            // Each pass widens the window in front of the suffix by one character.
            for (int take = len; take <= address.Length; take++)
            {
                int suffixIndex = address.IndexOf(type);
                bool suffixFound = suffixIndex > 0;

                // When the suffix is absent (or leads the text) the whole input is used as the name.
                string name = address;
                if (suffixFound)
                {
                    // Not enough characters in front of the suffix.
                    if (suffixIndex < take)
                        break;

                    name = address.Substring(suffixIndex - take, take);

                    // A name must begin with a Chinese character.
                    if (!name[0].IsChineseChar())
                        break;
                }

                List<AddressCity> found = searchProvince
                    ? _addressCityService.Db.Queryable<AddressCity>().Where(p => p.Province.StartsWith(name)).ToList()
                    : _addressCityService.Db.Queryable<AddressCity>().Where(p => p.City.StartsWith(name)).ToList();

                // Only an extracted name is worth retrying with a wider window.
                if (found.Count > 0 || !suffixFound)
                    return found;
            }

            return new List<AddressCity>();
        }
    }
}
数据库文件
见绑定的资源