编辑代码

<?php
// Demo driver: parse one sample address and dump the parsed fields.
// Alternative sample carrying an ID number, field labels and a postal code:
// $address = "身份证号:13012519910927226x  收货人吴彦祖收货地址:河北省石家庄市长安区中山东路888号万达广场1001  050000  13211006666";
$address = '湖南省常德市澧县码头铺镇杨家坊街道 高潇潇 18676867026';

// Run the parser on the sample.
$addressArr = getDetail($address);

// <pre> keeps print_r's indentation readable when viewed in a browser.
echo '<pre>', print_r($addressArr, true);

/**
 * Heuristically split a free-form Chinese shipping-address string into
 * recipient name, phone number, province, city, area and street detail.
 *
 * This is pattern matching, not natural-language analysis: it relies on the
 * usual administrative suffixes (省 / 市 / 区 / 县 ...) and on the statistical
 * assumption that, among space-separated tokens, the shortest one is the
 * recipient's name. Fields that cannot be determined are returned as ''.
 *
 * @param string $address Raw text, e.g. "湖南省常德市澧县码头铺镇杨家坊街道 高潇潇 18676867026".
 *
 * @return array{name:string,mobile:string,province:string,city:string,area:string,address:string}
 */
function getDetail(string $address): array
{
    // Parse result; every key is always present.
    $parse = [
        'name'     => '',
        'mobile'   => '',
        'province' => '',
        'city'     => '',
        'area'     => '',
        'address'  => '',
    ];

    // Character (not byte) length, so CJK and ASCII tokens compare fairly.
    // mbstring is an optional extension, hence the PCRE-based fallback.
    $charLength = static function (string $text): int {
        return function_exists('mb_strlen')
            ? mb_strlen($text, 'UTF-8')
            : (int) preg_match_all('/./su', $text);
    };

    // 1. Turn common field labels and separators into spaces so they neither
    //    pollute the address nor glue fields together. Longer labels come
    //    first so that e.g. '收货地址' is consumed before '地址'. Both ASCII and
    //    full-width punctuation (plus the ideographic space) are covered.
    $noise = [
        '收货地址', '地址', '收货人', '收件人', '收货', '邮编', '电话',
        '身份证号码', '身份证号', '身份证',
        ':', ':', ';', ';', ',', ',', '。', "\u{3000}",
    ];
    $address = str_replace($noise, ' ', $address);

    // 2. Collapse every run of whitespace (spaces, tabs, line breaks) into one space.
    $address = preg_replace('/[ \t\r\n]+/', ' ', $address);

    // 3. Drop the dashes of a formatted mobile number, e.g. 136-3333-6666
    //    (the format produced by iOS contact sharing).
    $address = preg_replace('/(\d{3})-(\d{4})-(\d{4})/', '$1$2$3', $address);

    // 4. Discard an 18-character resident ID number (17 digits + digit or X)
    //    before looking for a phone, otherwise its first 11 digits would be
    //    mistaken for a mobile number.
    $address = preg_replace('/(?<!\d)\d{17}[\dXx](?!\d)/', ' ', $address);

    // 5. Extract an area-coded landline or a 7-11 digit phone number. The
    //    look-arounds make sure we never slice a piece out of a longer number.
    $phonePattern = '/(?<!\d)(?:\d{3,4}-\d{6,8}|\d{7,11})(?!\d)/';
    if (preg_match($phonePattern, $address, $match, PREG_OFFSET_CAPTURE) === 1) {
        $parse['mobile'] = $match[0][0];
        // Cut out exactly the matched occurrence (offsets are in bytes).
        $address = substr_replace($address, ' ', $match[0][1], strlen($match[0][0]));
    }

    // 6. Discard a stand-alone 6-digit postal code; left in place it would be
    //    picked as the "shortest token" and reported as the name.
    $address = preg_replace('/(?<![^ ])\d{6}(?![^ ])/', ' ', $address);

    // Merge the gaps left by the removals above and trim both ends.
    $address = trim(preg_replace('/ {2,}/', ' ', $address));

    // 7. Split on spaces: the shortest token is taken as the name, the rest is
    //    the address. Only that one token is removed, so a name that happens
    //    to occur inside the address text does not damage the address.
    $tokens = explode(' ', $address);
    if (count($tokens) > 1) {
        $nameIndex = 0;
        foreach ($tokens as $index => $token) {
            if ($charLength($token) < $charLength($tokens[$nameIndex])) {
                $nameIndex = $index;
            }
        }
        $parse['name'] = $tokens[$nameIndex];
        unset($tokens[$nameIndex]);
        $address = implode(' ', $tokens);
    }

    // 8. Province: everything up to the first 省 / 自治区, or a municipality
    //    name (optionally followed by 市). Patterns are anchored so the match
    //    is always a prefix and can be cut off by length.
    if (preg_match('/^.*?(?:省|自治区|(北京|天津|上海|重庆)市?)/u', $address, $match) === 1) {
        $parse['province'] = $match[0];
        $address = ltrim((string) substr($address, strlen($match[0])));

        // A municipality is its own city; searching for another "市" would
        // only pick up unrelated text further down the address.
        if (isset($match[1]) && $match[1] !== '') {
            $parse['city'] = $parse['province'];
        }
    }

    // 9. City-level unit (skipped when already set for a municipality).
    if ($parse['city'] === ''
        && preg_match('/^.*?(?:市|自治州|地区|区划|县)/u', $address, $match) === 1
    ) {
        $parse['city'] = $match[0];
        $address = ltrim((string) substr($address, strlen($match[0])));
    }

    // 10. District / county / town level unit.
    if (preg_match('/^.*?(?:区|县|镇|乡|街道)/u', $address, $match) === 1) {
        $parse['area'] = $match[0];
        $address = ltrim((string) substr($address, strlen($match[0])));
    }

    // Whatever is left is the street-level detail, even when no area matched.
    $parse['address'] = $address;

    return $parse;
}