从文本字符串中提取数字组并分成几列

Extracting groups of numbers from a text string and separate into columns

提问人:3Dguy 提问时间:11/9/2023 最后编辑:Ron Rosenfeld3Dguy 更新时间:11/9/2023 访问量:49

问:

我找到了一个执行此任务的脚本,但是它看起来很慢并且不处理小数点。我使用的是 mac,无法使用正则表达式。

数据集(相当混乱)的示例如下

$2900 + Slab ($550) + P/Prep ($350) + Xover ($500)
$2600 + Pave Prep ($350) + Slab ($550) + Crossover ($500)
$2900 + $350 P/Prep + $500 Xover
2800+Slab$480+PavePrep$315+ Pave Prep$7.00+XOver$450
$3350(inc prep up to 50m2)+Slab$650+Extra P/prep$224+Xover$450
WM02-3050 XO-450 PPEX-128 SS-650
WM01-2850 XO-450 SS-650
XO-450 PPEX-307.68 SS-650 WM03-3350 DRYLINE-0
XO-450 PPEX-126.08 SS-650 WM01-2850

不需要作为标识符的数字,例如。WM01, WM03, 50m2 乐于删除小数字 <10

希望输出是

2900 550 350 500
2600 350 550 500
2900 350 500
2800 480 315 7 450
3350 650 224 450
3050 450 128 650
2850 450 650
450 307.68 3350
450 126.08 650 2850

然后,我可以使用文本到列功能。 谢谢

Function NDigits(ByVal SourceString As String, _
    Optional ByVal NumberOfDigits As Long = 0, _
    Optional ByVal TargetDelimiter As String = " ") As String

Dim i As Long         ' SourceString Character Counter
Dim strDel As String  ' Current Target String

' Check if SourceString is empty (""). Exit if. NDigits = "".
If SourceString = "" Then Exit Function

' Loop through characters of SourceString.
For i = 1 To Len(SourceString)
    ' Check if current character is not a digit (#), then replace with " ".
    If Not Mid(SourceString, i, 1) Like "#" Then _
            Mid(SourceString, i, 1) = " "
Next

' Note: While VBA's Trim function removes spaces before and after a string,
'       Excel's Trim function additionally removes redundant spaces, i.e.
'       doesn't 'allow' more than one space, between words.
' Remove all spaces from SourceString except single spaces between words.
strDel = Application.WorksheetFunction.Trim(SourceString)

' Check if current TargetString is empty (""). Exit if. NDigits = "".
If strDel = "" Then Exit Function

' Replace (Substitute) " " with TargetDelimiter if it is different than
' " " and is not a number (#).
If TargetDelimiter <> " " And Not TargetDelimiter Like "#" Then
    strDel = WorksheetFunction.Substitute(strDel, " ", TargetDelimiter)
End If

' Check if NumberOfDigits is greater than 0.
If NumberOfDigits > 0 Then

    Dim vnt As Variant  ' Number of Digits Array (NOD Array)
    Dim k As Long       ' NOD Array Element Counter

    ' Write (Split) Digit Groups from Current Target String to NOD Array.
    vnt = Split(strDel, TargetDelimiter)
    ' Reset NOD Array Element Counter to -1, because NOD Array is 0-based.
    k = -1
    ' Loop through elements (digit groups) of NOD Array.
    For i = 0 To UBound(vnt)
        ' Check if current element has number of characters (digits)
        ' equal to NumberOfDigits.
        If Len(vnt(i)) = NumberOfDigits Then
           ' Count NOD Array Element i.e. prepare for write.
           k = k + 1
           
           ' Write i-th element of NOD Array to k-th element.
           ' Note: Data (Digit Groups) are possibly being overwritten.
           vnt(k) = vnt(i)
        End If
    Next
    ' Check if no Digit Group of size of NumberOfDigits was found.
    ' Exit if. NDigits = "".
    If k = -1 Then Exit Function
    ' Resize NOD Array to NOD Array Element Count, possibly smaller,
    ' due to fewer found Digit Groups with the size of NumberOfDigits.
    ReDim Preserve vnt(k)
    ' Join elements of NOD Array to Current Target String.
    strDel = Join(vnt, TargetDelimiter)
End If

' Write Current Target String to NDigits.
NDigits = strDel

End Function
Excel VBA 数字 提取

评论

0赞 taller 11/9/2023
你有一个规则.为什么在输出中?我想这是一个很难处理的部分。remove small numbers <107(inc prep up to 50m2)
0赞 Black cat 11/9/2023
Don't need the numbers that are identifier eg. WM01, WM03同时,在您所需的结果中,这些数字也会显示出来吗? 在上一行的最后一行中缺失。真正需要的是什么?SS-650

答:

0赞 taller 11/9/2023 #1

注意:RegExp 模式已针对提供的示例数据进行了测试。您可能需要对其进行微调以准确匹配您的特定数据集。

Option Explicit
Sub GetDigits()
    Dim objRegEx As Object, sRes As String
    Dim objMH As Object, rngData As Range
    Dim arrData, arrRes()
    Dim i As Long, j As Integer
    ' Create RegExp object
    Set objRegEx = CreateObject("vbscript.regexp")
    objRegEx.Pattern = "(^|[\s\+])\D*(\d+\-)*(\d{2,}(\.\d+)*)(?![A-Z])"
    ' Case-insensitive
    objRegEx.IgnoreCase = True
    objRegEx.Global = True
    ' Load source data
    Set rngData = Range("A1", Cells(Rows.Count, "A").End(xlUp))
    arrData = rngData.Value
    ReDim arrRes(1 To UBound(arrData), 1 To 1)
    ' Loop through data
    For i = LBound(arrData) To UBound(arrData)
        sRes = ""
        ' RegExp matching
        Set objMH = objRegEx.Execute(Trim(arrData(i, 1)))
        If objMH.Count > 0 Then
            ' Collect match result
            For j = 0 To objMH.Count - 1
                sRes = sRes & Chr(32) & objMH(j).submatches(2)
            Next
            arrRes(i, 1) = Mid(sRes, 2)
        End If
    Next
    ' Write output to sheet
    With rngData.Offset(0, 1)
        .ClearContents
        .Value = arrRes
    End With
    Set objMH = Nothing
    Set objRegEx = Nothing
End Sub

enter image description here

评论

0赞 3Dguy 11/10/2023
删除少量数字只是一个建议。谢谢你的帮助。找到了另一种解决这个问题的方法。非常感谢。