按标签值将 XML 文件整理到子文件夹中

XML file organizer into sub folders by tag value

提问人:thiago gentil 提问时间:2/28/2023 最后编辑:thiago gentil 更新时间:3/1/2023 访问量:88

问:

简而言之:我正在做一个项目,我需要通过其标签内的值来分隔不同文件夹中的 XML 文件。

具体情况:我是使用 Python 的新手。此特定脚本考虑在 XML 文件(相关文件是“NF-E 4.00”巴西电子发票)中查找标记,此标记通知付款类型。我想将文件夹中包含的文件分开,并将它们分为两个文件夹:现金支付和其他文件夹。我将发送有关标签的文档的链接:

谷歌

本文档为葡萄牙语;但是,在使用Google Chrome浏览器提供的Google翻译器时,我发现它很容易用英语理解。

我真正想要的是让代码开始识别标签内的值,并停止将所有文件传递到卡片文件夹。

我的代码:

"""
This script reads XML files in a folder and moves the files to different
folders based on the value of the <tPag> tag.
Files with the value 01 in <tPag> are moved to the 'money' folder
and files with other values are moved to the 'cards' folder.
The path of the input and output folders can be modified by changing the
constants INPUT_FOLDER, OUTPUT_FOLDER_TPAG_1, and OUTPUT_FOLDER_TPAG_NOT_1.
"""
import os
import shutil
import xml.etree.ElementTree as ET

# Folder that is scanned (non-recursively) for input XML files.
INPUT_FOLDER = r"C:\Exemple\input\folder"

# Destination for files in which some matched <tPag> element has the text "01".
OUTPUT_FOLDER_TPAG_1 = r"C:\Exemple\output\money\folder"

# Destination for every other parsed file, including files in which no <tPag>
# element was matched at all.
OUTPUT_FOLDER_TPAG_NOT_1 = r"C:\Exemple\output\card\folder"

# Walk the directory entries of the input folder.
for filename in os.listdir(INPUT_FOLDER):
    # Only names ending in lower-case ".xml" are processed.
    if filename.endswith(".xml"):
        # Full path of the file to parse and later move.
        xml_file = os.path.join(INPUT_FOLDER, filename)
        try:
            # Parse the XML file and take its root element.
            tree = ET.parse(xml_file)
            root = tree.getroot()
            # Flag: set to True as soon as one matched <tPag> holds "01".
            TPAG_1_FOUND = False
            # NOTE(review): this path carries no namespace and starts at "TNFe".
            # The sample invoice in this post declares
            # xmlns="http://www.portalfiscal.inf.br/nfe" and its elements are
            # nfeProc > NFe > infNFe > pag > detPag > tPag, so for that sample
            # findall() matches nothing, the loop body never runs and the flag
            # stays False. This is the behavior the question is about.
            for child in root.findall("TNFe/infNFe/pag/detPag/tPag"):
                # Text content of the matched element.
                tpag_value = child.text
                # Exact string comparison against "01" (not the integer 1).
                if tpag_value == "01":
                    TPAG_1_FOUND = True
                    break
            # Move the file to the folder selected by the flag.
            if TPAG_1_FOUND:
                output_file = os.path.join(OUTPUT_FOLDER_TPAG_1, filename)
                shutil.move(xml_file, output_file)
            else:
                output_file = os.path.join(OUTPUT_FOLDER_TPAG_NOT_1, filename)
                shutil.move(xml_file, output_file)
        except ET.ParseError as e:
            # A file that is not well-formed XML is reported and left in place;
            # the caught exception `e` is not included in the message.
            print(f"Error parsing file {xml_file}")
# Printed once after the loop, whether or not any file was moved.
print("Task completed!")

终端只显示:任务已完成! 然后它将所有文件传递到卡文件夹,没有文件进入资金文件夹。

我尝试过更改标签的路径。我还添加了一个异常,结果它报告在任何发票中都找不到 tPag 值(尽管所有发票中都存在该标签)。当我删除该异常和错误消息后,脚本只是让 TPAG_1_FOUND 走 else 分支,并将 XML 文件移动到卡片文件夹中。

这是我正在处理的 XML 文件的示例:

<?xml version="1.0"?>
<nfeProc xmlns="http://www.portalfiscal.inf.br/nfe" versao="4.00">
    <NFe xmlns="http://www.portalfiscal.inf.br/nfe">
        <infNFe versao="4.00" Id="example">
            <ide>
                <cUF>example</cUF>
                <cNF>example</cNF>
                <natOp>example</natOp>
                <mod>example</mod>
                <serie>example</serie>
                <nNF>example</nNF>
                <dhEmi>example</dhEmi>
                <tpNF>example</tpNF>
                <idDest>example</idDest>
                <cMunFG>example</cMunFG>
                <tpImp>example</tpImp>
                <tpEmis>example</tpEmis>
                <cDV>example</cDV>
                <tpAmb>example</tpAmb>
                <finNFe>example</finNFe>
                <indFinal>example</indFinal>
                <indPres>example</indPres>
                <procEmi>example</procEmi>
                <verProc>example</verProc>
            </ide>
            <emit>
                <CNPJ>example</CNPJ>
                <xNome>example</xNome>
                <xFant>example</xFant>
                <enderEmit>
                    <xLgr>example</xLgr>
                    <nro>example</nro>
                    <xCpl>example</xCpl>
                    <xBairro>example</xBairro>
                    <cMun>example</cMun>
                    <xMun>example</xMun>
                    <UF>example</UF>
                    <CEP>example</CEP>
                    <cPais>example</cPais>
                    <xPais>example</xPais>
                    <fone>example</fone>
                </enderEmit>
                <IE>example</IE>
                <CRT>example</CRT>
            </emit>
            <det nItem="1">
                <prod>
                    <cProd>example</cProd>
                    <cEAN>example</cEAN>
                    <xProd>example</xProd>
                    <NCM>example</NCM>
                    <CFOP>example</CFOP>
                    <uCom>example</uCom>
                    <qCom>0example</qCom>
                    <vUnCom>example</vUnCom>
                    <vProd>example</vProd>
                    <cEANTrib>example</cEANTrib>
                    <uTrib>example</uTrib>
                    <qTrib>example</qTrib>
                    <vUnTrib>example</vUnTrib>
                    <indTot>example</indTot>
                </prod>
                <imposto>
                    <ICMS>
                        <ICMSSN102>
                            <orig></orig>
                            <CSOSN>example</CSOSN>
                        </ICMSSN102>
                    </ICMS>
                    <PIS>
                        <PISAliq>
                            <CST></CST>
                            <vBC>example</vBC>
                            <pPIS>example</pPIS>
                            <vPIS>example</vPIS>
                        </PISAliq>
                    </PIS>
                    <COFINS>
                        <COFINSAliq>
                            <CST>example</CST>
                            <vBC>example</vBC>
                            <pCOFINS>example</pCOFINS>
                            <vCOFINS>example</vCOFINS>
                        </COFINSAliq>
                    </COFINS>
                </imposto>
            </det>
            <total>
                <ICMSTot>
                    <vBC>example</vBC>
                    <vICMS>example</vICMS>
                    <vICMSDeson>example</vICMSDeson>
                    <vFCPUFDest>example</vFCPUFDest>
                    <vICMSUFDest>example</vICMSUFDest>
                    <vICMSUFRemet>example</vICMSUFRemet>
                    <vFCP>example</vFCP>
                    <vBCST>example</vBCST>
                    <vST>example</vST>
                    <vFCPST>example</vFCPST>
                    <vFCPSTRet>example</vFCPSTRet>
                    <vProd>5.80</vProd>
                    <vFrete>example</vFrete>
                    <vSeg>example</vSeg>
                    <vDesc>example</vDesc>
                    <vII>example</vII>
                    <vIPI>example</vIPI>
                    <vIPIDevol>example</vIPIDevol>
                    <vPIS>example</vPIS>
                    <vCOFINS>example</vCOFINS>
                    <vOutro>example</vOutro>
                    <vNF>5.80</vNF>
                </ICMSTot>
            </total>
            <transp>
                <modFrete>example</modFrete>
            </transp>
            <pag>
                <detPag>
                    <indPag>0</indPag>
                    <tPag>01</tPag>
                    <vPag>5.80</vPag>
                </detPag>
            </pag>
            <infAdic>
                <infCpl>example</infCpl>
            </infAdic>
            <infRespTec>
                <CNPJ>example</CNPJ>
                <xContato>example</xContato>
                <email>example</email>
                <fone>example</fone>
            </infRespTec>
        </infNFe>
        <infNFeSupl>
            <qrCode>example</qrCode>
            <urlChave>example</urlChave>
        </infNFeSupl>
        <Signature xmlns="http://www.w3.org/2000/09/xmldsig#">
            <SignedInfo>
                <CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>
                <SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1"/>
                <Reference URI="example">
                    <Transforms>
                        <Transform Algorithm="http://www.w3.org/2000/09/xmldsig#enveloped-signature"/>
                        <Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>
                    </Transforms>
                    <DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1"/>
                    <DigestValue>example</DigestValue>
                </Reference>
            </SignedInfo>
            <SignatureValue>example</SignatureValue>
            <KeyInfo>
                <X509Data>
                    <X509Certificate>example</X509Certificate>
                </X509Data>
            </KeyInfo>
        </Signature>
    </NFe>
    <protNFe versao="4.00">
        <infProt Id="NFe00">
            <tpAmb>example</tpAmb>
            <verAplic>example</verAplic>
            <chNFe>example</chNFe>
            <dhRecbto>example</dhRecbto>
            <nProt>example</nProt>
            <digVal>example</digVal>
            <cStat>example</cStat>
            <xMotivo>example</xMotivo>
        </infProt>
    </protNFe>
</nfeProc>
python-3.x xml xml 解析

评论

0赞 LMC 2/28/2023
确保能找到该元素,先在代码之外单独测试。ElementTree 使用相对 XPath,我认为 root.findall("TNFe/infNFe/pag/detPag/tPag") 可能应该写成 root.findall("./TNFe/infNFe/pag/detPag/tPag") 或 root.findall(".//TNFe/infNFe/pag/detPag/tPag")。
0赞 thiago gentil 2/28/2023
感谢您的提示,我无法找到所需的元素,我将继续尝试找到标签的正确路径。我用我正在使用的相同 XML 之一尝试了这个:import xml.etree.ElementTree as ET # create the XML root object tree = ET.parse('test.xml') root = tree.getroot() # find all occurrences of the 'tPag' tag tpags = root.findall(".//TNFe/infNFe/pag/detPag/tPag") # check if any tags were found if len(tpags) > 0: print(f"{len(tpags)} <tPag> tags were found.") else: print("No <tPag> tags were found.")
0赞 thiago gentil 2/28/2023
并始终在终端中接收No <tPag> tags were found.
0赞 LMC 2/28/2023
XML 的根元素是什么?是否有任何命名空间?最好发布一个最小的文件示例。
0赞 thiago gentil 2/28/2023
我刚才添加了 XML 文件。我认为正确的写法是:tpags = root.findall("nfeProc/NFe/infNFe/pag/tPag"),但它仍然不适用于原始的 NF-e XML;我另外制作了一个只有 6 个元素的文件进行测试,效果很好。

答:

0赞 Fanchen Bao 3/1/2023 #1

正如 @LMC 在评论中建议的那样,在搜索 tPag 元素时需要指定命名空间。有关命名空间的详细信息,请查看文档。

脚本的一种可能解决方案如下所示,只改动了一行。请注意,我还使用了 XPath 语法来简化对所有 tPag 元素的搜索。但是,如果文件中有多个 tPag 元素,并且您必须依赖其中某个特定的元素,则此语法可能不起作用。

"""
This script reads XML files in a folder and moves the files to different
folders based on the value of the <tPag> tag.
Files with the value 01 in <tPag> are moved to the 'money' folder
and files with other values are moved to the 'cards' folder.
The path of the input and output folders can be modified by changing the
constants INPUT_FOLDER, OUTPUT_FOLDER_TPAG_1, and OUTPUT_FOLDER_TPAG_NOT_1.
"""
import os
import shutil
import xml.etree.ElementTree as ET

# Folder that is scanned (non-recursively) for input XML files; here the
# current working directory.
INPUT_FOLDER = r"."

# Destination for files in which some <tPag> element has the text "01".
# NOTE(review): nothing in this script creates ./pages — presumably it must
# already exist before the script runs; confirm.
OUTPUT_FOLDER_TPAG_1 = r"./pages"

# Destination for every other parsed file. It is the same path as INPUT_FOLDER,
# so for these files the source and destination paths passed to shutil.move()
# are identical — presumably they simply stay where they are.
OUTPUT_FOLDER_TPAG_NOT_1 = r"."

# Walk the directory entries of the input folder.
for filename in os.listdir(INPUT_FOLDER):
    # Only names ending in lower-case ".xml" are processed.
    if filename.endswith(".xml"):
        # Full path of the file to parse and later move.
        xml_file = os.path.join(INPUT_FOLDER, filename)
        try:
            # Parse the XML file and take its root element.
            tree = ET.parse(xml_file)
            root = tree.getroot()
            # Flag: set to True as soon as one <tPag> holds "01".
            TPAG_1_FOUND = False
            # "{namespace-uri}tag" is ElementTree's qualified-name form, and
            # ".//" searches all descendants of the root, so every <tPag> in
            # the NF-e namespace is visited wherever it is nested.
            for child in root.findall(".//{http://www.portalfiscal.inf.br/nfe}tPag"):  # <-- add namespace and use XPath syntax
                # Text content of the matched element.
                tpag_value = child.text
                # Exact string comparison against "01" (not the integer 1).
                if tpag_value == "01":
                    TPAG_1_FOUND = True
                    break
            # Move the file to the folder selected by the flag.
            if TPAG_1_FOUND:
                output_file = os.path.join(OUTPUT_FOLDER_TPAG_1, filename)
                shutil.move(xml_file, output_file)
            else:
                output_file = os.path.join(OUTPUT_FOLDER_TPAG_NOT_1, filename)
                shutil.move(xml_file, output_file)
        except ET.ParseError as e:
            # A file that is not well-formed XML is reported and left in place;
            # the caught exception `e` is not included in the message.
            print(f"Error parsing file {xml_file}")
# Printed once after the loop, whether or not any file was moved.
print("Task completed!")

评论

0赞 thiago gentil 3/1/2023
感谢您的帮助,我设法完成并更正了代码。
0赞 thiago gentil 3/1/2023 #2

在阅读文档并指定命名空间 URI 之后,我重写了代码,并成功解决了这个问题。

"""
Script that organizes XML files based on the payment method used.

Files are moved to specific folders depending on the payment method specified in the XML file.

"""

import shutil
import os
import xml.etree.ElementTree as ET

# Specifies the directory where the XML files are located
DIRECTORY = r"C:\Users\example\input\folder"

# Destination for invoices whose first <tPag> is "01" (cash payment)
MONEY_FOLDER = r"C:\Users\example\folder\money"

# Destination for invoices with any other payment type
OTHER_FOLDER = r"C:\Users\example\folder\outher"

# Namespace declared by NF-e documents, in ElementTree's "{uri}" prefix form
NFE_NAMESPACE = "{http://www.portalfiscal.inf.br/nfe}"


def _find_text(root, tag):
    """Return the stripped text of the first NF-e *tag* under *root*, or None."""
    element = root.find(f".//{NFE_NAMESPACE}{tag}")
    if element is None or element.text is None:
        return None
    return element.text.strip() or None


def organize_xml_files(directory=DIRECTORY, money_folder=MONEY_FOLDER,
                       other_folder=OTHER_FOLDER):
    """Move the NF-e XML files found in *directory* into payment-type folders.

    Each file is renamed to ``NFe<nNF>.xml``. It goes to *money_folder* when
    its first ``<tPag>`` element is ``"01"`` and to *other_folder* otherwise.
    Files that cannot be classified are reported and left where they are:
    malformed XML, a missing or empty ``<nNF>``/``<tPag>``, or a target name
    that already exists.

    Args:
        directory: Folder scanned (non-recursively) for ``.xml`` files.
        money_folder: Destination for invoices paid in cash.
        other_folder: Destination for every other invoice.

    Returns:
        The list of destination paths of the files that were moved.
    """
    moved = []
    # Sorted so that the processing order does not depend on the file system.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        # Accept ".XML" as well, and ignore directories that end in ".xml".
        if not filename.lower().endswith(".xml") or not os.path.isfile(file_path):
            continue

        try:
            root = ET.parse(file_path).getroot()
        except ET.ParseError as error:
            # One malformed file must not abort the whole run.
            print(f"Error parsing file {file_path}: {error}")
            continue

        invoice_number = _find_text(root, "nNF")
        payment_type = _find_text(root, "tPag")
        if invoice_number is None or payment_type is None:
            # root.find() returns None for a missing element; without this
            # guard the script would die with AttributeError on ".text".
            print(f"Skipping {file_path}: missing <nNF> or <tPag>")
            continue

        destination = money_folder if payment_type == "01" else other_folder
        # shutil.move() does not create missing parent folders.
        os.makedirs(destination, exist_ok=True)

        target = os.path.join(destination, f"NFe{invoice_number}.xml")
        if os.path.exists(target):
            # Two invoices can share an nNF; never overwrite silently.
            print(f"Skipping {file_path}: {target} already exists")
            continue

        shutil.move(file_path, target)
        moved.append(target)
    return moved


if __name__ == "__main__":
    organize_xml_files()