提问人:JokerMartini 提问时间:2/25/2021 最后编辑:JokerMartini 更新时间:2/26/2021 访问量:200
正则表达式捕获每行引号内的第一个文本组
Regex capture first text group within quotes per line
问:
我正在编写一个简单的语法高亮器,需要在每一行中只捕获第一段带引号的文本(包括引号本身)。该如何调整正则表达式来做到这一点?目前它会匹配每一行中所有带引号的文本组,但我只需要每行的第一组。
以下是我找到的两个用于匹配引号内文本的正则表达式:
(\"[^\"]*\")
(\".*?[^\\]\")
我只是尝试在 pyside 中制作一个简单的 json 语法高亮器。
import os
import sys
from PySide2 import QtCore, QtGui, QtWidgets
class SourceEditor(QtWidgets.QPlainTextEdit):
    """Plain-text editor with a 10pt fixed-pitch Courier font and a Highlighter."""

    def __init__(self, parent=None):
        """Configure the editor font and attach the syntax highlighter.

        Args:
            parent: Optional parent widget, forwarded to QPlainTextEdit.
        """
        super(SourceEditor, self).__init__(parent)

        # Fixed-pitch font used for all text in the editor.
        editor_font = QtGui.QFont()
        editor_font.setFamily('Courier')
        editor_font.setFixedPitch(True)
        editor_font.setPointSize(10)
        self.setFont(editor_font)

        # Attach a Highlighter to this editor's document and store it on
        # the instance.
        self.highlighter = Highlighter(self.document())
class Highlighter(QtGui.QSyntaxHighlighter):
    """Highlight ``//`` comments, double-quoted text and ``/* ... */`` comments.

    Single-line rules are stored in ``highlightingRules`` as
    ``(QRegExp, QTextCharFormat)`` pairs and applied in list order.
    ``/* ... */`` comments are tracked across blocks through the block state:
    state 1 marks a block that ends inside an unterminated comment, state 0
    marks every other block.
    """

    def __init__(self, parent=None):
        """Build the character formats and the regular expressions.

        Args:
            parent: Optional parent object, forwarded to QSyntaxHighlighter.
        """
        super(Highlighter, self).__init__(parent)

        # Grey italic format for ``//`` comments.
        line_comment_format = QtGui.QTextCharFormat()
        line_comment_format.setFontItalic(True)
        line_comment_format.setForeground(QtGui.QColor(115, 115, 115))

        # Grey italic format for ``/* ... */`` comments (used in highlightBlock).
        self.multiLineCommentFormat = QtGui.QTextCharFormat()
        self.multiLineCommentFormat.setFontItalic(True)
        self.multiLineCommentFormat.setForeground(QtGui.QColor(115, 115, 115))

        # Orange format for any double-quoted run of text.
        quoted_format = QtGui.QTextCharFormat()
        quoted_format.setForeground(QtGui.QColor(230, 145, 100))

        # Order matters: later rules overwrite the formatting of earlier ones.
        self.highlightingRules = [
            (QtCore.QRegExp("//[^\n]*"), line_comment_format),
            (QtCore.QRegExp("\"[^\"]*\""), quoted_format),
        ]

        self.commentStartExpression = QtCore.QRegExp("/\\*")
        self.commentEndExpression = QtCore.QRegExp("\\*/")

    def highlightBlock(self, text):
        """Apply all formats to one block of text.

        Args:
            text: The text of the block being highlighted.
        """
        # Pass 1: single-line rules, every match in the block.
        for rule_pattern, char_format in self.highlightingRules:
            matcher = QtCore.QRegExp(rule_pattern)
            position = matcher.indexIn(text)
            while position >= 0:
                span = matcher.matchedLength()
                self.setFormat(position, span, char_format)
                position = matcher.indexIn(text, position + span)

        # Pass 2: multi-line comments. Assume "not inside a comment" until an
        # unterminated comment is found below.
        self.setCurrentBlockState(0)

        if self.previousBlockState() == 1:
            # The previous block ended inside a comment, so this block
            # starts inside one.
            comment_start = 0
        else:
            comment_start = self.commentStartExpression.indexIn(text)

        while comment_start >= 0:
            comment_end = self.commentEndExpression.indexIn(text, comment_start)
            if comment_end != -1:
                comment_length = (
                    comment_end
                    - comment_start
                    + self.commentEndExpression.matchedLength()
                )
            else:
                # No terminator in this block: the comment runs to the end of
                # the block and continues into the next one.
                self.setCurrentBlockState(1)
                comment_length = len(text) - comment_start
            self.setFormat(comment_start, comment_length, self.multiLineCommentFormat)
            comment_start = self.commentStartExpression.indexIn(
                text, comment_start + comment_length
            )
if __name__ == '__main__':
    # Demo entry point: show a SourceEditor pre-filled with sample JSON-like
    # text (including a // comment) so the highlighting can be inspected.
    app = QtWidgets.QApplication(sys.argv)
    window = SourceEditor()
    # NOTE: the original called `style.setStyle(widget=window)` here, but no
    # `style` module is imported or defined in this file, so the call raised
    # NameError before the window was shown. It has been removed; re-add it
    # together with the matching import if a custom style module exists.
    window.setPlainText('''
[
{
"group": "Simple",
"name": "Simple",
"category name": "Apps",
"icon": "Simple.svg",
"paths": [
{
"path": "notepad.exe"
}
]
},
// some comment here
{
"group": "Simple",
"name": "Simple",
"category name": "Simple",
"icon": "Simple.svg"
"paths": [
{
"path": "notepad",
"args": "notepad.py"
},
{
"path": "run.exe",
}
]
}
]
''')
    window.resize(640, 512)
    window.show()
    sys.exit(app.exec_())
类似的问题...如何捕获没有尾随逗号的数字?(\d+),
[
{
"description": null,
"entity": {
"id": 343,
"name": "07010",
"type": "Shot"
},
"id": 1673,
"project": {
"id": 9,
"name": "test10",
}
}
]
答:
1赞
Ryszard Czech
2/26/2021
#1
使用捕获组并返回它:
^[ \t]*("[^"]*")
请参阅正则表达式证明。
解释
| 节点 | 解释 |
|---|---|
| `^` | 字符串的开头 |
| `[ \t]*` | 任意字符:' '、'\t'(制表符)(0 次或更多次,匹配尽可能多的数量) |
| `(` | 分组并捕获到 \1: |
| `"` | '"' |
| `[^"]*` | 除 '"' 以外的任何字符(0 次或更多次,匹配尽可能多的数量) |
| `"` | '"' |
| `)` | \1 结束 |
评论
r'(\"[^\"]*\"):'
r'^\s*("[^"]*")'
"07010"