提问人:Chris Barr 提问时间:8/24/2023 最后编辑:ThomasChris Barr 更新时间:8/24/2023 访问量:84
如何开始在 JavaScript 中解析二进制文件?
How can I get started parsing binary files in JavaScript?
问:
我有一些二进制文件,我希望能够在浏览器中解析。我发现了一些 python 代码(我认为)完全可以满足我的需要,但我对 python 的了解还不够,无法解释我所看到的。
我在自己的代码存储库中有一些示例文件,下面是我解析这些文件的尝试。您可以将文件拖到代码段窗口中以解析该文件
// The whole page is the drop target; it gets the "drag-over" class while a
// drag is hovering over it.
const elBody = document.body;
const dragClass = "drag-over";

/**
 * Read one dropped file into an ArrayBuffer and pass it to processFile.
 *
 * A fresh FileReader is created per file: a FileReader that is still loading
 * throws InvalidStateError when asked to start another read, so a single
 * shared reader breaks as soon as more than one file is dropped at once.
 *
 * @param {File} file - File obtained from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = () => {
    processFile(reader.result);
  };
  reader.onerror = () => {
    console.error("Could not read dropped file:", file.name, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what allows the later "drop" event to fire.
  dragEvent.preventDefault();
  // classList.add is a no-op when the class is already present.
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);
  for (const item of dropEvent.dataTransfer.items) {
    // Dragged text/links show up as kind "string"; only files are parsed.
    if (item.kind === "file") {
      readDroppedFile(item.getAsFile());
    }
  }
});
/**
 * Log the first four bytes of the file as a signed 32-bit integer.
 *
 * The value is read through a DataView with an explicit little-endian flag so
 * the result does not depend on the byte order of the machine running the
 * code (a plain Int32Array uses the platform's byte order).
 *
 * @param {ArrayBuffer} arrayBuffer - Raw contents of the dropped file.
 * @returns {void}
 */
function processFile(arrayBuffer) {
  const headerLength = 4;

  // An Int32Array/DataView read needs all four bytes; files shorter than
  // that cannot contain the leading integer at all.
  if (arrayBuffer.byteLength < headerLength) {
    console.warn(
      `File is only ${arrayBuffer.byteLength} byte(s); expected at least ${headerLength}.`,
    );
    return;
  }

  const b1 = new DataView(arrayBuffer).getInt32(0, true);
  console.log(b1);
}
/* Give html/body the full viewport height and zero margin, so the body
   (the drop target in the script) spans the visible page. */
html,body {height: 100vh;margin: 0;}
/* Background shown while the "drag-over" class is set by the dragover handler. */
.drag-over {background-color: #ccc;}
<h1>Drop a file here</h1>
我只是不确定自己的方向是否正确。关于这类文件的结构,我唯一的信息来源就是 python 文件中的这些注释。
如何获取有关文件格式的信息并将其转换为在 JavaScript 中执行相同操作的方法?
答:
0赞
mplungjan
8/24/2023
#1
这在您的第一个示例文件中对我有用。
但对那个西班牙语的(名字里带“翅膀”的)文件,效果就没那么好了。
如果你需要解析文本,它可以很容易地实现。
DataView 在这里很有用。
/**
 * Parse a dropped binary file with a DataView and render its text.
 *
 * Layout assumed by this demo: a 4-byte little-endian signed integer followed
 * by bytes that are scanned one at a time. Printable ASCII (32-126) and line
 * feeds (10) are kept; every other byte is skipped. Words prefixed with "%"
 * are rendered as <h2> headings inside the #output element.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw contents of the dropped file.
 * @returns {void}
 */
const processFile = (arrayBuffer) => {
  const HEADER_LENGTH = 4;
  // The extracted text comes from an arbitrary file, so it must be escaped
  // before it is assigned to innerHTML; otherwise "<", ">" and "&" in the
  // file would be interpreted as markup.
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };

  // getInt32 throws a RangeError when fewer than four bytes are available.
  if (arrayBuffer.byteLength < HEADER_LENGTH) {
    console.warn(
      `File is only ${arrayBuffer.byteLength} byte(s); expected at least ${HEADER_LENGTH}.`,
    );
    document.getElementById("output").innerHTML = "";
    return;
  }

  const dataView = new DataView(arrayBuffer);

  // Read the first 4 bytes as a 32-bit integer; true denotes little-endian.
  const firstInteger = dataView.getInt32(0, true);
  console.log("First integer:", firstInteger);

  // Walk the rest of the buffer one byte at a time.
  let outputString = "";
  for (let offset = HEADER_LENGTH; offset < dataView.byteLength; offset += 1) {
    const char = dataView.getInt8(offset);
    if (char >= 32 && char <= 126) {
      // Printable ASCII character.
      outputString += String.fromCharCode(char);
    } else if (char === 10) {
      // Line feed.
      outputString += "\n";
    }
    // Any other byte (control codes, paragraph markers, ...) is skipped;
    // handle them here based on the needs of the file format.
  }

  const escapedText = outputString.replace(/[&<>]/g, (ch) => HTML_ESCAPES[ch]);
  document.getElementById("output").innerHTML = escapedText.replace(
    /%(\w+)/g,
    "<h2>$1</h2>",
  );
};
/* Full viewport height and zero margin on the page root elements. */
html,
body {
height: 100vh;
margin: 0;
}
/* Background shown while the "drag-over" class is set by the dragover handler. */
.drag-over {
background-color: #ccc;
}
/* Leave 120px of space below the parsed output. */
pre { margin-bottom: 120px; }
<h1>Drop a file here</h1>
<pre id="output"></pre>
<hr/>
<script>
// Drag-and-drop wiring: the <h1> is the drop target, and every dropped file
// is read into an ArrayBuffer and handed to processFile.
const dragClass = "drag-over";
// Declared with const: assigning to an undeclared name creates an implicit
// global and is a ReferenceError in strict mode / ES modules.
const elBody = document.querySelector("h1");

/**
 * Read one dropped file into an ArrayBuffer and pass it to processFile.
 *
 * A fresh FileReader is created per file: a FileReader that is still loading
 * throws InvalidStateError when asked to start another read, so a single
 * shared reader breaks as soon as more than one file is dropped at once.
 *
 * @param {File} file - File obtained from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = () => {
    processFile(reader.result);
  };
  reader.onerror = () => {
    console.error("Could not read dropped file:", file.name, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what allows the later "drop" event to fire.
  dragEvent.preventDefault();
  // classList.add is a no-op when the class is already present.
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);
  for (const item of dropEvent.dataTransfer.items) {
    // Dragged text/links show up as kind "string"; only files are parsed.
    if (item.kind === "file") {
      readDroppedFile(item.getAsFile());
    }
  }
});
</script>
DataView 的替代方案(使用类型化数组)
/**
 * Parse a dropped binary file with typed arrays (no DataView) and render its
 * text.
 *
 * Layout assumed by this demo: a 4-byte signed integer followed by bytes that
 * are scanned one at a time. Printable ASCII (32-126) and line feeds (10) are
 * kept; every other byte is skipped. Words prefixed with "%" are rendered as
 * <h2> headings inside the #output element.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw contents of the dropped file.
 * @returns {void}
 */
const processFile = (arrayBuffer) => {
  const HEADER_LENGTH = 4;
  // The extracted text comes from an arbitrary file, so it must be escaped
  // before it is assigned to innerHTML; otherwise "<", ">" and "&" in the
  // file would be interpreted as markup.
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };

  // An Int32Array over a 1-3 byte slice throws a RangeError, and over an
  // empty slice it yields undefined, so reject short files up front.
  if (arrayBuffer.byteLength < HEADER_LENGTH) {
    console.warn(
      `File is only ${arrayBuffer.byteLength} byte(s); expected at least ${HEADER_LENGTH}.`,
    );
    document.getElementById("output").innerHTML = "";
    return;
  }

  // NOTE: Int32Array decodes using the platform's byte order (little-endian
  // on common hardware); use DataView#getInt32(offset, true) when the byte
  // order must be guaranteed.
  const [firstInteger] = new Int32Array(arrayBuffer.slice(0, HEADER_LENGTH));
  console.log("First integer:", firstInteger);

  // View (not copy) of everything after the header, one signed byte per item.
  const payload = new Int8Array(arrayBuffer, HEADER_LENGTH);
  const outputArr = [];
  for (const char of payload) {
    if (char >= 32 && char <= 126) {
      outputArr.push(String.fromCharCode(char));
    } else if (char === 10) {
      outputArr.push("\n");
    }
    // Any other byte is skipped.
  }

  const escapedText = outputArr
    .join("")
    .replace(/[&<>]/g, (ch) => HTML_ESCAPES[ch]);
  document.getElementById("output").innerHTML = escapedText.replace(
    /%(\w+)/g,
    "<h2>$1</h2>",
  );
};
/* Full viewport height and zero margin on the page root elements. */
html,
body {
height: 100vh;
margin: 0;
}
/* Background shown while the "drag-over" class is set by the dragover handler. */
.drag-over {
background-color: #ccc;
}
/* Leave 120px of space below the parsed output. */
pre {
margin-bottom: 120px;
}
<h1>Drop a file here</h1>
<pre id="output"></pre>
<hr/>
<script>
// Drag-and-drop wiring: the <h1> is the drop target, and every dropped file
// is read into an ArrayBuffer and handed to processFile.
const dragClass = "drag-over";
// Declared with const: assigning to an undeclared name creates an implicit
// global and is a ReferenceError in strict mode / ES modules.
const elBody = document.querySelector("h1");

/**
 * Read one dropped file into an ArrayBuffer and pass it to processFile.
 *
 * A fresh FileReader is created per file: a FileReader that is still loading
 * throws InvalidStateError when asked to start another read, so a single
 * shared reader breaks as soon as more than one file is dropped at once.
 *
 * @param {File} file - File obtained from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = () => {
    processFile(reader.result);
  };
  reader.onerror = () => {
    console.error("Could not read dropped file:", file.name, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what allows the later "drop" event to fire.
  dragEvent.preventDefault();
  // classList.add is a no-op when the class is already present.
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);
  for (const item of dropEvent.dataTransfer.items) {
    // Dragged text/links show up as kind "string"; only files are parsed.
    if (item.kind === "file") {
      readDroppedFile(item.getAsFile());
    }
  }
});
</script>
评论
1赞
Chris Barr
8/24/2023
哇,效果很好,谢谢!我得再仔细研究一下,看看这个文件中的其他数据是如何拆分的。
0赞
mplungjan
8/24/2023
我对第二个文件(西班牙语)印象不深,似乎它可能是 RTF?
1赞
Chris Barr
8/24/2023
是的,您可以在该存储库中查找我的非二进制解析解决方案,该解决方案非常丑陋。不是 RTF 数据。许多其他文件格式都选择XML,我更喜欢使用它!
1赞
Chris Barr
8/29/2023
我只想再次说声谢谢!我刚刚用它作为启动器重写了我的库。如果你好奇,可以在这里查看,并查看描述文件格式的大注释,以分解描述接下来内容的长度或类型的某些字节的含义:github.com/FiniteLooper/SongShowPlus-parser
1赞
mplungjan
8/29/2023
@ChrisBarr 不错的项目。结构良好,还有测试。向你脱帽致敬 🎩
评论