I'm trying to remove sensitive information from word-files before they are sent from our system. Below is an example of the custom properties in a file that is going to be sent. I would like to remove the contents of filePath and templateFilePath.
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="2" name="docId">
<vt:lpwstr>123</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="3" name="verId">
<vt:lpwstr>1</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="4" name="templateId">
<vt:lpwstr>321</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="5" name="fileId">
<vt:lpwstr>123</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="6" name="filePath">
<vt:lpwstr>I want to remove this</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="7" name="templateFilePath">
<vt:lpwstr>I want to remove this</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="8" name="filePathOneNote">
<vt:lpwstr>\</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="9" name="fileName">
<vt:lpwstr>test.docx</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="10" name="comment">
<vt:lpwstr>Test comment</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="11" name="sourceId">
<vt:lpwstr>12345</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="12" name="module">
<vt:lpwstr>Document</vt:lpwstr>
</property>
</Properties>
I got this code from the Open XML SDK productivity tool:
/// <summary>
/// Blanks the text of the "filePath" and "templateFilePath" custom document properties.
/// </summary>
/// <remarks>
/// The properties are located by their name attribute rather than by their position
/// in the part, so the method keeps targeting the right entries when properties are
/// added, removed or reordered. Only values stored as a vt:lpwstr child are cleared;
/// a matching property without such a child is skipped.
/// </remarks>
/// <param name="customFilePropertiesPart">The custom file properties part to modify in place.</param>
private static void ChangeCustomFilePropertiesPart(CustomFilePropertiesPart customFilePropertiesPart)
{
    CustomProperties.Properties properties = customFilePropertiesPart.Properties;
    if (properties == null)
    {
        // No root element available, so there is nothing to clear.
        return;
    }

    foreach (CustomProperties.CustomDocumentProperty property in properties.Elements<CustomProperties.CustomDocumentProperty>())
    {
        bool isSensitive = property.Name == "filePath" || property.Name == "templateFilePath";
        if (!isSensitive)
        {
            continue;
        }

        // GetFirstChild returns null when the property has no vt:lpwstr child.
        VariantTypes.VTLPWSTR text = property.GetFirstChild<VariantTypes.VTLPWSTR>();
        if (text != null)
        {
            text.Text = "";
        }
    }
}
But I can't trust that the properties I want to remove are number four and five, so I have to find them by the name attribute before I remove the text. Can anyone help me? I would like to use linq or the Open XML SDK in some way.
Thanks!
You don't want to query by @pid, as that may change. Query by @name instead, which will always remain the same value for custom document properties. So in your case, just use a lambda to query where @name = "templateFilePath", then set its .Value to nothing, copy that back and save.
Here is the code I came up with:
/// <summary>
/// Blanks the text of the "filePath" and "templateFilePath" custom document properties.
/// </summary>
/// <remarks>
/// Properties are matched on their name attribute. Only values stored as a vt:lpwstr
/// child are cleared; a matching property without such a child is skipped instead of
/// causing a <see cref="System.NullReferenceException"/>.
/// </remarks>
/// <param name="customFilePropertiesPart">The custom file properties part to modify in place.</param>
private void ChangeCustomFilePropertiesPart(CustomFilePropertiesPart customFilePropertiesPart)
{
    CustomProperties.Properties properties = customFilePropertiesPart.Properties;
    if (properties == null)
    {
        // No root element available, so there is nothing to clear.
        return;
    }

    var sensitiveProps = from n in properties.Elements<CustomProperties.CustomDocumentProperty>()
                         where n.Name == "filePath" || n.Name == "templateFilePath"
                         select n;

    foreach (var prop in sensitiveProps)
    {
        // GetFirstChild returns null when the property has no vt:lpwstr child.
        VariantTypes.VTLPWSTR value = prop.GetFirstChild<VariantTypes.VTLPWSTR>();
        if (value != null)
        {
            value.Text = "";
        }
    }
}
精彩评论