PowerQuery Commands
Combine transaction txt, pdf etc files in a folder, move header rows into transaction line rows, filter-out unwanted rows
Let
'this is the auto load of the text files in folder
Source = Folder.Files("D:\PiT\3. Clients\Banks\client_name\BAS data\AP txt files"),
#"Filtered Hidden Files1" = Table.SelectRows(Source, each [Attributes]?[Hidden]? <> true),
#"Invoke Custom Function1" = Table.AddColumn(#"Filtered Hidden Files1", "Transform File", each #"Transform File"([Content])),
#"Renamed Columns1" = Table.RenameColumns(#"Invoke Custom Function1", {"Name", "Source.Name"}),
#"Removed Other Columns1" = Table.SelectColumns(#"Renamed Columns1", {"Source.Name", "Transform File"}),
#"Expanded Table Column1" = Table.ExpandTableColumn(#"Removed Other Columns1", "Transform File", Table.ColumnNames(#"Transform File"(#"Sample File"))),
#"Changed Type" = Table.TransformColumnTypes(#"Expanded Table Column1",{{"Source.Name", type text}, {"Column1", type text}, {"Column2", type text}, {"Column3", type text}, {"Column4", type text}, {"Column5", type number}}),
#"Remove blank rows" = Table.SelectRows(#"Changed Type", each ([Column1] <> "")),
'remove other filler rows that aren't containing data
#"Remove spacer rows" = Table.SelectRows(#"Remove blank rows", each not Text.Contains([Column1], "===========") and not Text.Contains([Column1], "-----------------") and not Text.Contains([Column1], "___________") and not Text.Contains([Column1], "Batch") and not Text.Contains([Column1], "Morris Finance Ltd") and not Text.Contains([Column1], "Creditors Batch Report") and not Text.Contains([Column1], "Totals ") and not Text.Contains([Column1], "Auth. Variance") and not Text.Contains([Column1], "Age Order") and not Text.Contains([Column1], " Entered")),
'Delete columns not selected
#"Removed Other Columns" = Table.SelectColumns(#"Remove spacer rows",{"Source.Name", "Column1"}),
'Add a column and include the header name if it does not contains the identifier " 001". The line data starts with this hence they are null and remainder are duplicate of the header text
#"Duplicate for Headers" = Table.AddColumn(#"Removed Other Columns", "Headers", each if Text.Contains([Column1], " 001 ") then null else [Column1]),
'Fill down the header information. We then have 2 columns, one with the header and line info, one with the headers that repeat for each transaction
#"Fill down" = Table.FillDown(#"Duplicate for Headers",{"Headers"}),
'The header data contains a '**', so only keep rows that do not have that text
#"Remove header from Col1" = Table.SelectRows(#"Fill down", each not Text.Contains([Column1], "**")),
'Drag to put headers before Column 1, then rename column 1 to 'Line detail', then rename source name to BAS Period (it contains the file names that include the period)
#"Reordered Columns" = Table.ReorderColumns(#"Remove header from Col1",{"Source.Name", "Headers", "Column1"}),
#"Renamed Column1" = Table.RenameColumns(#"Reordered Columns",{{"Column1", "Line Detail"}}),
#"Renamed Source.Name" = Table.RenameColumns(#"Renamed Column1",{{"Source.Name", "BAS Period"}}),
'Extract only the BAS period from the file name and change data type
#"Extracted First Characters" = Table.TransformColumns(#"Renamed Source.Name", {{"BAS Period", each Text.Start(_, 8), type text}}),
#"Changed Type1" = Table.TransformColumnTypes(#"Extracted First Characters",{{"BAS Period", type date}}),
'I think this next bit of code filters out rows with text "Opening" or "Closing"
#"Filtered Rows" = Table.SelectRows(#"Changed Type1", each true)
in
#"Filtered Rows"
= Table.SelectRows(#"Changed Type1", each not Text.Contains([Date], "Opening") and not Text.Contains([Date], "Closing"))
Partners In Tax
Collaborative approach to improve business outcomes.
Contact
stephen@partnersintax.com.au
© 2024. All rights reserved.

