Difference between revisions of "LGenerics"
Line 382: | Line 382: | ||
==Export to JSON== | ==Export to JSON== | ||
+ | Datasets used: a collection of simple items (with three string properties and one integer property) and an array of records whose fields contain values that coincide with the values of the collection's item properties (i.e. the resulting JSONs should be the same). The number of elements in the datasets was chosen so that the size of the resulting JSON approximately corresponded to the values (20KB, 100KB, 1MB, 10MB, 50MB). | ||
+ | |||
+ | TJSONStreamer.CollectionToJSON() was taken as a reference implementation (FpJsonStreamer for short), other members: | ||
+ | |||
+ | * PdoToJson() with a collection as parameter (PdoCollection); | ||
+ | * PdoToJson() with array as parameter and field name list registration (PdoRecordFieldMap); | ||
+ | * PdoToJson() with array as parameter and custom callback registration (PdoRecordCallback); | ||
+ | |||
+ | Each member exports the data several times (number of attempts is pre-defined), the best time is taken as the result. | ||
+ | Full benchmark code (Windows only, uses QueryPerformanceCounter): | ||
+ | |||
+ | <syntaxhighlight lang="pascal"> | ||
+ | program test; | ||
+ | |||
+ | {$MODE OBJFPC}{$H+}{$MODESWITCH ADVANCEDRECORDS} | ||
+ | {$OPTIMIZATION NOORDERFIELDS} | ||
+ | |||
+ | uses | ||
+ | Windows, Classes, SysUtils, lgPdo, FpJsonRtti, lgJson; | ||
+ | |||
+ | const | ||
+ | AlphaLen = 64; | ||
+ | Alphabet: array[0..AlphaLen-1] of Char = '1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_ '; | ||
+ | |||
+ | function RandomString(aLength: SizeInt): string; | ||
+ | var | ||
+ | I: SizeInt; | ||
+ | p: PChar; | ||
+ | begin | ||
+ | Result := ''; | ||
+ | SetLength(Result, aLength); | ||
+ | p := PChar(Result); | ||
+ | for I := 0 to Pred(aLength) do | ||
+ | p[I] := Alphabet[Random(AlphaLen)]; | ||
+ | end; | ||
+ | |||
+ | type | ||
+ | TMyObj = class(TCollectionItem) | ||
+ | private | ||
+ | FId: Int64; | ||
+ | FName, | ||
+ | FValue, | ||
+ | FInfo: string; | ||
+ | published | ||
+ | property id: Int64 read FId write FId; | ||
+ | property name: string read FName write FName; | ||
+ | property value: string read FValue write FValue; | ||
+ | property info: string read FInfo write FInfo; | ||
+ | end; | ||
+ | |||
+ | TMyRec = record | ||
+ | Id: Int64; | ||
+ | Name, | ||
+ | Value, | ||
+ | Info: string; | ||
+ | class procedure JsonWrite(r: Pointer; aWriter: TJsonStrWriter); static; | ||
+ | end; | ||
+ | |||
+ | class procedure TMyRec.JsonWrite(r: Pointer; aWriter: TJsonStrWriter); | ||
+ | type | ||
+ | PMyRec = ^TMyRec; | ||
+ | const | ||
+ | fnId = 'id'; | ||
+ | fnName = 'name'; | ||
+ | fnValue = 'value'; | ||
+ | fnInfo = 'info'; | ||
+ | begin | ||
+ | with PMyRec(r)^ do | ||
+ | aWriter | ||
+ | .BeginObject | ||
+ | .Add(fnId, Id) | ||
+ | .Add(fnName, Name) | ||
+ | .Add(fnValue, Value) | ||
+ | .Add(fnInfo, Info) | ||
+ | .EndObject; | ||
+ | end; | ||
+ | |||
+ | const | ||
+ | MinLen = 5; | ||
+ | Range = 16; | ||
+ | |||
+ | var | ||
+ | TestColl: TCollection = nil; | ||
+ | RecList: array of TMyRec; | ||
+ | |||
+ | procedure UpdateData(aElCount: Integer); | ||
+ | var | ||
+ | I, OldSize: Integer; | ||
+ | const | ||
+ | IdRange = 9007199254740991; | ||
+ | begin | ||
+ | if TestColl = nil then | ||
+ | TestColl := TCollection.Create(TMyObj); | ||
+ | OldSize := TestColl.Count; | ||
+ | SetLength(RecList, aElCount); | ||
+ | for I := OldSize to Pred(aElCount) do | ||
+ | with TMyObj(TestColl.Add) do begin | ||
+ | Id := Random(IdRange); | ||
+ | Name := RandomString(MinLen + Random(Range)); | ||
+ | Value := RandomString(MinLen + Random(Range)); | ||
+ | Info := RandomString(MinLen + Random(Range)); | ||
+ | RecList[I].Id := Id; | ||
+ | RecList[I].Name := Name; | ||
+ | RecList[I].Value := Value; | ||
+ | RecList[I].Info := Info; | ||
+ | end; | ||
+ | end; | ||
+ | |||
+ | function JsonStreamer: string; | ||
+ | begin | ||
+ | with TJSONStreamer.Create(nil) do | ||
+ | try | ||
+ | Result := CollectionToJSON(TestColl); | ||
+ | finally | ||
+ | Free; | ||
+ | end; | ||
+ | end; | ||
+ | |||
+ | function PdoCollection: string; | ||
+ | begin | ||
+ | Result := PdoToJson(TypeInfo(TestColl), TestColl); | ||
+ | end; | ||
+ | |||
+ | function PdoMap: string; | ||
+ | begin | ||
+ | Result := PdoToJson(TypeInfo(RecList), RecList); | ||
+ | end; | ||
+ | |||
+ | function PdoProc: string; | ||
+ | begin | ||
+ | Result := PdoToJson(TypeInfo(RecList), RecList); | ||
+ | end; | ||
+ | |||
+ | procedure RegisterFields; | ||
+ | begin | ||
+ | RegisterRecordFields(TypeInfo(TMyRec), ['id','name','value','info']); | ||
+ | end; | ||
+ | |||
+ | procedure RegisterCallback; | ||
+ | begin | ||
+ | RegisterRecordJsonProc(TypeInfo(TMyRec), @TMyRec.JsonWrite); | ||
+ | end; | ||
+ | |||
+ | procedure UnRegister; | ||
+ | begin | ||
+ | UnRegisterPdo(TypeInfo(TMyRec)); | ||
+ | end; | ||
+ | |||
+ | type | ||
+ | TJsonFun = function: string; | ||
+ | |||
+ | TMember = record | ||
+ | Fun: TJsonFun; | ||
+ | Name: string; | ||
+ | BeforeRun, | ||
+ | AfterRun: TProcedure; | ||
+ | end; | ||
+ | |||
+ | TRound = record | ||
+ | ElCount, | ||
+ | TryCount: Integer; | ||
+ | JsonSize: string; | ||
+ | end; | ||
+ | |||
+ | const | ||
+ | MemberList: array of TMember = ( | ||
+ | (Fun: @JsonStreamer; Name: 'FpJsonStreamer......'; BeforeRun: nil; AfterRun: nil), | ||
+ | (Fun: @PdoCollection; Name: 'PdoCollection.......'; BeforeRun: nil; AfterRun: nil), | ||
+ | (Fun: @PdoMap; Name: 'PdoRecordFieldMap...'; BeforeRun: @RegisterFields; AfterRun: @UnRegister), | ||
+ | (Fun: @PdoProc; Name: 'PdoRecordCallback...'; BeforeRun: @RegisterCallback; AfterRun: @UnRegister)); | ||
+ | |||
+ | Rounds: array of TRound = ( | ||
+ | (ElCount: 220; TryCount: 100; JsonSize: '20 KB'), | ||
+ | (ElCount: 1091; TryCount: 50; JsonSize: '100 KB'), | ||
+ | (ElCount: 10830; TryCount: 10; JsonSize: '1 MB'), | ||
+ | (ElCount: 108400; TryCount: 5; JsonSize: '10 MB'), | ||
+ | (ElCount: 542000; TryCount: 1; JsonSize: '50 MB') | ||
+ | ); | ||
+ | |||
+ | procedure Run; | ||
+ | var | ||
+ | CurrRound: TRound; | ||
+ | Member: TMember; | ||
+ | I: Integer; | ||
+ | Start, Fin, Freq, Best: Int64; | ||
+ | Elapsed, Rate: Double; | ||
+ | s: string = ''; | ||
+ | const | ||
+ | Million = 1000000; | ||
+ | begin | ||
+ | if not QueryPerformanceFrequency(Freq) or (Freq = 0) then | ||
+ | begin | ||
+ | WriteLn('Oops, something went wrong with QueryPerformanceFrequency()'); | ||
+ | WriteLn('Error: ', GetLastOsError); | ||
+ | exit; | ||
+ | end; | ||
+ | for CurrRound in Rounds do begin | ||
+ | UpdateData(CurrRound.ElCount); | ||
+ | WriteLn('-------<JSON size ', CurrRound.JsonSize, '>-------'); | ||
+ | for Member in MemberList do begin | ||
+ | if Member.BeforeRun <> nil then | ||
+ | Member.BeforeRun(); | ||
+ | Best := High(Int64); | ||
+ | for I := 1 to CurrRound.TryCount do begin | ||
+ | QueryPerformanceCounter(Start); | ||
+ | s := Member.Fun(); | ||
+ | QueryPerformanceCounter(Fin); | ||
+ | if Fin - Start < Best then | ||
+ | Best := Fin - Start; | ||
+ | end; | ||
+ | if Member.AfterRun <> nil then | ||
+ | Member.AfterRun(); | ||
+ | Elapsed := Double(Best)/Freq; | ||
+ | WriteLn(' ', Member.Name, Double2Str(Double(Round(Elapsed*Million))/1000), ' ms.'); | ||
+ | Rate := (Double(Length(s))/Million)/Elapsed; | ||
+ | WriteLn(' Rating: ', Double2Str(Double(Round(Rate*100))/100), ' MB/s.'); | ||
+ | end; | ||
+ | end; | ||
+ | end; | ||
+ | |||
+ | begin | ||
+ | Run; | ||
+ | TestColl.Free; | ||
+ | end. | ||
+ | </syntaxhighlight> | ||
+ | |||
+ | Results on author's machine: | ||
+ | <pre> | ||
+ | -------<JSON size 20 KB>------- | ||
+ | FpJsonStreamer......1.139 ms. | ||
+ | Rating: 20.41 MB/s. | ||
+ | PdoCollection.......0.135 ms. | ||
+ | Rating: 149.47 MB/s. | ||
+ | PdoRecordFieldMap...0.095 ms. | ||
+ | Rating: 212.01 MB/s. | ||
+ | PdoRecordCallback...0.06 ms. | ||
+ | Rating: 336.12 MB/s. | ||
+ | -------<JSON size 100 KB>------- | ||
+ | FpJsonStreamer......6.171 ms. | ||
+ | Rating: 18.76 MB/s. | ||
+ | PdoCollection.......0.675 ms. | ||
+ | Rating: 148.94 MB/s. | ||
+ | PdoRecordFieldMap...0.475 ms. | ||
+ | Rating: 211.85 MB/s. | ||
+ | PdoRecordCallback...0.304 ms. | ||
+ | Rating: 331.03 MB/s. | ||
+ | -------<JSON size 1 MB>------- | ||
+ | FpJsonStreamer......88.315 ms. | ||
+ | Rating: 13.04 MB/s. | ||
+ | PdoCollection.......7.062 ms. | ||
+ | Rating: 141.6 MB/s. | ||
+ | PdoRecordFieldMap...5.018 ms. | ||
+ | Rating: 199.27 MB/s. | ||
+ | PdoRecordCallback...3.317 ms. | ||
+ | Rating: 301.47 MB/s. | ||
+ | -------<JSON size 10 MB>------- | ||
+ | FpJsonStreamer......3736.737 ms. | ||
+ | Rating: 3.08 MB/s. | ||
+ | PdoCollection.......76.899 ms. | ||
+ | Rating: 130.16 MB/s. | ||
+ | PdoRecordFieldMap...56.989 ms. | ||
+ | Rating: 175.63 MB/s. | ||
+ | PdoRecordCallback...40.309 ms. | ||
+ | Rating: 248.3 MB/s. | ||
+ | -------<JSON size 50 MB>------- | ||
+ | FpJsonStreamer......86500.116 ms. | ||
+ | Rating: 0.67 MB/s. | ||
+ | PdoCollection.......379.533 ms. | ||
+ | Rating: 131.94 MB/s. | ||
+ | PdoRecordFieldMap...278.097 ms. | ||
+ | Rating: 180.07 MB/s. | ||
+ | PdoRecordCallback...192.341 ms. | ||
+ | Rating: 260.35 MB/s. | ||
+ | </pre> | ||
+ | |||
+ | It seems to work pretty fast. | ||
=Download= | =Download= |
Revision as of 13:18, 25 December 2022
About
Collection of generic algorithms and data structures for Free Pascal.
Requires: FPC 3.2+, Lazarus 1.9+.
Author: A.Koverdyaev (avk)
License: Apache License 2.0
Features
Implemented primitives
- stack (unit lgStack)
- queue (unit lgQueue)
- deque (unit lgDeque)
- vector (unit lgVector)
- vector of bits (unit lgVector)
- priority queue based on binary heap (unit lgPriorityQueue)
- priority queue with key update and melding based on pairing heap (unit lgPriorityQueue)
- sorted list (unit lgList)
- hashed list - array based list with the ability to fast search by key (unit lgList)
- hashset (unit lgHashSet)
- fine-grained concurrent hashset (unit lgHashSet)
- sorted set (unit lgTreeSet)
- set of arbitrary size (unit lgUtil, TGSet)
- hash multiset (unit lgHashMultiSet)
- fine-grained concurrent hashmultiset (unit lgHashMultiSet)
- sorted multiset (unit lgTreeMultiSet)
- hashmap (unit lgHashMap)
- fine-grained concurrent hashmap (unit lgHashMap)
- sorted map (unit lgTreeMap)
- hash multimap (unit lgMultiMap)
- tree multimap (unit lgMultiMap)
- list miltimap (unit lgMultiMap)
- bijective map (unit lgBiMap)
- sparse 2D table (unit lgTable2D)
- disjoint set (unit lgHashSet)
- AVL tree (unit lgAvlTree)
- red-black tree (unit lgRbTree)
- some treap variants (unit lgTreap)
- general rooted tree (unit lgRootTree)
- sparse labeled undirected graph (unit lgSimpleGraph)
- sparse labeled directed graph (unit lgSimpleDigraph)
- lite containers based on advanced records
- extended IEnumearble interface - filtering, mapping, etc.
Algorithms on graphs
- core functions:
- vertices/edges addition/removal/query/enumeration, edge contraction, degree
- load/save to own binary format, primitive export to DOT format
- connectivity:
- connected/strongly connected components, bipartite detection, degeneracy, k-core
- articulation points, bridges, biconnected components
- edge-connectivity
- traversals:
- BFS/DFS traversals with visitors,
- cycle/negative cycle detection,
- topological sort
- operations:
- induced subgraphs, complement, reverse, union, intersect, symmetric difference,
- chordality testing
- planarity testing: FMR Left-Right Planarity algorithm
- distance within graph:
- eccentricity, radius, diameter, center, periphery
- matching:
- maximum cardinality matching on bipartite/arbitrary graphs
- minimum/maximum weight matching on bipartite graphs
- dominators in flowgraps: simple iterative and Semi-NCA algorithms
- some suggestions for NP-hard problems:
- maximum independent set, maximal independent sets enumeration
- maximum clique, cliques enumeration
- minimum vertex cover, minimal vertex covers enumeration
- vertex coloring, approximations and exact
- minimum dominating set
- Hamiltonian cycles and paths
- local search TSP approximations, BnB TSP solver
- minimum spanning trees: Prims's and Kruskal's algorithms
- single source shortest paths:
- Dijkstra with pairing heap, Bellman-Ford-Moor with Tarjan's subtree disassembly(BFMT)
- single pair shortest paths:
- Dijkstra with binary heap, BFMT, bidirection Dijkstra, A*, NBA*
- all pairs shortest paths:
- Floyd–Warshall, Johnson, BFMT
- networks:
- maximum flow: push/relabel, capacity scaling Dinitz
- minimum-cost flow: Busacker-Gowen, cost scaling push/relabel algorithm
- global minimum cut: Stoer–Wagner, Nagamochi-Ibaraki
Algorithms on arrays and vectors
(mostly unit lgArrayHelpers)
- reverse, right/left cyclic shifts
- permutations
- binary search
- N-th order statistics
- inversion counting
- distinct values selection
- quicksort
- introsort
- dual pivot quicksort
- mergesort
- timsort (unit lgMiscUtils)
- counting sort
- radix sort
- translation of Orson Peters' PDQSort algorithm
- static segment tree
- longest increasing subsequence
- ...
Algorithms on strings and sequences
(units lgStrHelpers, lgSeqUtils)
- exact string matching
- Boyer-Moore string matching algorithm(in Fast Search variant), case sensitive and case insensitive (unit lgStrHelpers)
- Boyer-Moore-Horspool-Raita algorithm (unit lgStrHelpers)
- longest common subsequence of two sequences:
- reducing the LCS problem to LIS
- Kumar-Rangan algorithm for LCS
- Myers algorithm for LCS
- the Levenshtein distance:
- simple DP algorithm
- modified Berghel-Roach algorithm
- Myers bit-vector algorithm with cut-off heuristic
- LCS distance:
- Myers algorithm for LCS distance
- fuzzy string matching(k differences)
- Ukkonen EDP algorithm
- fuzzy string matching with preprocessing(something similar to fuzzywuzzy)
JSON Patch
The TJsonPatch class from the lgJson unit implements the JSON Patch (RFC 6902) functionality. JSON Patch is a JSON document format (media type is application/json-patch+json) for expressing a sequence of operations applied to a target JSON document; it is always an array of objects, each representing a single operation, supported set of operations: ADD, REMOVE, REPLACE, MOVE, COPY, and TEST.
For example, for this document:
{
"foo": [1, 3],
"boo": ["bar", "baz"]
}
applying this patch
[
{"op": "add", "path": "/foo/1", "value": "fizz"},
{"op": "replace", "path": "/boo/1", "value": "qux"},
{"op": "add", "path": "/ozz", "value": "zoo"}
]
will have the following result:
{
"foo": [1, "fizz", 3],
"boo": ["bar", "qux"],
"ozz": "zoo"
}
The TJsonPatch class also contains a POC implementation of the Diff utility, which generates a patch by comparing source and target documents.
For example, for this source:
{
"a": 1,
"b": null,
"c": ["test", "plop"]
}
and for this target:
{
"a": 6,
"c": ["test", "flop"],
"d": "baz"
}
it will generate a patch like this:
[
{"op": "replace", "path": "/a", "value": 6},
{"op": "remove", "path": "/b"},
{"op": "replace", "path": "/c/1", "value": "flop"},
{"op": "add", "path": "/d", "value": "baz"}
]
Exporting structures to JSON format
A unit has been added to LGenerics for exporting Pascal data structures to JSON format. The following types are currently supported (called PDOs for short):
- numeric, boolean or string types, some limited support of variants;
- enumerations (exported in string form as the name of the corresponding constant);
- sets (exported as an array of its elements);
- regular records, it is possible to register a list of field names or a custom serialization callback;
- classes, using published properties or by registering a custom callback (TStrings and TCollection as a special case);
- objects (only by registering a custom serialization callback);
- static arrays of PDO (only one-dimensional, multidimensional arrays are written as one-dimensional);
- dynamic arrays of PDO;
- variant arrays (currently only one-dimensional);
For example, this code:
program test;
{$mode objfpc}{$h+}{$optimization noorderfields}
uses
SysUtils, lgUtils, lgJson, lgPdo;
type
TDayOfWeek = 1..7;
TDaySet = set of TDayOfWeek;
TMember = record
Name: string;
Age: Integer;
VisitDays: TDaySet;
end;
const
Members: array of TMember = (
(Name: 'Alice'; Age: 21; VisitDays: [1, 3, 5]),
(Name: 'Bob'; Age: 25; VisitDays: [2, 5, 6]));
var
n: specialize TGAutoRef<TJsonNode>;
s: string;
begin
RegisterRecordFields(TypeInfo(TMember), ['name', 'age', 'visitDays']);
s := PdoToJson(TypeInfo(Members), Members);
if not n.Instance.Parse(s) then
WriteLn('Oops, something went wrong :(')
else
WriteLn(n.Instance.FormatJson([jfoEgyptBrace, jfoSingleLineArray]));
end.
produces this JSON:
[
{
"name": "Alice",
"age": 21,
"visitDays": [1, 3, 5]
},
{
"name": "Bob",
"age": 25,
"visitDays": [2, 5, 6]
}
]
Other features
- non-cryptogarphic hashes (unit lgHash):
- Yann Collet's xxHash32, xxHash64
- Austin Appleby's MurmurHash2, MurmurHash2A, MurmurHash3_x86_32, MurmurHash64A
- brief and dirty implementation of futures concept (unit lgAsync)
- brief channel implementation (unit lgAsync)
- brief implementation of thread pool (unit lgAsync)
- 128-bit integers (unit lgInt128)
- JSON validator/parser/generator (unit lgJson)
- JSON Patch/Diff (unit lgJson)
- CSV document processing (unit lgCsvUtils)
- Eisel-Lemire fast string-to-double conversion algorithm (unit lgJson)
- Ryū double-to-string conversion algorithm (unit lgJson)
Perfomance
Containers
Hash maps
There is a wonderful benchmark from BeniBela, which also covers hashmaps from LGenerics.
Algorithms on arrays and vectors
Sorting
The benchmark sorts 1000000 integers and calculates the number of CPU cycles spent on one element and involves:
- QuickSort_ItemList_Context from unit SortBase (Rtl/SortBase)
- TOrderingArrayUtils.Sort from Fcl-Stl.GArrayUtils (Fcl-Stl)
- TArrayHelper.Sort from Rtl-Generics.Generics.Collections (Rtl-Generics)
- std::sort from libstdc++ (std::sort)
- std::stable_sort from libstdc++ (std::stable_sort)
- TGOrdinalArrayHelper.QuickSort (LG/QuickSort)
- TGOrdinalArrayHelper.IntroSort (LG/IntroSort)
- TGOrdinalArrayHelper.DualPivotQuickSort (LG/DualPivotQuickSort)
- TGOrdinalArrayHelper.PDQSort (LG/PDQSort)
- TGComparableArrayHelper.MergeSort (LG/MergeSort)
- TGComparableTimSort.Sort (LG/TimSort)
- TGOrdinalArrayHelper.Sort (LG/Sort)
- A negative value indicates that the algorithm exhibits quadratic behavior.
Algorithms on graphs
For performance comparison the AGraph library (taken from here) was chosen as a reference implementation. All tests were compiled for x86 and run on win64 machine (unfortunately the 64-bit version of AGraph crashes on these tests).
So, AGraph vs LGenerics:
problem | graph | AGraph time(ms) | LGenerics time(ms) | notes |
---|---|---|---|---|
BFS traversal | undirected, V=500000, E=4000000 | 640 | 109 | |
BFS traversal | directed, V=500000, E=4000000 | 484 | 94 | |
Connected components | undirected, V=500000, E=4000000 | 764 | 0 | If no edges were removed, TGSimpleGraph always knows the number and ownership of connected components |
Strongly connected components | directed, V=500000, E=4000000 | 1060 | 265 | |
Single-source shortest path | undirected, V=500000, E=4000000 | 3011 | 842 | |
Single-source shortest path | directed, V=500000, E=4000000 | 2153 | 718 | |
Single-pair shortest path | undirected, V=500000, E=4000000 | 2777 | 31 | |
Single-pair shortest path | directed, V=500000, E=4000000 | 1825 | 47 | |
Minimum spanning tree | undirected, V=500000, E=4000000 | 5974 | 640 | |
Maximum flow | dense, V=1000, E=499500 | 29499 | 47 | |
Maximum flow | sparse, V=64000, E=512000 | 3339 | 63 | |
Planarity test | undirected, V=219086, E=657252 | 1419 | 172 | |
Maximum clique | DIMACS brock200_2.clq, V=200, E=9876 | 84210 | 16 | |
Maximum clique | DIMACS brock200_4.clq, V=200, E=13089 | Cancelled(timeout 1 h) | 312 |
Algorithms on strings and sequences
It was curious to compare the performance of the SimRatioLevEx() function (which is inspired by FuzzyWuzzy) with some incarnations of the FuzzyWuzzy (listed here) on benchmark datasets. Disclamer: SimRatioLevEx() does not reproduce FuzzyWuzzy, but it does some things in a similar way, in particular, SimRatioLevEx() in smTokenSetEx mode and token_set_ratio() do roughly the same job.
It seems the C++ version only supports single-byte strings, so only compared to the single-byte version of SimRatioLevEx():
Dataset SimRatioLevEx() token_set_ratio() Short/big_dist 1154 6440 Short/small_dist 967 3020 Medium/big_dist 811 3450 Medium/small_dist 702 1470 Long/big_dist 1966 15000 Long/small_dist 1061 2250
The numbers indicate the run time in milliseconds; the C++ version was compiled with gcc-8.1.0 with options -O3 -m64; the Pascal version was compiled with FPC-3.3.1-9941-g8e6e9bbf33, -O3. The benchmark was run on a Windows x64 machine.
The Go version, on the contrary, always works with Unicode strings:
Dataset SimRatioLevExUtf8() TokenSetRatio() Short/big_dist 2143 18705 Short/small_dist 1593 2224 Medium/big_dist 1266 15062 Medium/small_dist 853 1742 Long/big_dist 3853 79851 Long/small_dist 1269 3126
Go version: go1.10.4 linux/amd64; FPC-3.3.1-10683-g2a19e152b7 -O3. The benchmark was run on a virtual Linux machine.
JSON documents
A small performance comparison of the lgJson. TJsonNode class with some other implementations, the benchmark was compiled for x86_64 and run on a win64 machine.
description | FpJson | JsonTools | lgJson | notes |
---|---|---|---|---|
Average parsing speed on a set of JSON samples from the example/json/parse_bench folder, MB/s | 34,45 | 46,11 | 68,12 | |
Average validating speed on a set of JSON samples from the example/json/parse_bench folder, MB/s | 34,4 | 46,02 | 349,48 | FpJson does not provide any function to validate JSON, so a parser was used |
Recursive traversal (using the built-in enumerator) of the document of moderate size (34 KB) with all values read (50000 repeats), milliseconds | 8315 | 6802 | 639 | |
Search in a moderately sized document using path (750000 repeats), milliseconds | 8549 | 5398 | 873 | |
Dynamic creation of a small document and reading it as formatted JSON(150000 repeats), milliseconds | 7597 | 4150 | 1903 | |
Saving a moderate size document to a stream (10000 repeats), milliseconds | 4820 | 4587 | 827 | |
RFC 8259 compliance test, 295 test files from test/json_testset/testset | 5 failed | 23 failed | all passed |
Export to JSON
Datasets used: a collection of simple items (with three string properties and one integer property) and an array of records whose fields contain values that coincide with the values of the collection's item properties (i.e. the resulting JSONs should be the same). The number of elements in the datasets was chosen so that the size of the resulting JSON approximately corresponded to the values (20KB, 100KB, 1MB, 10MB, 50MB).
TJSONStreamer.CollectionToJSON() was taken as a reference implementation (FpJsonStreamer for short), other members:
- PdoToJson() with a collection as parameter (PdoCollection);
- PdoToJson() with array as parameter and field name list registration (PdoRecordFieldMap);
- PdoToJson() with array as parameter and custom callback registration (PdoRecordCallback);
Each member exports the data several times (number of attempts is pre-defined), the best time is taken as the result. Full benchmark code (Windows only, uses QueryPerformanceCounter):
program test;
{$MODE OBJFPC}{$H+}{$MODESWITCH ADVANCEDRECORDS}
{$OPTIMIZATION NOORDERFIELDS}
uses
Windows, Classes, SysUtils, lgPdo, FpJsonRtti, lgJson;
const
AlphaLen = 64;
Alphabet: array[0..AlphaLen-1] of Char = '1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_ ';
function RandomString(aLength: SizeInt): string;
var
I: SizeInt;
p: PChar;
begin
Result := '';
SetLength(Result, aLength);
p := PChar(Result);
for I := 0 to Pred(aLength) do
p[I] := Alphabet[Random(AlphaLen)];
end;
type
TMyObj = class(TCollectionItem)
private
FId: Int64;
FName,
FValue,
FInfo: string;
published
property id: Int64 read FId write FId;
property name: string read FName write FName;
property value: string read FValue write FValue;
property info: string read FInfo write FInfo;
end;
TMyRec = record
Id: Int64;
Name,
Value,
Info: string;
class procedure JsonWrite(r: Pointer; aWriter: TJsonStrWriter); static;
end;
class procedure TMyRec.JsonWrite(r: Pointer; aWriter: TJsonStrWriter);
type
PMyRec = ^TMyRec;
const
fnId = 'id';
fnName = 'name';
fnValue = 'value';
fnInfo = 'info';
begin
with PMyRec(r)^ do
aWriter
.BeginObject
.Add(fnId, Id)
.Add(fnName, Name)
.Add(fnValue, Value)
.Add(fnInfo, Info)
.EndObject;
end;
const
MinLen = 5;
Range = 16;
var
TestColl: TCollection = nil;
RecList: array of TMyRec;
procedure UpdateData(aElCount: Integer);
var
I, OldSize: Integer;
const
IdRange = 9007199254740991;
begin
if TestColl = nil then
TestColl := TCollection.Create(TMyObj);
OldSize := TestColl.Count;
SetLength(RecList, aElCount);
for I := OldSize to Pred(aElCount) do
with TMyObj(TestColl.Add) do begin
Id := Random(IdRange);
Name := RandomString(MinLen + Random(Range));
Value := RandomString(MinLen + Random(Range));
Info := RandomString(MinLen + Random(Range));
RecList[I].Id := Id;
RecList[I].Name := Name;
RecList[I].Value := Value;
RecList[I].Info := Info;
end;
end;
function JsonStreamer: string;
begin
with TJSONStreamer.Create(nil) do
try
Result := CollectionToJSON(TestColl);
finally
Free;
end;
end;
function PdoCollection: string;
begin
Result := PdoToJson(TypeInfo(TestColl), TestColl);
end;
function PdoMap: string;
begin
Result := PdoToJson(TypeInfo(RecList), RecList);
end;
function PdoProc: string;
begin
Result := PdoToJson(TypeInfo(RecList), RecList);
end;
procedure RegisterFields;
begin
RegisterRecordFields(TypeInfo(TMyRec), ['id','name','value','info']);
end;
procedure RegisterCallback;
begin
RegisterRecordJsonProc(TypeInfo(TMyRec), @TMyRec.JsonWrite);
end;
procedure UnRegister;
begin
UnRegisterPdo(TypeInfo(TMyRec));
end;
type
TJsonFun = function: string;
TMember = record
Fun: TJsonFun;
Name: string;
BeforeRun,
AfterRun: TProcedure;
end;
TRound = record
ElCount,
TryCount: Integer;
JsonSize: string;
end;
const
MemberList: array of TMember = (
(Fun: @JsonStreamer; Name: 'FpJsonStreamer......'; BeforeRun: nil; AfterRun: nil),
(Fun: @PdoCollection; Name: 'PdoCollection.......'; BeforeRun: nil; AfterRun: nil),
(Fun: @PdoMap; Name: 'PdoRecordFieldMap...'; BeforeRun: @RegisterFields; AfterRun: @UnRegister),
(Fun: @PdoProc; Name: 'PdoRecordCallback...'; BeforeRun: @RegisterCallback; AfterRun: @UnRegister));
Rounds: array of TRound = (
(ElCount: 220; TryCount: 100; JsonSize: '20 KB'),
(ElCount: 1091; TryCount: 50; JsonSize: '100 KB'),
(ElCount: 10830; TryCount: 10; JsonSize: '1 MB'),
(ElCount: 108400; TryCount: 5; JsonSize: '10 MB'),
(ElCount: 542000; TryCount: 1; JsonSize: '50 MB')
);
procedure Run;
var
CurrRound: TRound;
Member: TMember;
I: Integer;
Start, Fin, Freq, Best: Int64;
Elapsed, Rate: Double;
s: string = '';
const
Million = 1000000;
begin
if not QueryPerformanceFrequency(Freq) or (Freq = 0) then
begin
WriteLn('Oops, something went wrong with QueryPerformanceFrequency()');
WriteLn('Error: ', GetLastOsError);
exit;
end;
for CurrRound in Rounds do begin
UpdateData(CurrRound.ElCount);
WriteLn('-------<JSON size ', CurrRound.JsonSize, '>-------');
for Member in MemberList do begin
if Member.BeforeRun <> nil then
Member.BeforeRun();
Best := High(Int64);
for I := 1 to CurrRound.TryCount do begin
QueryPerformanceCounter(Start);
s := Member.Fun();
QueryPerformanceCounter(Fin);
if Fin - Start < Best then
Best := Fin - Start;
end;
if Member.AfterRun <> nil then
Member.AfterRun();
Elapsed := Double(Best)/Freq;
WriteLn(' ', Member.Name, Double2Str(Double(Round(Elapsed*Million))/1000), ' ms.');
Rate := (Double(Length(s))/Million)/Elapsed;
WriteLn(' Rating: ', Double2Str(Double(Round(Rate*100))/100), ' MB/s.');
end;
end;
end;
begin
Run;
TestColl.Free;
end.
Results on author's machine:
-------<JSON size 20 KB>------- FpJsonStreamer......1.139 ms. Rating: 20.41 MB/s. PdoCollection.......0.135 ms. Rating: 149.47 MB/s. PdoRecordFieldMap...0.095 ms. Rating: 212.01 MB/s. PdoRecordCallback...0.06 ms. Rating: 336.12 MB/s. -------<JSON size 100 KB>------- FpJsonStreamer......6.171 ms. Rating: 18.76 MB/s. PdoCollection.......0.675 ms. Rating: 148.94 MB/s. PdoRecordFieldMap...0.475 ms. Rating: 211.85 MB/s. PdoRecordCallback...0.304 ms. Rating: 331.03 MB/s. -------<JSON size 1 MB>------- FpJsonStreamer......88.315 ms. Rating: 13.04 MB/s. PdoCollection.......7.062 ms. Rating: 141.6 MB/s. PdoRecordFieldMap...5.018 ms. Rating: 199.27 MB/s. PdoRecordCallback...3.317 ms. Rating: 301.47 MB/s. -------<JSON size 10 MB>------- FpJsonStreamer......3736.737 ms. Rating: 3.08 MB/s. PdoCollection.......76.899 ms. Rating: 130.16 MB/s. PdoRecordFieldMap...56.989 ms. Rating: 175.63 MB/s. PdoRecordCallback...40.309 ms. Rating: 248.3 MB/s. -------<JSON size 50 MB>------- FpJsonStreamer......86500.116 ms. Rating: 0.67 MB/s. PdoCollection.......379.533 ms. Rating: 131.94 MB/s. PdoRecordFieldMap...278.097 ms. Rating: 180.07 MB/s. PdoRecordCallback...192.341 ms. Rating: 260.35 MB/s.
It seems to work pretty fast.