44
55#include < ydb/core/formats/arrow/accessor/composite_serial/accessor.h>
66#include < ydb/core/formats/arrow/accessor/plain/constructor.h>
7+ #include < ydb/core/formats/arrow/accessor/sub_columns/json_value_path.h>
78#include < ydb/core/formats/arrow/save_load/loader.h>
89#include < ydb/core/formats/arrow/size_calcer.h>
910#include < ydb/core/formats/arrow/splitter/simple.h>
1011
1112#include < ydb/library/formats/arrow/protos/accessor.pb.h>
1213#include < ydb/library/formats/arrow/simple_arrays_cache.h>
1314
15+ #include < yql/essentials/minikql/jsonpath/parser/parser.h>
1416#include < yql/essentials/types/binary_json/format.h>
1517#include < yql/essentials/types/binary_json/write.h>
1618
@@ -113,103 +115,16 @@ TString TSubColumnsArray::SerializeToString(const TChunkConstructionData& extern
113115 return result;
114116}
115117
116- class TJsonRestorer {
117- private:
118- NJson::TJsonValue Result;
119-
120- public:
121- bool IsNull () const {
122- return !Result.IsDefined ();
123- }
124-
125- TConclusion<NBinaryJson::TBinaryJson> Finish () {
126- auto bJson = NBinaryJson::SerializeToBinaryJson (Result.GetStringRobust ());
127- if (const TString* val = std::get_if<TString>(&bJson)) {
128- return TConclusionStatus::Fail (*val);
129- } else if (const NBinaryJson::TBinaryJson* val = std::get_if<NBinaryJson::TBinaryJson>(&bJson)) {
130- return std::move (*val);
131- } else {
132- return TConclusionStatus::Fail (" undefined case for binary json construction" );
133- }
134- }
135-
136- void SetValueByPath (const TString& path, const NJson::TJsonValue& jsonValue) {
137- ui32 start = 0 ;
138- bool enqueue = false ;
139- bool wasEnqueue = false ;
140- NJson::TJsonValue* current = &Result;
141- for (ui32 i = 0 ; i < path.size (); ++i) {
142- if (path[i] == ' \\ ' ) {
143- ++i;
144- continue ;
145- }
146- if (path[i] == ' \' ' || path[i] == ' \" ' ) {
147- wasEnqueue = true ;
148- enqueue = !enqueue;
149- continue ;
150- }
151- if (enqueue) {
152- continue ;
153- }
154- if (path[i] == ' .' ) {
155- if (wasEnqueue) {
156- AFL_VERIFY (i > start + 2 );
157- TStringBuf key (path.data () + start + 1 , (i - 1 ) - start - 1 );
158- NJson::TJsonValue* currentNext = nullptr ;
159- if (current->GetValuePointer (key, ¤tNext)) {
160- current = currentNext;
161- } else {
162- current = ¤t->InsertValue (key, NJson::JSON_MAP);
163- }
164- } else {
165- AFL_VERIFY (i > start);
166- TStringBuf key (path.data () + start, i - start);
167- NJson::TJsonValue* currentNext = nullptr ;
168- if (current->GetValuePointer (key, ¤tNext)) {
169- current = currentNext;
170- } else {
171- ui32 keyIndex;
172- if (key.StartsWith (" [" ) && key.EndsWith (" ]" ) && TryFromString<ui32>(key.data () + 1 , key.size () - 2 , keyIndex)) {
173- AFL_VERIFY (!current->IsDefined () || current->IsArray () || (current->IsMap () && current->GetMapSafe ().empty ()));
174- current->SetType (NJson::JSON_ARRAY);
175- if (current->GetArraySafe ().size () <= keyIndex) {
176- current->GetArraySafe ().resize (keyIndex + 1 );
177- }
178- current = ¤t->GetArraySafe ()[keyIndex];
179- } else {
180- AFL_VERIFY (!current->IsArray ())(" current_type" , current->GetType ())(" current" , current->GetStringRobust ());
181- current = ¤t->InsertValue (key, NJson::JSON_MAP);
182- }
183- }
184- }
185- wasEnqueue = false ;
186- start = i + 1 ;
187- }
188- }
189- if (wasEnqueue) {
190- AFL_VERIFY (path.size () > start + 2 )(" path" , path)(" start" , start);
191- TStringBuf key (path.data () + start + 1 , (path.size () - 1 ) - start - 1 );
192- current->InsertValue (key, jsonValue);
193- } else {
194- AFL_VERIFY (path.size () >= start)(" path" , path)(" start" , start);
195- TStringBuf key (path.data () + start, (path.size ()) - start);
196- ui32 keyIndex;
197- if (key.StartsWith (" [" ) && key.EndsWith (" ]" ) && TryFromString<ui32>(key.data () + 1 , key.size () - 2 , keyIndex)) {
198- AFL_VERIFY (!current->IsDefined () || current->IsArray () || (current->IsMap () && current->GetMapSafe ().empty ()));
199- current->SetType (NJson::JSON_ARRAY);
200-
201- if (current->GetArraySafe ().size () <= keyIndex) {
202- current->GetArraySafe ().resize (keyIndex + 1 );
203- }
204- current->GetArraySafe ()[keyIndex] = jsonValue;
205- } else {
206- AFL_VERIFY (!current->IsArray ())(" key" , key)(" current" , current->GetStringRobust ())(" full" , Result.GetStringRobust ())(
207- " current_type" , current->GetType ());
208- current->InsertValue (key, jsonValue);
209- }
210- }
118+ TConclusion<NBinaryJson::TBinaryJson> ToBinaryJson (const TJsonRestorer& restorer) {
119+ auto bJson = NBinaryJson::SerializeToBinaryJson (restorer.GetResult ().GetStringRobust ());
120+ if (const TString* val = std::get_if<TString>(&bJson)) {
121+ return TConclusionStatus::Fail (*val);
122+ } else if (const NBinaryJson::TBinaryJson* val = std::get_if<NBinaryJson::TBinaryJson>(&bJson)) {
123+ return std::move (*val);
124+ } else {
125+ return TConclusionStatus::Fail (" undefined case for binary json construction" );
211126 }
212- };
127+ }
213128
214129std::shared_ptr<arrow::Array> TSubColumnsArray::BuildBJsonArray (const TColumnConstructionContext& context) const {
215130 auto it = BuildUnorderedIterator ();
@@ -233,7 +148,7 @@ std::shared_ptr<arrow::Array> TSubColumnsArray::BuildBJsonArray(const TColumnCon
233148 if (value.IsNull ()) {
234149 TStatusValidator::Validate (builder->AppendNull ());
235150 } else {
236- const TConclusion<NBinaryJson::TBinaryJson> bJson = value. Finish ( );
151+ const TConclusion<NBinaryJson::TBinaryJson> bJson = ToBinaryJson (value );
237152 NArrow::Append<arrow::BinaryType>(*builder, arrow::util::string_view (bJson->data (), bJson->size ()));
238153 }
239154 };
@@ -268,4 +183,32 @@ IChunkedArray::TLocalDataAddress TSubColumnsArray::DoGetLocalData(
268183 return TLocalDataAddress (BuildBJsonArray (TColumnConstructionContext ()), 0 , 0 );
269184}
270185
186+ bool TJsonRestorer::IsNull () const {
187+ return !Result.IsDefined ();
188+ }
189+
190+ const NJson::TJsonValue& TJsonRestorer::GetResult () const {
191+ return Result;
192+ }
193+
194+ void TJsonRestorer::SetValueByPath (const TString& path, const NJson::TJsonValue& jsonValue) {
195+ // Path may be empty (for backward compatibility), so make it $."" in this case
196+ auto splitResult = NSubColumns::SplitJsonPath (NSubColumns::ToJsonPath (path.empty () ? " \"\" " : path), NSubColumns::TJsonPathSplitSettings{.FillTypes = true });
197+ AFL_VERIFY (splitResult.IsSuccess ())(" error" , splitResult.GetErrorMessage ())(" path" , path);
198+ const auto [pathItems, pathTypes, _] = splitResult.DetachResult ();
199+ AFL_VERIFY (pathItems.size () > 0 );
200+ AFL_VERIFY (pathItems.size () == pathTypes.size ());
201+ NJson::TJsonValue* current = &Result;
202+ for (decltype (pathItems)::size_type i = 0 ; i < pathItems.size () - 1 ; ++i) {
203+ AFL_VERIFY (pathTypes[i] == NYql::NJsonPath::EJsonPathItemType::MemberAccess);
204+ NJson::TJsonValue* currentNext = nullptr ;
205+ if (current->GetValuePointer (pathItems[i], ¤tNext)) {
206+ current = currentNext;
207+ } else {
208+ current = ¤t->InsertValue (pathItems[i], NJson::JSON_MAP);
209+ }
210+ }
211+ current->InsertValue (pathItems[pathItems.size () - 1 ], jsonValue);
212+ }
213+
271214} // namespace NKikimr::NArrow::NAccessor
0 commit comments