@@ -81,6 +81,13 @@ def get_index_fields(self) -> list[SearchableField]:
8181 type = SearchFieldDataType .String ,
8282 collection = True ,
8383 ),
84+ SimpleField (
85+ name = "PageNumber" ,
86+ type = SearchFieldDataType .Int64 ,
87+ sortable = True ,
88+ filterable = True ,
89+ facetable = True ,
90+ ),
8491 SearchField (
8592 name = "ChunkEmbedding" ,
8693 type = SearchFieldDataType .Collection (SearchFieldDataType .Single ),
@@ -137,19 +144,6 @@ def get_index_fields(self) -> list[SearchableField]:
137144 ),
138145 ]
139146
140- if self .enable_page_by_chunking :
141- fields .extend (
142- [
143- SimpleField (
144- name = "PageNumber" ,
145- type = SearchFieldDataType .Int64 ,
146- sortable = True ,
147- filterable = True ,
148- facetable = True ,
149- )
150- ]
151- )
152-
153147 return fields
154148
155149 def get_semantic_search (self ) -> SemanticSearch :
@@ -194,11 +188,12 @@ def get_skills(self) -> list:
194188 if self .enable_page_by_chunking :
195189 embedding_skill = self .get_vector_skill (
196190 "/document/page_wise_layout/*" ,
197- "/document/page_wise_layout/*/chunk_cleaned " ,
191+ "/document/page_wise_layout/*/final_cleaned_text " ,
198192 )
199193 else :
200194 embedding_skill = self .get_vector_skill (
201- "/document/chunk_mark_ups/*" , "/document/chunk_mark_ups/*/chunk_cleaned"
195+ "/document/chunk_mark_ups/*" ,
196+ "/document/chunk_mark_ups/*/final_cleaned_text" ,
202197 )
203198
204199 if self .enable_page_by_chunking :
@@ -229,7 +224,7 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
229224 source_context = "/document/page_wise_layout/*"
230225 mappings = [
231226 InputFieldMappingEntry (
232- name = "Chunk" , source = "/document/page_wise_layout/*/chunk_mark_up "
227+ name = "Chunk" , source = "/document/page_wise_layout/*/final_mark_up "
233228 ),
234229 InputFieldMappingEntry (
235230 name = "ChunkEmbedding" ,
@@ -239,24 +234,25 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
239234 InputFieldMappingEntry (name = "SourceUri" , source = "/document/SourceUri" ),
240235 InputFieldMappingEntry (
241236 name = "Sections" ,
242- source = "/document/page_wise_layout/*/chunk_sections " ,
237+ source = "/document/page_wise_layout/*/final_sections " ,
243238 ),
244239 InputFieldMappingEntry (
245240 name = "ChunkFigures" ,
246- source = "/document/page_wise_layout/*/chunk_figures /*" ,
241+ source = "/document/page_wise_layout/*/final_chunk_figures /*" ,
247242 ),
248243 InputFieldMappingEntry (
249244 name = "DateLastModified" , source = "/document/DateLastModified"
250245 ),
251246 InputFieldMappingEntry (
252- name = "PageNumber" , source = "/document/page_wise_layout/*/page_number"
247+ name = "PageNumber" ,
248+ source = "/document/page_wise_layout/*/final_page_number" ,
253249 ),
254250 ]
255251 else :
256252 source_context = "/document/chunk_mark_ups/*"
257253 mappings = [
258254 InputFieldMappingEntry (
259- name = "Chunk" , source = "/document/chunk_mark_ups/*/chunk_mark_up "
255+ name = "Chunk" , source = "/document/chunk_mark_ups/*/final_mark_up "
260256 ),
261257 InputFieldMappingEntry (
262258 name = "ChunkEmbedding" ,
@@ -265,15 +261,19 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
265261 InputFieldMappingEntry (name = "Title" , source = "/document/Title" ),
266262 InputFieldMappingEntry (name = "SourceUri" , source = "/document/SourceUri" ),
267263 InputFieldMappingEntry (
268- name = "Sections" , source = "/document/chunk_mark_ups/*/chunk_sections "
264+ name = "Sections" , source = "/document/chunk_mark_ups/*/final_sections "
269265 ),
270266 InputFieldMappingEntry (
271267 name = "ChunkFigures" ,
272- source = "/document/chunk_mark_ups/*/chunk_figures /*" ,
268+ source = "/document/chunk_mark_ups/*/final_chunk_figures /*" ,
273269 ),
274270 InputFieldMappingEntry (
275271 name = "DateLastModified" , source = "/document/DateLastModified"
276272 ),
273+ InputFieldMappingEntry (
274+ name = "PageNumber" ,
275+ source = "/document/chunk_mark_ups/*/final_page_number" ,
276+ ),
277277 ]
278278
279279 index_projections = SearchIndexerIndexProjection (
0 commit comments