From 339a568c2abb02754ccb375f7b623bf6eb50c7ca Mon Sep 17 00:00:00 2001
From: vaibhav45sktech <vm872108@gmail.com>
Date: Sat, 24 Jan 2026 17:57:43 +0000
Subject: [PATCH 1/3] Refactor GenderExtractor pronoun-counting heuristic

---
 .../extraction/mappings/GenderExtractor.scala | 175 +++++++++++-------
 1 file changed, 103 insertions(+), 72 deletions(-)

diff --git a/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala b/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala
index 32b1221e59..0d58060332 100644
--- a/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala
+++ b/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala
@@ -1,88 +1,119 @@
 package org.dbpedia.extraction.mappings
 
-import org.dbpedia.extraction.config.provenance.DBpediaDatasets
-import org.dbpedia.extraction.transform.Quad
-import org.dbpedia.extraction.wikiparser._
 import org.dbpedia.extraction.config.mappings.GenderExtractorConfig
+import org.dbpedia.extraction.config.provenance.DBpediaDatasets
 import org.dbpedia.extraction.ontology.Ontology
+import org.dbpedia.extraction.ontology.datatypes.Datatype
+import org.dbpedia.extraction.transform.Quad
 import org.dbpedia.extraction.util.Language
+import org.dbpedia.extraction.wikiparser._
 import util.matching.Regex
-import org.dbpedia.extraction.ontology.datatypes.Datatype
 import scala.language.reflectiveCalls
-
 /**
- * Extracts the grammatical gender of people using a heuristic.
+ * Extracts the grammatical gender of people using a pronoun-based heuristic.
  */
-class GenderExtractor( 
-  context : {
-    def mappings : Mappings
-    def ontology : Ontology
-    def language : Language
-    def redirects : Redirects 
-  } 
-) 
-extends MappingExtractor(context)
-{
-  private val language = context.language.wikiCode
-
-  private val pronounMap: Map[String, String] = GenderExtractorConfig.pronounsMap(language)
-
-  // FIXME: don't use string constant, use context.ontology (or at least RdfNamespace.FOAF)
-  private val genderProperty = "http://xmlns.com/foaf/0.1/gender"
-  // FIXME: don't use string constant, use context.ontology (or at least RdfNamespace.RDF)
-  private val typeProperty = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
-  // FIXME: don't use string constant, use context.ontology (or at least DBpediaNamespace.ONTOLOGY)
-  private val personUri = "http://dbpedia.org/ontology/Person"
+class GenderExtractor(
+  context: {
+    def mappings: Mappings
+    def ontology: Ontology
+    def language: Language
+    def redirects: Redirects
+  }
+) extends MappingExtractor(context) {
+  
+  /** Language code (en, de, fr, etc.) */
+  private val language: String =
+    context.language.wikiCode
+  /** Pronoun → gender map (from config) */
+  private val pronounMap: Map[String, String] =
+    GenderExtractorConfig.pronounsMap(language)
+
+  /** Ontology-based properties & classes */
+  private val genderProperty =
+    context.ontology.properties("foaf:gender")
+
+  private val typeProperty =
+    context.ontology.properties("rdf:type")
+
+  private val personClass =
+    context.ontology.classes("Person")
+
+  private val langStringDatatype =
+    new Datatype("rdf:langString")
 
   override val datasets = Set(DBpediaDatasets.Genders)
 
-  override def extract(node : PageNode, subjectUri : String) : Seq[Quad] =
-  {
-    // apply mappings
-    // FIXME: To find out if it's a person, we extract all mapped properties a second time and throw them away.
-    // Find a better solution. For example: Make sure that this extractor runs after the 
-    // MappingExtractor. In the MappingExtractor, set the page type as an attriute.
-    // Even better: in the first extraction pass, extract all types. Use them in the second pass.
-    val mappingGraph = super.extract(node, subjectUri)
-
-    // if this page is mapped onto Person
-    if (mappingGraph.exists(q => q.predicate == typeProperty && q.value == personUri))
-    {
-      // get the page text
-      val wikiText: String = node.toWikiText
-
-      // count gender pronouns
-      var genderCounts: Map[String, Int] = Map()
-      for ((pronoun, gender) <- pronounMap)
-      {
-        val regex = new Regex("\\W" + pronoun + "\\W")
-        val count = regex.findAllIn(wikiText).size
-        val oldCount = genderCounts.getOrElse(gender, 0)
-        genderCounts = genderCounts.updated(gender, oldCount + count)
-      }
-
-      // get maximum gender
-      var maxGender = ""
-      var maxCount = 0
-      var secondCount = 0.0
-      for ((gender, count) <- genderCounts)
-      {
-        if (count > maxCount)
-        {
-          secondCount = maxCount.toDouble
-          maxCount = count
-          maxGender = gender
-        }
-      }
-
-      // output triple for maximum gender
-      if (maxGender != "" && maxCount > GenderExtractorConfig.minCount && maxCount/secondCount > GenderExtractorConfig.minDifference)
-      {
-        return Seq(new Quad(context.language, DBpediaDatasets.Genders, subjectUri, genderProperty, maxGender, node.sourceIri, new Datatype("rdf:langString")))
-      }
+  override def extract(node: PageNode, subjectUri: String): Seq[Quad] = {
+
+    /** First pass: extract mappings to detect rdf:type */
+    val mappingGraph: Seq[Quad] =
+      super.extract(node, subjectUri)
+
+    /** Check if entity is a dbo:Person */
+    val isPerson: Boolean =
+      mappingGraph.exists(q =>
+        q.predicate == typeProperty.uri &&
+        q.value == personClass.uri
+      )
+
+    if (!isPerson) return Seq.empty
+
+    /** Get full wiki text */
+    val wikiText: String =
+      node.toWikiText
+
+    /** Count pronouns by gender */
+    var genderCounts: Map[String, Int] =
+      Map.empty.withDefaultValue(0)
+
+    for ((pronoun, gender) <- pronounMap) {
+      val regex =
+        new Regex("(?i)\\b" + Regex.quote(pronoun) + "\\b")
+
+      val count =
+        regex.findAllIn(wikiText).size
+
+      genderCounts =
+        genderCounts.updated(gender, genderCounts(gender) + count)
     }
 
-    Seq.empty
-  }
+    if (genderCounts.isEmpty) return Seq.empty
+
+    /** Find dominant gender */
+    val sorted =
+      genderCounts.toSeq.sortBy(-_._2)
+
+    val (maxGender, maxCount) =
+      sorted.head
+
+    val secondCount: Double =
+      if (sorted.size > 1) sorted(1)._2.toDouble else 0.0
 
+    /** Avoid division-by-zero */
+    val differenceOk: Boolean =
+      secondCount == 0.0 ||
+        (maxCount.toDouble / secondCount) >
+          GenderExtractorConfig.minDifference
+
+    /** Threshold checks */
+    if (
+      maxGender.nonEmpty &&
+      maxCount > GenderExtractorConfig.minCount &&
+      differenceOk
+    ) {
+      Seq(
+        new Quad(
+          context.language,
+          DBpediaDatasets.Genders,
+          subjectUri,
+          genderProperty,
+          maxGender,
+          node.sourceIri,
+          langStringDatatype
+        )
+      )
+    } else {
+      Seq.empty
+    }
+  }
 }

From 70fbc63222404a0af22d3d529845d3a9cc554c1a Mon Sep 17 00:00:00 2001
From: vaibhav45sktech <vm872108@gmail.com>
Date: Tue, 27 Jan 2026 22:22:02 +0530
Subject: [PATCH 2/3] Fix template text extraction for lang, native name, and
 Nihongo templates

---
 .../src/main/resources/templatetransform.json | 14 ++++++++++++-
 .../TemplateTransformParserTest.scala         | 20 +++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/core/src/main/resources/templatetransform.json b/core/src/main/resources/templatetransform.json
index fce2421f03..2d075c2515 100644
--- a/core/src/main/resources/templatetransform.json
+++ b/core/src/main/resources/templatetransform.json
@@ -30,7 +30,19 @@
     },
     "Lang":{
       "transformer":"textNode",
-      "replace": "<br />$(3||)<br />"
+      "replace": "$(2||)"
+    },
+    "Native name|native_name":{
+      "transformer":"textNode",
+      "replace": "$(2||)"
+    },
+    "Nihongo2":{
+      "transformer":"textNode",
+      "replace": "$(1||)"
+    },
+    "Nihongo":{
+      "transformer":"textNode",
+      "replace": "$(2||)"
     },
     "Marriage":{
       "transformer":"extractChildren",
diff --git a/core/src/test/scala/org/dbpedia/extraction/wikiparser/TemplateTransformParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/wikiparser/TemplateTransformParserTest.scala
index af95a30f41..dcb923d34d 100644
--- a/core/src/test/scala/org/dbpedia/extraction/wikiparser/TemplateTransformParserTest.scala
+++ b/core/src/test/scala/org/dbpedia/extraction/wikiparser/TemplateTransformParserTest.scala
@@ -53,6 +53,26 @@ class TemplateTransformParserTest extends FlatSpec with Matchers
       parse("en", "{{url|https://www.dji.com DJI.com}}") should be (Some("[https://www.dji.com]"))
     }
 
+  it should "extract text from {{lang|nap|Abbrùzzu}}" in
+    {
+      parse("en", "{{lang|nap|Abbrùzzu}}") should be (Some("Abbrùzzu"))
+    }
+
+  it should "extract text from {{native name|nap|Abbrùzze}}" in
+    {
+      parse("en", "{{native name|nap|Abbrùzze}}") should be (Some("Abbrùzze"))
+    }
+
+  it should "extract text from {{Nihongo2|東京都}}" in
+    {
+      parse("en", "{{Nihongo2|東京都}}") should be (Some("東京都"))
+    }
+
+  it should "extract text from {{Nihongo|Tokyo|東京|Tōkyō}}" in
+    {
+      parse("en", "{{Nihongo|Tokyo|東京|Tōkyō}}") should be (Some("東京"))
+    }
+
 
   private val wikiParser = WikiParser.getInstance()
 

From 10f785844b6b01f155b8f745fef147a42a963e4a Mon Sep 17 00:00:00 2001
From: vaibhav45sktech <vm872108@gmail.com>
Date: Tue, 27 Jan 2026 22:45:08 +0530
Subject: [PATCH 3/3] Revert GenderExtractor.scala to upstream version

---
 .../extraction/mappings/GenderExtractor.scala | 175 +++++++-----------
 1 file changed, 72 insertions(+), 103 deletions(-)

diff --git a/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala b/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala
index 0d58060332..32b1221e59 100644
--- a/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala
+++ b/core/src/main/scala/org/dbpedia/extraction/mappings/GenderExtractor.scala
@@ -1,119 +1,88 @@
 package org.dbpedia.extraction.mappings
 
-import org.dbpedia.extraction.config.mappings.GenderExtractorConfig
 import org.dbpedia.extraction.config.provenance.DBpediaDatasets
-import org.dbpedia.extraction.ontology.Ontology
-import org.dbpedia.extraction.ontology.datatypes.Datatype
 import org.dbpedia.extraction.transform.Quad
-import org.dbpedia.extraction.util.Language
 import org.dbpedia.extraction.wikiparser._
+import org.dbpedia.extraction.config.mappings.GenderExtractorConfig
+import org.dbpedia.extraction.ontology.Ontology
+import org.dbpedia.extraction.util.Language
 import util.matching.Regex
+import org.dbpedia.extraction.ontology.datatypes.Datatype
 import scala.language.reflectiveCalls
+
 /**
- * Extracts the grammatical gender of people using a pronoun-based heuristic.
+ * Extracts the grammatical gender of people using a heuristic.
  */
-class GenderExtractor(
-  context: {
-    def mappings: Mappings
-    def ontology: Ontology
-    def language: Language
-    def redirects: Redirects
-  }
-) extends MappingExtractor(context) {
-  
-  /** Language code (en, de, fr, etc.) */
-  private val language: String =
-    context.language.wikiCode
-  /** Pronoun → gender map (from config) */
-  private val pronounMap: Map[String, String] =
-    GenderExtractorConfig.pronounsMap(language)
-
-  /** Ontology-based properties & classes */
-  private val genderProperty =
-    context.ontology.properties("foaf:gender")
-
-  private val typeProperty =
-    context.ontology.properties("rdf:type")
-
-  private val personClass =
-    context.ontology.classes("Person")
-
-  private val langStringDatatype =
-    new Datatype("rdf:langString")
+class GenderExtractor( 
+  context : {
+    def mappings : Mappings
+    def ontology : Ontology
+    def language : Language
+    def redirects : Redirects 
+  } 
+) 
+extends MappingExtractor(context)
+{
+  private val language = context.language.wikiCode
+
+  private val pronounMap: Map[String, String] = GenderExtractorConfig.pronounsMap(language)
+
+  // FIXME: don't use string constant, use context.ontology (or at least RdfNamespace.FOAF)
+  private val genderProperty = "http://xmlns.com/foaf/0.1/gender"
+  // FIXME: don't use string constant, use context.ontology (or at least RdfNamespace.RDF)
+  private val typeProperty = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+  // FIXME: don't use string constant, use context.ontology (or at least DBpediaNamespace.ONTOLOGY)
+  private val personUri = "http://dbpedia.org/ontology/Person"
 
   override val datasets = Set(DBpediaDatasets.Genders)
 
-  override def extract(node: PageNode, subjectUri: String): Seq[Quad] = {
-
-    /** First pass: extract mappings to detect rdf:type */
-    val mappingGraph: Seq[Quad] =
-      super.extract(node, subjectUri)
-
-    /** Check if entity is a dbo:Person */
-    val isPerson: Boolean =
-      mappingGraph.exists(q =>
-        q.predicate == typeProperty.uri &&
-        q.value == personClass.uri
-      )
-
-    if (!isPerson) return Seq.empty
-
-    /** Get full wiki text */
-    val wikiText: String =
-      node.toWikiText
-
-    /** Count pronouns by gender */
-    var genderCounts: Map[String, Int] =
-      Map.empty.withDefaultValue(0)
-
-    for ((pronoun, gender) <- pronounMap) {
-      val regex =
-        new Regex("(?i)\\b" + Regex.quote(pronoun) + "\\b")
-
-      val count =
-        regex.findAllIn(wikiText).size
-
-      genderCounts =
-        genderCounts.updated(gender, genderCounts(gender) + count)
+  override def extract(node : PageNode, subjectUri : String) : Seq[Quad] =
+  {
+    // apply mappings
+    // FIXME: To find out if it's a person, we extract all mapped properties a second time and throw them away.
+    // Find a better solution. For example: Make sure that this extractor runs after the 
+    // MappingExtractor. In the MappingExtractor, set the page type as an attriute.
+    // Even better: in the first extraction pass, extract all types. Use them in the second pass.
+    val mappingGraph = super.extract(node, subjectUri)
+
+    // if this page is mapped onto Person
+    if (mappingGraph.exists(q => q.predicate == typeProperty && q.value == personUri))
+    {
+      // get the page text
+      val wikiText: String = node.toWikiText
+
+      // count gender pronouns
+      var genderCounts: Map[String, Int] = Map()
+      for ((pronoun, gender) <- pronounMap)
+      {
+        val regex = new Regex("\\W" + pronoun + "\\W")
+        val count = regex.findAllIn(wikiText).size
+        val oldCount = genderCounts.getOrElse(gender, 0)
+        genderCounts = genderCounts.updated(gender, oldCount + count)
+      }
+
+      // get maximum gender
+      var maxGender = ""
+      var maxCount = 0
+      var secondCount = 0.0
+      for ((gender, count) <- genderCounts)
+      {
+        if (count > maxCount)
+        {
+          secondCount = maxCount.toDouble
+          maxCount = count
+          maxGender = gender
+        }
+      }
+
+      // output triple for maximum gender
+      if (maxGender != "" && maxCount > GenderExtractorConfig.minCount && maxCount/secondCount > GenderExtractorConfig.minDifference)
+      {
+        return Seq(new Quad(context.language, DBpediaDatasets.Genders, subjectUri, genderProperty, maxGender, node.sourceIri, new Datatype("rdf:langString")))
+      }
     }
 
-    if (genderCounts.isEmpty) return Seq.empty
-
-    /** Find dominant gender */
-    val sorted =
-      genderCounts.toSeq.sortBy(-_._2)
-
-    val (maxGender, maxCount) =
-      sorted.head
-
-    val secondCount: Double =
-      if (sorted.size > 1) sorted(1)._2.toDouble else 0.0
-
-    /** Avoid division-by-zero */
-    val differenceOk: Boolean =
-      secondCount == 0.0 ||
-        (maxCount.toDouble / secondCount) >
-          GenderExtractorConfig.minDifference
-
-    /** Threshold checks */
-    if (
-      maxGender.nonEmpty &&
-      maxCount > GenderExtractorConfig.minCount &&
-      differenceOk
-    ) {
-      Seq(
-        new Quad(
-          context.language,
-          DBpediaDatasets.Genders,
-          subjectUri,
-          genderProperty,
-          maxGender,
-          node.sourceIri,
-          langStringDatatype
-        )
-      )
-    } else {
-      Seq.empty
-    }
+    Seq.empty
   }
+
 }