|
|
@@ -22,7 +22,7 @@ import java.util.regex.*; |
|
|
|
// What's the reason to extending LanguageUtils? |
|
|
|
// Just doing it because already done in the previous code. |
|
|
|
public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
// Composite Letters. |
|
|
|
// Composite Letters. |
|
|
|
private final static HashMap<String, String> composites = new HashMap<String, String>() { |
|
|
|
{ |
|
|
|
put("ক্ষ", "kkh"); |
|
|
@@ -39,7 +39,25 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
put("্ব", "w"); |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
// Vowels Only |
|
|
|
private final static HashMap<String, String> vowels = new HashMap<String, String>() { |
|
|
|
{ |
|
|
|
put("আ", "aa"); |
|
|
|
put("অ", "a"); |
|
|
|
put("ই", "i"); |
|
|
|
put("ঈ", "ii"); |
|
|
|
put("উ", "u"); |
|
|
|
put("ঊ", "uu"); |
|
|
|
put("ঋ", "ri"); |
|
|
|
put("এ", "e"); |
|
|
|
put("ঐ", "oi"); |
|
|
|
put("ও", "o"); |
|
|
|
put("ঔ", "ou"); |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
// Vowels and Hasants |
|
|
|
private final static HashMap<String, String> vowelsAndHasants = new HashMap<String, String>() { |
|
|
|
{ |
|
|
|
put("আ", "aa"); |
|
|
@@ -149,7 +167,8 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
}; |
|
|
|
|
|
|
|
// The regex to extract Bengali characters in nested groups. |
|
|
|
private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])| "; |
|
|
|
private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])|(\\s)"; |
|
|
|
|
|
|
|
private final static Pattern bengaliRegex = Pattern.compile(pattern); |
|
|
|
|
|
|
|
private static String getVal(String key) { |
|
|
@@ -173,7 +192,15 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
|
|
|
|
Matcher m = bengaliRegex.matcher(txt); |
|
|
|
StringBuffer sb = new StringBuffer(); |
|
|
|
String lastChar = ""; |
|
|
|
boolean lastHadComposition = false; |
|
|
|
boolean lastHadKaar = false; |
|
|
|
boolean nextNeedsO = false; |
|
|
|
int lastHadO = 0; |
|
|
|
while (m.find()) { |
|
|
|
boolean thisNeedsO = false; |
|
|
|
boolean changePronounciation = false; |
|
|
|
boolean thisHadKaar = false; |
|
|
|
String appendableString = ""; |
|
|
|
String reff = m.group(1); |
|
|
|
if (reff != null) { |
|
|
@@ -200,6 +227,10 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
g = g + 1; |
|
|
|
} |
|
|
|
} |
|
|
|
if (m.group(2) != null && m.group(2).equals("ক্ষ")) { |
|
|
|
changePronounciation = true; |
|
|
|
thisNeedsO = true; |
|
|
|
} |
|
|
|
int g = 6; |
|
|
|
while (g < 10) { |
|
|
|
String key = getVal(m.group(g)); |
|
|
@@ -209,16 +240,24 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
} |
|
|
|
g = g + 1; |
|
|
|
} |
|
|
|
String phala = m.group(8); |
|
|
|
if (phala != null && phala.equals("্য")) { |
|
|
|
changePronounciation = true; |
|
|
|
thisNeedsO = true; |
|
|
|
} |
|
|
|
String jukto = m.group(4); |
|
|
|
if (jukto != null) { |
|
|
|
thisNeedsO = true; |
|
|
|
} |
|
|
|
String kaar = m.group(10); |
|
|
|
if (kaar != null) { |
|
|
|
String kaarStr = letters.get(kaar); |
|
|
|
if (kaarStr != null) { |
|
|
|
appendableString = appendableString + kaarStr; |
|
|
|
} |
|
|
|
} else if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0))) { |
|
|
|
// Adding 'a' like ITRANS if no vowel is present. |
|
|
|
// TODO: Have to add it dynamically using Bengali grammer rules. |
|
|
|
appendableString = appendableString + "a"; |
|
|
|
if (kaarStr.equals("i") || kaarStr.equals("ii") || kaarStr.equals("u") || kaarStr.equals("uu")) { |
|
|
|
changePronounciation = true; |
|
|
|
} |
|
|
|
} |
|
|
|
String singleton = m.group(11); |
|
|
|
if (singleton != null) { |
|
|
@@ -227,6 +266,9 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
appendableString = appendableString + singleStr; |
|
|
|
} |
|
|
|
} |
|
|
|
if (changePronounciation && lastChar.equals("a")) { |
|
|
|
sb.setCharAt(sb.length() - 1, 'o'); |
|
|
|
} |
|
|
|
String others = m.group(0); |
|
|
|
if (others != null) { |
|
|
|
|
|
|
@@ -234,7 +276,41 @@ public class BengaliLanguageUtils extends LanguageUtils { |
|
|
|
appendableString = appendableString + others; |
|
|
|
} |
|
|
|
} |
|
|
|
String whitespace = m.group(12); |
|
|
|
if (nextNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) { |
|
|
|
appendableString = appendableString + "o"; |
|
|
|
lastHadO++; |
|
|
|
thisNeedsO = false; |
|
|
|
} |
|
|
|
|
|
|
|
if (((kaar != null && lastHadO > 1) || whitespace != null) && !lastHadKaar && sb.length() > 0 |
|
|
|
&& sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) { |
|
|
|
sb.deleteCharAt(sb.length() - 1); |
|
|
|
lastHadO = 0; |
|
|
|
} |
|
|
|
nextNeedsO = false; |
|
|
|
if (thisNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) { |
|
|
|
appendableString = appendableString + "o"; |
|
|
|
lastHadO++; |
|
|
|
} |
|
|
|
if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0)) && kaar == null) { |
|
|
|
nextNeedsO = true; |
|
|
|
} |
|
|
|
if (reff != null || m.group(4) != null || m.group(6) != null) { |
|
|
|
lastHadComposition = true; |
|
|
|
} else { |
|
|
|
lastHadComposition = false; |
|
|
|
} |
|
|
|
if (kaar != null) { |
|
|
|
lastHadKaar = true; |
|
|
|
} else { |
|
|
|
lastHadKaar = false; |
|
|
|
} |
|
|
|
m.appendReplacement(sb, appendableString); |
|
|
|
lastChar = appendableString; |
|
|
|
} |
|
|
|
if (!lastHadKaar && sb.length() > 0 && sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) { |
|
|
|
sb.deleteCharAt(sb.length() - 1); |
|
|
|
} |
|
|
|
m.appendTail(sb); |
|
|
|
return sb.toString(); |