Free as in Freedom: Codeberg.org. Create your repos!
Browse Source

Enhancement and Fixes for Bengali Transliteration. (#1263)

* Added various fixes and enhancements for Bengali transliteration.

* various fixes and enhancements for Bengali transliteration

* fixed a coding typo [master]

* Boolean lowercase and added .project in .gitignore

* Boolean lowercase and added .project in .gitignore

* typo fix [master]

* fixed negative index error [master]

* fixed negative index error [master]

* unprinted character fix [master]

* enhanced transliteration [master]

* lowercased boolean and replaced Integer with int [master]

* removed .settings, .classpath and .project and added them to .gitignore too.

* bug fix and multilingual testcase [master]
tags/0.31.0
উৎসব রায়(Utsob Roy) 6 months ago
parent
commit
e66d0a2d10

+ 3
- 0
.gitignore View File

@@ -31,3 +31,6 @@ proguard/
31 31
 MPChartLib
32 32
 
33 33
 fw.dirs
34
+**/.project
35
+**/.settings
36
+**/.classpath

+ 2
- 0
.settings/org.eclipse.buildship.core.prefs View File

@@ -0,0 +1,2 @@
1
+connection.project.dir=
2
+eclipse.preferences.version=1

+ 0
- 11
app/src/main/.classpath View File

@@ -1,11 +0,0 @@
1
-<?xml version="1.0" encoding="UTF-8"?>
2
-<classpath>
3
-	<classpathentry kind="src" path="src"/>
4
-	<classpathentry kind="src" path="gen"/>
5
-	<classpathentry kind="src" path="java"/>
6
-	<classpathentry kind="src" path="aidl"/>
7
-	<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
8
-	<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
9
-	<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.DEPENDENCIES"/>
10
-	<classpathentry kind="output" path="bin/classes"/>
11
-</classpath>

+ 0
- 33
app/src/main/.project View File

@@ -1,33 +0,0 @@
1
-<?xml version="1.0" encoding="UTF-8"?>
2
-<projectDescription>
3
-	<name>Gadgetbridge</name>
4
-	<comment></comment>
5
-	<projects>
6
-	</projects>
7
-	<buildSpec>
8
-		<buildCommand>
9
-			<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
10
-			<arguments>
11
-			</arguments>
12
-		</buildCommand>
13
-		<buildCommand>
14
-			<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
15
-			<arguments>
16
-			</arguments>
17
-		</buildCommand>
18
-		<buildCommand>
19
-			<name>org.eclipse.jdt.core.javabuilder</name>
20
-			<arguments>
21
-			</arguments>
22
-		</buildCommand>
23
-		<buildCommand>
24
-			<name>com.android.ide.eclipse.adt.ApkBuilder</name>
25
-			<arguments>
26
-			</arguments>
27
-		</buildCommand>
28
-	</buildSpec>
29
-	<natures>
30
-		<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
31
-		<nature>org.eclipse.jdt.core.javanature</nature>
32
-	</natures>
33
-</projectDescription>

+ 0
- 12
app/src/main/.settings/org.eclipse.jdt.core.prefs View File

@@ -1,12 +0,0 @@
1
-eclipse.preferences.version=1
2
-org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
-org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
5
-org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6
-org.eclipse.jdt.core.compiler.compliance=1.7
7
-org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8
-org.eclipse.jdt.core.compiler.debug.localVariable=generate
9
-org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11
-org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12
-org.eclipse.jdt.core.compiler.source=1.7

+ 82
- 6
app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java View File

@@ -22,7 +22,7 @@ import java.util.regex.*;
22 22
 // What's the reason to extending LanguageUtils?
23 23
 // Just doing it because already done in the previous code.
24 24
 public class BengaliLanguageUtils extends LanguageUtils {
25
-    // Composite Letters.
25
+        // Composite Letters.
26 26
     private final static HashMap<String, String> composites = new HashMap<String, String>() {
27 27
         {
28 28
             put("ক্ষ", "kkh");
@@ -39,7 +39,25 @@ public class BengaliLanguageUtils extends LanguageUtils {
39 39
             put("্ব", "w");
40 40
         }
41 41
     };
42
+
42 43
     // Vowels Only
44
+    private final static HashMap<String, String> vowels = new HashMap<String, String>() {
45
+        {
46
+            put("আ", "aa");
47
+            put("অ", "a");
48
+            put("ই", "i");
49
+            put("ঈ", "ii");
50
+            put("উ", "u");
51
+            put("ঊ", "uu");
52
+            put("ঋ", "ri");
53
+            put("এ", "e");
54
+            put("ঐ", "oi");
55
+            put("ও", "o");
56
+            put("ঔ", "ou");
57
+        }
58
+    };
59
+
60
+    // Vowels and Hasants
43 61
     private final static HashMap<String, String> vowelsAndHasants = new HashMap<String, String>() {
44 62
         {
45 63
             put("আ", "aa");
@@ -149,7 +167,8 @@ public class BengaliLanguageUtils extends LanguageUtils {
149 167
     };
150 168
 
151 169
     // The regex to extract Bengali characters in nested groups.
152
-    private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((‍){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])| ";
170
+    private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((‍){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])|(\\s)";
171
+
153 172
     private final static Pattern bengaliRegex = Pattern.compile(pattern);
154 173
 
155 174
     private static String getVal(String key) {
@@ -173,7 +192,15 @@ public class BengaliLanguageUtils extends LanguageUtils {
173 192
 
174 193
         Matcher m = bengaliRegex.matcher(txt);
175 194
         StringBuffer sb = new StringBuffer();
195
+        String lastChar = "";
196
+        boolean lastHadComposition = false;
197
+        boolean lastHadKaar = false;
198
+        boolean nextNeedsO = false;
199
+        int lastHadO = 0;
176 200
         while (m.find()) {
201
+            boolean thisNeedsO = false;
202
+            boolean changePronounciation = false;
203
+            boolean thisHadKaar = false;
177 204
             String appendableString = "";
178 205
             String reff = m.group(1);
179 206
             if (reff != null) {
@@ -200,6 +227,10 @@ public class BengaliLanguageUtils extends LanguageUtils {
200 227
                     g = g + 1;
201 228
                 }
202 229
             }
230
+            if (m.group(2) != null && m.group(2).equals("ক্ষ")) {
231
+                changePronounciation = true;
232
+                thisNeedsO = true;
233
+            }
203 234
             int g = 6;
204 235
             while (g < 10) {
205 236
                 String key = getVal(m.group(g));
@@ -209,16 +240,24 @@ public class BengaliLanguageUtils extends LanguageUtils {
209 240
                 }
210 241
                 g = g + 1;
211 242
             }
243
+            String phala = m.group(8);
244
+            if (phala != null && phala.equals("্য")) {
245
+                changePronounciation = true;
246
+                thisNeedsO = true;
247
+            }
248
+            String jukto = m.group(4);
249
+            if (jukto != null) {
250
+                thisNeedsO = true;
251
+            }
212 252
             String kaar = m.group(10);
213 253
             if (kaar != null) {
214 254
                 String kaarStr = letters.get(kaar);
215 255
                 if (kaarStr != null) {
216 256
                     appendableString = appendableString + kaarStr;
217 257
                 }
218
-            } else if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0))) {
219
-                // Adding 'a' like ITRANS if no vowel is present.
220
-                // TODO: Have to add it dynamically using Bengali grammer rules.
221
-                appendableString = appendableString + "a";
258
+                if (kaarStr.equals("i") || kaarStr.equals("ii") || kaarStr.equals("u") || kaarStr.equals("uu")) {
259
+                    changePronounciation = true;
260
+                }
222 261
             }
223 262
             String singleton = m.group(11);
224 263
             if (singleton != null) {
@@ -227,6 +266,9 @@ public class BengaliLanguageUtils extends LanguageUtils {
227 266
                     appendableString = appendableString + singleStr;
228 267
                 }
229 268
             }
269
+            if (changePronounciation && lastChar.equals("a")) {
270
+                sb.setCharAt(sb.length() - 1, 'o');
271
+            }
230 272
             String others = m.group(0);
231 273
             if (others != null) {
232 274
 
@@ -234,7 +276,41 @@ public class BengaliLanguageUtils extends LanguageUtils {
234 276
                     appendableString = appendableString + others;
235 277
                 }
236 278
             }
279
+            String whitespace = m.group(12);
280
+            if (nextNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) {
281
+                appendableString = appendableString + "o";
282
+                lastHadO++;
283
+                thisNeedsO = false;
284
+            }
285
+
286
+            if (((kaar != null && lastHadO > 1) || whitespace != null) && !lastHadKaar && sb.length() > 0
287
+                    && sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) {
288
+                sb.deleteCharAt(sb.length() - 1);
289
+                lastHadO = 0;
290
+            }
291
+            nextNeedsO = false;
292
+            if (thisNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) {
293
+                appendableString = appendableString + "o";
294
+                lastHadO++;
295
+            }
296
+            if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0)) && kaar == null) {
297
+                nextNeedsO = true;
298
+            }
299
+            if (reff != null || m.group(4) != null || m.group(6) != null) {
300
+                lastHadComposition = true;
301
+            } else {
302
+                lastHadComposition = false;
303
+            }
304
+            if (kaar != null) {
305
+                lastHadKaar = true;
306
+            } else {
307
+                lastHadKaar = false;
308
+            }
237 309
             m.appendReplacement(sb, appendableString);
310
+            lastChar = appendableString;
311
+        }
312
+        if (!lastHadKaar && sb.length() > 0 && sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) {
313
+            sb.deleteCharAt(sb.length() - 1);
238 314
         }
239 315
         m.appendTail(sb);
240 316
         return sb.toString();

+ 6
- 3
app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java View File

@@ -57,11 +57,14 @@ public class LanguageUtilsTest extends TestBase {
57 57
         assertEquals("Farsi transiteration failed", farsiExpected, farsiActual);
58 58
     }
59 59
 
60
+    @Test
60 61
     public void testStringTransliterateBengali() throws Exception {
61 62
         // input with cyrillic and diacritic letters
62
-        String[] inputs = { "অনিরুদ্ধ", "বিজ্ঞানযাত্রা চলছে চলবে।", "আমি সব দেখেশুনে ক্ষেপে গিয়ে করি বাঙলায় চিৎকার!" };
63
-        String[] outputs = { "aniruddha", "biggaanaJaatraa chalachhe chalabe.",
64
-                "aami saba dekheshune kkhepe giye kari baangalaaya chitkaara!" };
63
+        String[] inputs = { "অনিরুদ্ধ", "বিজ্ঞানযাত্রা চলছে চলবে।", "আমি সব দেখেশুনে ক্ষেপে গিয়ে করি বাঙলায় চিৎকার!",
64
+                "আমার জাভা কোড is so bad! কী আর বলবো!" };
65
+        String[] outputs = { "oniruddho", "biggaanJaatraa cholchhe cholbe.",
66
+                "aami sob dekheshune kkhepe giye kori baanglaay chitkaar!",
67
+                "aamaar jaabhaa koD is so bad! kii aar bolbo!"};
65 68
 
66 69
         String result;
67 70
 

Loading…
Cancel
Save