Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/LANGUAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ The contents of the file should be as follows:
```json
{
"name": string,
"quotationMarks": {
"primary": tuple[string, string],
"secondary": tuple[string, string]
},
"rightToLeft": boolean,
"joiningScript": boolean,
"orderedByFrequency": boolean,
Expand All @@ -29,6 +33,13 @@ The contents of the file should be as follows:
```

It is recommended that you familiarize yourself with JSON before adding a language. For the `name` field, put the name of your language.
`quotationMarks` indicates the quotation marks used for that language: `primary` for top-level quotations (depth 1) and `secondary` for quotations nested inside another quotation (depth 2). Each entry is a pair consisting of the left (opening) quotation mark followed by the right (closing) one. For instance, American English uses `“”` at depth 1 and `‘’` at depth 2. In that case it would look like:
```json
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["‘", "’"]
},
```
`rightToLeft` indicates how the language is written. If it is written right to left then put `true`, otherwise put `false`.
`joiningScript` indicates whether the language requires joining letters to render correctly. Set it to `true` if characters must join with surrounding characters or if their shapes change based on position in a word (initial, medial, final, or isolated), or if they use connecting marks (matras/vowel signs) that reshape the base characters. Otherwise, set it to `false`.
For `bcp47` put your language's [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag).
Expand Down
18 changes: 9 additions & 9 deletions frontend/__tests__/test/british-english.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,23 @@ describe("british-english", () => {
});

it("should convert double quotes to single quotes", async () => {
await expect(replace('"hello"', "")).resolves.toEqual("'hello'");
await expect(replace('"test"', "")).resolves.toEqual("'test'");
await expect(replace('"Hello World"', "")).resolves.toEqual(
"'Hello World'",
await expect(replace("“hello”", "")).resolves.toEqual("hello");
await expect(replace("“test”", "")).resolves.toEqual("test");
await expect(replace("“Hello World”", "")).resolves.toEqual(
"Hello World",
);
});

it("should convert double quotes and replace words", async () => {
await expect(replace('"color"', "")).resolves.toEqual("'colour'");
await expect(replace('"math"', "")).resolves.toEqual("'maths'");
await expect(replace('"Color"', "")).resolves.toEqual("'Colour'");
await expect(replace("“color”", "")).resolves.toEqual("colour");
await expect(replace("“math”", "")).resolves.toEqual("maths");
await expect(replace("“Color”", "")).resolves.toEqual("Colour");
});

it("should handle multiple double quotes in a word", async () => {
await expect(
replace('He said "hello" and "goodbye"', ""),
).resolves.toEqual("He said 'hello' and 'goodbye'");
replace("He said hello and goodbye”", ""),
).resolves.toEqual("He said hello and goodbye");
});

it("should not affect words without double quotes", async () => {
Expand Down
13 changes: 10 additions & 3 deletions frontend/src/ts/test/british-english.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@ export async function replace(
previousWord: string,
): Promise<string> {
// Convert American-style double quotes to British-style single quotes
if (word.includes('"')) {
word = word.replace(/"/g, "'");
}
const us2ukQuotes = {
"“": "‘",
"”": "’",
"‘": "“",
"’": "”",
};
word = word.replace(
/[“”‘’]/g,
(char) => us2ukQuotes[char as keyof typeof us2ukQuotes],
);

if (word.includes("-")) {
//this handles hyphenated words (for example "cream-colored") to make sure
Expand Down
32 changes: 15 additions & 17 deletions frontend/src/ts/test/words-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ export async function punctuateWord(
currentWord: string,
index: number,
maxindex: number,
language: LanguageObject,
): Promise<string> {
let word = currentWord;

const currentLanguage = Config.language.split("_")[0];
const currentLanguage = language.name.split("_")[0];

const lastChar = Strings.getLastChar(previousWord);

Expand Down Expand Up @@ -140,22 +141,18 @@ export async function punctuateWord(
}
}
}
} else if (
random() < 0.01 &&
lastChar !== "," &&
lastChar !== "." &&
currentLanguage !== "russian"
) {
word = `"${word}"`;
} else if (
random() < 0.011 &&
lastChar !== "," &&
lastChar !== "." &&
currentLanguage !== "russian" &&
currentLanguage !== "ukrainian" &&
currentLanguage !== "slovak"
) {
word = `'${word}'`;
} else if (random() < 0.01 && lastChar !== "," && lastChar !== ".") {
if (language?.quotationMarks !== undefined) {
word = `${language.quotationMarks.primary[0]}${word}${language.quotationMarks.primary[1]}`;
} else {
word = `"${word}"`;
}
} else if (random() < 0.011 && lastChar !== "," && lastChar !== ".") {
if (language?.quotationMarks !== undefined) {
word = `${language.quotationMarks.secondary[0]}${word}${language.quotationMarks.secondary[1]}`;
} else {
word = `'${word}'`;
}
} else if (random() < 0.012 && lastChar !== "," && lastChar !== ".") {
if (currentLanguage === "code") {
const r = random();
Expand Down Expand Up @@ -941,6 +938,7 @@ export async function getNextWord(
randomWord,
wordIndex,
wordsBound,
currentLanguage,
);
}

Expand Down
19 changes: 17 additions & 2 deletions frontend/src/ts/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,23 @@ export function isWordRightToLeft(
}

export const CHAR_EQUIVALENCE_SETS = [
new Set(["’", "‘", "'", "ʼ", "׳", "ʻ", "᾽", "᾽"]),
new Set([`"`, "”", "“", "„"]),
new Set([
"’",
"‘",
"'",
"ʼ",
"׳",
"ʻ",
"᾽",
"᾽",
"‛",
"‚",
"‹",
"›",
"『",
"』",
]),
new Set([`"`, "”", "“", "‟", "„", "⹂", "«", "»", "《", "》", "「", "」"]),
new Set(["–", "—", "-", "‐"]),
new Set([",", "‚"]),
];
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/afrikaans_10k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "afrikaans_10k",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["‘", "’"]
},
"words": [
"aan",
"aanbetref",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/afrikaans_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "afrikaans_1k",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["‘", "’"]
},
"noLazyMode": true,
"words": [
"sterker",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/albanian.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "albanian",
"quotationMarks": {
"primary": ["„", "”"],
"secondary": ["‘", "’"]
},
"words": [
"të",
"e",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/albanian_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "albanian_1k",
"quotationMarks": {
"primary": ["„", "”"],
"secondary": ["‘", "’"]
},
"words": [
"I",
"e tij",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/amharic.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "amharic",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["‹", "›"]
},
"bcp47": "am-ET",
"words": [
"እግዚአብሔር",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/amharic_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "amharic_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["‹", "›"]
},
"bcp47": "am-ET",
"words": [
"መለየት",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/amharic_5k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "amharic_5k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["‹", "›"]
},
"bcp47": "am-ET",
"words": [
"ሙዚቀኝነት",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-SA",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_10k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_10k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-SA",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_egypt.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_egypt",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-EG",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_egypt_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_egypt_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-EG",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_morocco.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_morocco",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"orderedByFrequency": false,
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"noLazyMode": true,
"orderedByFrequency": false,
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"noLazyMode": true,
"orderedByFrequency": false,
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian_western.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian_western",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"bcp47": "hyw",
"words": [
"կանանց",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian_western_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian_western_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"bcp47": "hyw",
"words": [
"թարգմանուած",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/azerbaijani.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "azerbaijani",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["\"", "\""]
},
"bcp47": "az-AZ",
"words": [
"dərs",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/azerbaijani_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "azerbaijani_1k",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["\"", "\""]
},
"bcp47": "az-AZ",
"words": [
"dərs",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"noLazyMode": true,
"bcp47": "be-BY",
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_100k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_100k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_10k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_10k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"noLazyMode": true,
"bcp47": "be-BY",
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_25k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_25k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_50k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_50k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
Loading
Loading