Skip to content

Repos

jlpt

This module defines the JLPTRepo class, used for querying the database tables that hold data extracted from Jonathan Waller's website.

JLPTRepo

Query JLPT Related Tables of Database

Source code in kotobase/src/kotobase/repos/jlpt.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class JLPTRepo:
    """
    Query JLPT Related Tables of Database

    Every method is static and opens its own session through the
    ``get_db()`` context manager.
    """

    # ── vocab ───────────────────────────────────────────────────────────

    @staticmethod
    @lru_cache(maxsize=30_000)
    def vocab_by_word(word: str) -> Optional[dt.JLPTVocabDTO]:
        """
        Get vocabulary by word

        The word is compared for equality against both the ``kanji`` and
        the ``hiragana`` column; the first matching row is used. Results
        (including ``None`` for a miss) are memoised by ``lru_cache``.

        Args:
          word (str): Word to query

        Returns:
          JLPTVocabDTO: JLPT Vocab data object, or `None` if no row matches.
        """
        with get_db() as s:
            row = (
                s.query(orm.JlptVocab)
                .filter(
                    (orm.JlptVocab.kanji == word
                     ) | (orm.JlptVocab.hiragana == word))
                .first()
            )
        return dt.map_jlpt_vocab(row) if row else None

    @staticmethod
    def vocab_level(word: str) -> Optional[int]:
        """
        Get Vocab JLPT levels

        Thin wrapper around `vocab_by_word` that returns only the level.

        Args:
          word (str): Word to query.

        Returns:
          int: JLPT level if existent, otherwise `None`.
        """
        dto = JLPTRepo.vocab_by_word(word)
        return dto.level if dto else None

    # Kanji Levels

    @staticmethod
    def kanji_levels(chars: Iterable[str]) -> Dict[str, int]:
        """
        Get Kanji levels with bulk search

        Args:
          chars (Iterable[str]): Iterable of character to query. Any
                                 iterable is accepted, including a plain
                                 string or a generator.

        Returns:
          Dict[str, int]: Dictionary with character keys and level values.
                          Characters without a JLPT row are absent.
        """
        # Materialise once: `in_()` expects a concrete collection of values,
        # whereas `chars` may be a one-shot iterator or a plain string.
        wanted = list(chars)
        if not wanted:
            # Nothing to look up, so skip the database round-trip.
            return {}
        with get_db() as s:
            rows = (
                s.query(orm.JlptKanji)
                .filter(orm.JlptKanji.kanji.in_(wanted))
                .all()
            )
        return {r.kanji: r.level for r in rows}

    # Grammar Lookup

    @staticmethod
    def grammar_entries_like(pattern: str) -> List[dt.JLPTGrammarDTO]:
        """
        Wildcard search for grammar patterns

        `~` and `*` in the pattern are translated to the SQL wildcard `%`,
        and a trailing `%` is always appended, so the pattern is matched
        as a prefix. Backslash is the LIKE escape character.

        Args:
          pattern (str): Wildcard Pattern

        Returns:
          List[JLPTGrammarDTO]: List of JLPT Grammar data objects.
        """
        pattern = pattern.replace("~", "%").replace("*", "%")
        with get_db() as s:
            rows = (
                s.query(orm.JlptGrammar)
                .filter(
                    orm.JlptGrammar.grammar.like(f"{pattern}%", escape="\\"))
                .all()
            )
        return dt.map_many(dt.map_jlpt_grammar, rows)

grammar_entries_like(pattern) staticmethod

Wildcard search for grammar patterns

Parameters:

Name Type Description Default
pattern str

Wildcard Pattern

required

Returns:

Type Description
List[JLPTGrammarDTO]

List[JLPTGrammarDTO]: List of JLPT Grammar data objects.

Source code in kotobase/src/kotobase/repos/jlpt.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
@staticmethod
def grammar_entries_like(pattern: str) -> List[dt.JLPTGrammarDTO]:
    """
    Prefix/wildcard search over the JLPT grammar table.

    `~` and `*` are translated to the SQL wildcard `%` and a trailing `%`
    is appended before the LIKE comparison. Backslash is the LIKE escape
    character.

    Args:
      pattern (str): Wildcard Pattern

    Returns:
      List[JLPTGrammarDTO]: List of JLPT Grammar data objects.
    """
    translated = pattern.replace("~", "%").replace("*", "%")
    like_pattern = translated + "%"
    with get_db() as session:
        query = session.query(orm.JlptGrammar).filter(
            orm.JlptGrammar.grammar.like(like_pattern, escape="\\")
        )
        matches = query.all()
    return dt.map_many(dt.map_jlpt_grammar, matches)

kanji_levels(chars) staticmethod

Get Kanji levels with bulk search

Parameters:

Name Type Description Default
chars Iterable[str]

Iterable of character to query.

required

Returns:

Type Description
Dict[str, int]

Dict[str, int]: Dictionary with character keys and level values.

Source code in kotobase/src/kotobase/repos/jlpt.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
@staticmethod
def kanji_levels(chars: Iterable[str]) -> Dict[str, int]:
    """
    Get Kanji levels with bulk search

    Args:
      chars (Iterable[str]): Iterable of character to query. Any iterable
                             is accepted, including a plain string or a
                             generator.

    Returns:
      Dict[str, int]: Dictionary with character keys and level values.
                      Characters without a JLPT row are absent.
    """
    # Materialise once: `in_()` expects a concrete collection of values,
    # whereas `chars` may be a one-shot iterator or a plain string.
    wanted = list(chars)
    if not wanted:
        # Nothing to look up, so skip the database round-trip.
        return {}
    with get_db() as s:
        rows = (
            s.query(orm.JlptKanji)
            .filter(orm.JlptKanji.kanji.in_(wanted))
            .all()
        )
    return {r.kanji: r.level for r in rows}

vocab_by_word(word) cached staticmethod

Get vocabulary by word

Parameters:

Name Type Description Default
word str

Word to query

required

Returns:

Name Type Description
JLPTVocabDTO Optional[JLPTVocabDTO]

JLPT Vocab data object.

Source code in kotobase/src/kotobase/repos/jlpt.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@staticmethod
@lru_cache(maxsize=30_000)
def vocab_by_word(word: str) -> Optional[dt.JLPTVocabDTO]:
    """
    Look up a single JLPT vocabulary row by its written form.

    The word is compared for equality against both the ``kanji`` and the
    ``hiragana`` column and the first matching row is used. Results are
    memoised by ``lru_cache``.

    Args:
      word (str): Word to query

    Returns:
      JLPTVocabDTO: JLPT Vocab data object, or `None` if no row matches.
    """
    matches_kanji = orm.JlptVocab.kanji == word
    matches_hiragana = orm.JlptVocab.hiragana == word
    with get_db() as session:
        query = session.query(orm.JlptVocab).filter(
            matches_kanji | matches_hiragana
        )
        hit = query.first()
    if not hit:
        return None
    return dt.map_jlpt_vocab(hit)

vocab_level(word) staticmethod

Get Vocab JLPT levels

Parameters:

Name Type Description Default
word str

Word to query.

required

Returns:

Name Type Description
int Optional[int]

JLPT level if existent.

Source code in kotobase/src/kotobase/repos/jlpt.py
44
45
46
47
48
49
50
51
52
53
54
55
56
@staticmethod
def vocab_level(word: str) -> Optional[int]:
    """
    Return only the JLPT level of a word.

    Delegates the lookup to `JLPTRepo.vocab_by_word`.

    Args:
      word (str): Word to query.

    Returns:
      int: JLPT level if existent, otherwise `None`.
    """
    entry = JLPTRepo.vocab_by_word(word)
    if not entry:
        return None
    return entry.level

jmdict

This module defines the JMDictRepo class used for querying data extracted from the JMDict XML file in the database.

JMDictRepo

Queries JMDict Related Tables of Database

Source code in kotobase/src/kotobase/repos/jmdict.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
class JMDictRepo:
    """
    Queries JMDict Related Tables of Database
    """

    # ── single-row lookups ──────────────────────────────────────────────

    @staticmethod
    @lru_cache(maxsize=100_000)
    def by_id(entry_id: int) -> Optional[dt.JMDictEntryDTO]:
        """
        Fetch one JMDict entry by primary key.

        The `kana`, `kanji` and `senses` relationships are requested with
        joined eager loading. Results are memoised by ``lru_cache``.

        Args:
          entry_id (int): Entry ID in database.

        Returns:
          JMDictEntryDTO: JMDict Entry Data Object, or `None` if not found.
        """
        eager = (
            joinedload(orm.JMDictEntry.kana),
            joinedload(orm.JMDictEntry.kanji),
            joinedload(orm.JMDictEntry.senses),
        )
        with get_db() as session:
            entry = session.get(orm.JMDictEntry, entry_id, options=eager)
        if not entry:
            return None
        return dt.map_jmdict(entry)

    # ── search helpers ──────────────────────────────────────────────────

    @staticmethod
    @lru_cache(maxsize=100_000)
    def search_form(form: str,
                    /,
                    *,
                    wildcard: bool = False,
                    limit: Optional[int] = 50
                    ) -> List[dt.JMDictEntryDTO]:
        """
        Exact or wildcard search across `kana` and `kanji`.

        An entry matches when any of its kana or any of its kanji forms
        satisfies the comparison. Results are ordered by `rank`, then
        `id`, and memoised by ``lru_cache``.

        Args:
          form (str): Query string.

          wildcard (bool): If true, `*` is translated to `%` and a LIKE
                           search is performed. If false, the string is
                           passed unchanged and compared for equality.

          limit (int, optional): Limit of entries to return, can be set to
                                 `None` for no limit.

        Returns:
          List[JMDictEntryDTO]: List of JMDictEntry data objects.
        """
        if wildcard:
            like_pattern = form.replace("*", "%")
            kana_match = orm.JMDictKana.text.like(like_pattern)
            kanji_match = orm.JMDictKanji.text.like(like_pattern)
        else:
            kana_match = orm.JMDictKana.text == form
            kanji_match = orm.JMDictKanji.text == form

        with get_db() as session:
            query = session.query(orm.JMDictEntry).filter(
                orm.JMDictEntry.kana.any(kana_match) |
                orm.JMDictEntry.kanji.any(kanji_match)
            )
            query = query.options(
                selectinload(orm.JMDictEntry.kana),
                selectinload(orm.JMDictEntry.kanji),
                selectinload(orm.JMDictEntry.senses),
            )
            query = query.order_by(orm.JMDictEntry.rank, orm.JMDictEntry.id)
            entries = query.limit(limit).all()
            # Mapping stays inside the session block, as in the original.
            return dt.map_many(dt.map_jmdict, entries)

by_id(entry_id) cached staticmethod

Retrieve Entry by id.

Parameters:

Name Type Description Default
entry_id int

Entry ID in database.

required

Returns:

Name Type Description
JMDictEntryDTO Optional[JMDictEntryDTO]

JMDict Entry Data Object.

Source code in kotobase/src/kotobase/repos/jmdict.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
@staticmethod
@lru_cache(maxsize=100_000)
def by_id(entry_id: int) -> Optional[dt.JMDictEntryDTO]:
    """
    Fetch one JMDict entry by primary key.

    The `kana`, `kanji` and `senses` relationships are requested with
    joined eager loading. Results are memoised by ``lru_cache``.

    Args:
      entry_id (int): Entry ID in database.

    Returns:
      JMDictEntryDTO: JMDict Entry Data Object, or `None` if not found.
    """
    eager = (
        joinedload(orm.JMDictEntry.kana),
        joinedload(orm.JMDictEntry.kanji),
        joinedload(orm.JMDictEntry.senses),
    )
    with get_db() as session:
        entry = session.get(orm.JMDictEntry, entry_id, options=eager)
    if not entry:
        return None
    return dt.map_jmdict(entry)

search_form(form, /, *, wildcard=False, limit=50) cached staticmethod

Exact or wildcard search across kana and kanji.

Parameters:

Name Type Description Default
form str

Query string.

required
wildcard bool

If true, treat * and % as wildcards and perform a LIKE search. If false, pass wildcards unchanged and perform a simple comparison search.

False
limit int

Limit of entries to return, can be set to None for no limit.

50

Returns: List[JMDictEntryDTO]: List of JMDictEntry data objects.

Source code in kotobase/src/kotobase/repos/jmdict.py
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@staticmethod
@lru_cache(maxsize=100_000)
def search_form(form: str,
                /,
                *,
                wildcard: bool = False,
                limit: Optional[int] = 50
                ) -> List[dt.JMDictEntryDTO]:
    """
    Exact or wildcard search across `kana` and `kanji`.

    An entry matches when any of its kana or any of its kanji forms
    satisfies the comparison. Results are ordered by `rank`, then `id`,
    and memoised by ``lru_cache``.

    Args:
      form (str): Query string.

      wildcard (bool): If true, `*` is translated to `%` and a LIKE
                       search is performed. If false, the string is
                       passed unchanged and compared for equality.

      limit (int, optional): Limit of entries to return, can be set to
                             `None` for no limit.

    Returns:
      List[JMDictEntryDTO]: List of JMDictEntry data objects.
    """
    if wildcard:
        like_pattern = form.replace("*", "%")
        kana_match = orm.JMDictKana.text.like(like_pattern)
        kanji_match = orm.JMDictKanji.text.like(like_pattern)
    else:
        kana_match = orm.JMDictKana.text == form
        kanji_match = orm.JMDictKanji.text == form

    with get_db() as session:
        query = session.query(orm.JMDictEntry).filter(
            orm.JMDictEntry.kana.any(kana_match) |
            orm.JMDictEntry.kanji.any(kanji_match)
        )
        query = query.options(
            selectinload(orm.JMDictEntry.kana),
            selectinload(orm.JMDictEntry.kanji),
            selectinload(orm.JMDictEntry.senses),
        )
        query = query.order_by(orm.JMDictEntry.rank, orm.JMDictEntry.id)
        entries = query.limit(limit).all()
        # Mapping stays inside the session block, as in the original.
        return dt.map_many(dt.map_jmdict, entries)

jmnedict

This module defines the JMNeDictRepo class used for querying data extracted from the JMneDict XML file in the database.

JMNeDictRepo

Queries related to JMNeDict Tables of the database.

Source code in kotobase/src/kotobase/repos/jmnedict.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class JMNeDictRepo:
    """
    Queries related to JMNeDict Tables of the database.
    """

    @staticmethod
    @lru_cache(maxsize=40_000)
    def by_id(entry_id: int) -> Optional[dt.JMNeDictEntryDTO]:
        """
        Retrieve Entry by id.

        Results are memoised by ``lru_cache``.

        Args:
          entry_id (int): Entry ID in database.

        Returns:
          JMNeDictEntryDTO: JMNeDict Entry Data Object, or `None` if no
                            entry has that id.
        """
        with get_db() as s:
            row = s.get(orm.JMnedictEntry, entry_id)
        return dt.map_jmnedict(row) if row else None

    @staticmethod
    @lru_cache(maxsize=40_000)
    def search(form: str,
               limit: Optional[int] = 50
               ) -> List[dt.JMNeDictEntryDTO]:
        """
        LIKE search on JMNeDict table.

        `*` in the query is translated to the SQL wildcard `%`; an entry
        matches when its `kana` or its `kanji` column satisfies the LIKE
        comparison. Results are memoised by ``lru_cache``.

        Args:
          form (str): Query string.

          limit (int, optional): Limit of entries to return, can be set to
                                 `None` for no limit.

        Returns:
          List[JMNeDictEntryDTO]: List of JMNeDictEntry data objects.
        """
        pattern = form.replace("*", "%")
        with get_db() as s:
            stmt = (
                select(orm.JMnedictEntry)
                .where(
                    or_(
                        orm.JMnedictEntry.kana.like(pattern),
                        orm.JMnedictEntry.kanji.like(pattern)
                    )
                )
            )
            # Only `None` means "no limit"; a truthiness test would also
            # drop the LIMIT clause for limit=0 and return every match.
            if limit is not None:
                stmt = stmt.limit(limit)
            rows = s.scalars(stmt).all()
        return dt.map_many(dt.map_jmnedict, rows)

by_id(entry_id) cached staticmethod

Retrieve Entry by id.

Parameters:

Name Type Description Default
entry_id int

Entry ID in database.

required

Returns:

Name Type Description
JMNeDictEntryDTO Optional[JMNeDictEntryDTO]

JMNeDict Entry Data Object.

Source code in kotobase/src/kotobase/repos/jmnedict.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
@staticmethod
@lru_cache(maxsize=40_000)
def by_id(entry_id: int) -> Optional[dt.JMNeDictEntryDTO]:
    """
    Fetch one JMNeDict entry by primary key.

    Results are memoised by ``lru_cache``.

    Args:
      entry_id (int): Entry ID in database.

    Returns:
      JMNeDictEntryDTO: JMNeDict Entry Data Object, or `None` if no entry
                        has that id.
    """
    with get_db() as session:
        entry = session.get(orm.JMnedictEntry, entry_id)
    if not entry:
        return None
    return dt.map_jmnedict(entry)

search(form, limit=50) cached staticmethod

LIKE search on JMNeDict table.

Parameters:

Name Type Description Default
form str

Query string.

required
limit int

Limit of entries to return, can be set to None for no limit.

50

Returns: List[JMNeDictEntryDTO]: List of JMNeDictEntry data objects.

Source code in kotobase/src/kotobase/repos/jmnedict.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
@staticmethod
@lru_cache(maxsize=40_000)
def search(form: str,
           limit: Optional[int] = 50
           ) -> List[dt.JMNeDictEntryDTO]:
    """
    LIKE search on JMNeDict table.

    `*` in the query is translated to the SQL wildcard `%`; an entry
    matches when its `kana` or its `kanji` column satisfies the LIKE
    comparison. Results are memoised by ``lru_cache``.

    Args:
      form (str): Query string.

      limit (int, optional): Limit of entries to return, can be set to
                             `None` for no limit.

    Returns:
      List[JMNeDictEntryDTO]: List of JMNeDictEntry data objects.
    """
    pattern = form.replace("*", "%")
    with get_db() as s:
        stmt = (
            select(orm.JMnedictEntry)
            .where(
                or_(
                    orm.JMnedictEntry.kana.like(pattern),
                    orm.JMnedictEntry.kanji.like(pattern)
                )
            )
        )
        # Only `None` means "no limit"; a truthiness test would also drop
        # the LIMIT clause for limit=0 and return every match.
        if limit is not None:
            stmt = stmt.limit(limit)
        rows = s.scalars(stmt).all()
    return dt.map_many(dt.map_jmnedict, rows)

kanji

This module defines the KanjiRepo class used for querying data extracted from the KANJIDIC2 XML file in the database.

KanjiRepo

Queries Kanji related Tables of the database

Source code in kotobase/src/kotobase/repos/kanji.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class KanjiRepo:
    """
    Queries Kanji related Tables of the database

    Successfully mapped kanji are kept in a class-level dictionary so that
    repeated lookups of the same literal skip the database.
    """

    _cache: Dict[str, dt.KanjiDTO] = {}  # caches: literal → DTO

    @staticmethod
    def by_literal(lit: str) -> Optional[dt.KanjiDTO]:
        """
        Retrieve Kanji by literal.

        The Kanjidic row is combined with the level from the JLPT kanji
        table (if any). Hits are cached; misses are not.

        Args:
          lit (str): The Kanji Literal

        Returns:
          KanjiDTO: Kanji Data Object, or `None` if the literal is unknown.

        """
        if lit in KanjiRepo._cache:
            return KanjiRepo._cache[lit]

        with get_db() as s:
            row = s.get(orm.Kanjidic, lit)
            if not row:
                return None

            jlpt_row = (
                s.query(orm.JlptKanji)
                .filter(orm.JlptKanji.kanji == lit)
                .first()
            )
            dto = dt.map_kanjidic(
                row, jlpt_tanos_level=jlpt_row.level if jlpt_row else None
            )
            KanjiRepo._cache[lit] = dto
            return dto

    @staticmethod
    def bulk_fetch(chars: Iterable[str]) -> List[dt.KanjiDTO]:
        """
        Bulk-Fetch Kanji for performance.

        Cached literals are served from the cache; all remaining ones are
        loaded with one Kanjidic query and one JLPT query.

        Args:
          chars (Iterable[str]): Iterable of kanjis. Any iterable is
                                 accepted, including one-shot iterators.

        Returns:
          List[KanjiDTO]: List of Kanji Data Objects in the order of
                          `chars`, one per input occurrence. Characters
                          with no Kanjidic row are omitted.

        """
        # Materialise once: the input is walked twice (lookup and
        # ordering), which would silently exhaust a one-shot iterator.
        wanted = list(chars)

        found: Dict[str, dt.KanjiDTO] = {}
        missing: List[str] = []
        # dict.fromkeys de-duplicates while keeping first-seen order, so
        # each literal is checked (and queried) at most once.
        for c in dict.fromkeys(wanted):
            cached = KanjiRepo._cache.get(c)
            if cached is not None:
                found[c] = cached
            else:
                missing.append(c)

        if missing:
            with get_db() as s:
                rows = (
                    s.query(orm.Kanjidic)
                    .filter(orm.Kanjidic.literal.in_(missing))
                    .all()
                )
                jlpt_map = {
                    r.kanji: r.level
                    for r in s.query(orm.JlptKanji)
                    .filter(orm.JlptKanji.kanji.in_(missing))
                    .all()
                }
            for r in rows:
                dto = dt.map_kanjidic(
                    r, jlpt_tanos_level=jlpt_map.get(r.literal)
                )
                KanjiRepo._cache[r.literal] = dto
                found[r.literal] = dto

        # Rebuild in input order; unknown characters are simply skipped.
        return [found[c] for c in wanted if c in found]

bulk_fetch(chars) staticmethod

Bulk-Fetch Kanji for performance.

Parameters:

Name Type Description Default
chars Iterable[str]

Iterable of kanjis.

required

Returns:

Type Description
List[KanjiDTO]

List[KanjiDTO]: List of Kanji Data Objects.

Source code in kotobase/src/kotobase/repos/kanji.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
@staticmethod
def bulk_fetch(chars: Iterable[str]) -> List[dt.KanjiDTO]:
    """
    Bulk-Fetch Kanji for performance.

    Cached literals are served from the cache; all remaining ones are
    loaded with one Kanjidic query and one JLPT query.

    Args:
      chars (Iterable[str]): Iterable of kanjis. Any iterable is accepted,
                             including one-shot iterators.

    Returns:
      List[KanjiDTO]: List of Kanji Data Objects in the order of `chars`,
                      one per input occurrence. Characters with no
                      Kanjidic row are omitted.

    """
    # Materialise once: the input is walked twice (lookup and ordering),
    # which would silently exhaust a one-shot iterator.
    wanted = list(chars)

    found = {}  # literal → DTO for everything resolved so far
    missing: List[str] = []
    # dict.fromkeys de-duplicates while keeping first-seen order, so each
    # literal is checked (and queried) at most once.
    for c in dict.fromkeys(wanted):
        cached = KanjiRepo._cache.get(c)
        if cached is not None:
            found[c] = cached
        else:
            missing.append(c)

    if missing:
        with get_db() as s:
            rows = (
                s.query(orm.Kanjidic)
                .filter(orm.Kanjidic.literal.in_(missing))
                .all()
            )
            jlpt_map = {
                r.kanji: r.level
                for r in s.query(orm.JlptKanji)
                .filter(orm.JlptKanji.kanji.in_(missing))
                .all()
            }
        for r in rows:
            dto = dt.map_kanjidic(
                r, jlpt_tanos_level=jlpt_map.get(r.literal)
            )
            KanjiRepo._cache[r.literal] = dto
            found[r.literal] = dto

    # Rebuild in input order; unknown characters are simply skipped.
    return [found[c] for c in wanted if c in found]

by_literal(lit) staticmethod

Retrieve Kanji by literal.

Parameters:

Name Type Description Default
lit str

The Kanji Literal

required

Returns:

Name Type Description
KanjiDTO Optional[KanjiDTO]

Kanji Data Object

Source code in kotobase/src/kotobase/repos/kanji.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
@staticmethod
def by_literal(lit: str) -> Optional[dt.KanjiDTO]:
    """
    Look up a single kanji, consulting the class-level cache first.

    On a cache miss the Kanjidic row is loaded and combined with the level
    from the JLPT kanji table (if any); the resulting DTO is cached.
    Unknown literals are not cached.

    Args:
      lit (str): The Kanji Literal

    Returns:
      KanjiDTO: Kanji Data Object, or `None` if the literal is unknown.

    """
    try:
        return KanjiRepo._cache[lit]
    except KeyError:
        pass

    with get_db() as session:
        kanji_row = session.get(orm.Kanjidic, lit)
        if not kanji_row:
            return None

        level_query = session.query(orm.JlptKanji).filter(
            orm.JlptKanji.kanji == lit
        )
        level_row = level_query.first()
        if level_row:
            level = level_row.level
        else:
            level = None

        result = dt.map_kanjidic(kanji_row, jlpt_tanos_level=level)
        KanjiRepo._cache[lit] = result
        return result

sentences

This module defines the SentenceRepo class used for querying data extracted from the Japanese Tatoeba example sentences in the database.

SentenceRepo

Query database for Tatoeba example sentences Tables.

Source code in kotobase/src/kotobase/repos/sentences.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class SentenceRepo:
    """
    Query database for Tatoeba example sentences Tables.
    """

    @staticmethod
    def search_containing(
        text: str,
        /,
        limit: int = 50,
        wildcard: bool = False,
    ) -> List[dt.SentenceDTO]:
        """
        Substring LIKE search over the Tatoeba sentence text.

        The query is always wrapped in a leading and trailing '%'. With
        `wildcard=True`, `*` is first translated to '%' and a '%' is then
        inserted between every pair of adjacent characters, giving a
        *contains all chars in order* fuzzy search. Backslash is the LIKE
        escape character.

        Args:
          text (str): Text to look for in sentences

          limit (int): Limit how many sentences are returned.

          wildcard (bool): If True, enable the fuzzy search described
                           above.

        Returns:
          List[SentenceDTO]: List of Sentence data objects.
        """
        needle = text
        if wildcard:
            # Translate `*` first, then thread '%' between all characters.
            needle = "%".join(needle.replace("*", "%"))

        like_pattern = "%" + needle + "%"

        with get_db() as session:
            query = session.query(orm.TatoebaSentence).filter(
                orm.TatoebaSentence.text.like(like_pattern, escape="\\")
            )
            hits = query.limit(limit).all()
        return dt.map_many(dt.map_sentence, hits)

search_containing(text, /, limit=50, wildcard=False) staticmethod

Basic LIKE search. If wildcard=True every non-space char is wrapped in '%' to simulate a contains all chars in order fuzzy search.

Parameters:

Name Type Description Default
text str

Text to look for in sentences

required
limit int

Limit how many sentences are returned.

50
wildcard bool

If True every non-space char is wrapped in '%' to simulate a contains all chars in order fuzzy search.

False

Returns:

Type Description
List[SentenceDTO]

List[SentenceDTO]: List of Sentence data objects.

Source code in kotobase/src/kotobase/repos/sentences.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@staticmethod
def search_containing(
    text: str,
    /,
    limit: int = 50,
    wildcard: bool = False,
) -> List[dt.SentenceDTO]:
    """
    Substring LIKE search over the Tatoeba sentence text.

    The query is always wrapped in a leading and trailing '%'. With
    `wildcard=True`, `*` is first translated to '%' and a '%' is then
    inserted between every pair of adjacent characters, giving a
    *contains all chars in order* fuzzy search. Backslash is the LIKE
    escape character.

    Args:
      text (str): Text to look for in sentences

      limit (int): Limit how many sentences are returned.

      wildcard (bool): If True, enable the fuzzy search described above.

    Returns:
      List[SentenceDTO]: List of Sentence data objects.
    """
    needle = text
    if wildcard:
        # Translate `*` first, then thread '%' between all characters.
        needle = "%".join(needle.replace("*", "%"))

    like_pattern = "%" + needle + "%"

    with get_db() as session:
        query = session.query(orm.TatoebaSentence).filter(
            orm.TatoebaSentence.text.like(like_pattern, escape="\\")
        )
        hits = query.limit(limit).all()
    return dt.map_many(dt.map_sentence, hits)