Skip to content

Kotobase Docs

Database

Database

`database` ¶

This module handles the database connection through the `get_db` context manager.

`get_db()` ¶

Context-managed SQLAlchemy session providing access to the database

Yields:

Name	Type	Description
`Session`	`Session`	`SQLAlchemy` Session object.

Raises:

Type	Description
`EnvironmentError`	If the `kotobase.db` file doesn't exist.

Source code in kotobase/src/kotobase/db/database.py

@contextmanager
def get_db():
    """
    Provide a `SQLAlchemy` session for talking to the Kotobase database.

    If a session is already stored on the module's `_local` object it is
    reused and left open on exit. Otherwise a new session is created,
    stored there for the duration of the `with` block, and then closed
    and removed again by the same call that created it.

    Yields:
      Session (Session): `SQLAlchemy` Session object.

    Raises:
      EnvironmentError: If the database file at `DATABASE_PATH`
        doesn't exist.
    """
    if not DATABASE_PATH.exists():
        raise EnvironmentError(
            "Couldn't find Database. Try running CLI build or pull command")

    # Only the call that creates the session is responsible for closing it;
    # nested calls simply borrow the one already in place.
    owns_session = not hasattr(_local, "db")
    if owns_session:
        _local.db = Session(
            _engine(),
            expire_on_commit=False,
            autoflush=False,
        )

    try:
        yield _local.db
    finally:
        if owns_session:
            _local.db.close()
            del _local.db

`build_database` ¶

This module defines the click command which builds the kotobase.db database using SQLAlchemy.

`build(force)` ¶

Downloads source files, processes, and builds the Kotobase database.

Source code in kotobase/src/kotobase/db_builder/build_database.py

@click.command('build')
@click.option('--force',
              is_flag=True,
              help="Force re-build even if the file exists."
              )
def build(force):
    """
    Downloads source files, processes, and builds the Kotobase database.
    """
    # NOTE: click uses the docstring above as the command's help text, so
    # implementation notes live in comments instead.
    #
    # `force` is True when `--force` was passed: an existing database file
    # is then deleted and rebuilt instead of aborting early.

    if DATABASE_PATH.exists():
        if not force:
            click.echo(
                "Database file already exists. Use --force to re-build.")
            return
        try:
            DATABASE_PATH.unlink()
            click.secho("Deleted Old Database File", fg="green")

        except FileNotFoundError:
            # The file vanished between the exists() check and unlink().
            click.secho("Database File Doesn't Exist, Remove '--force' flag.",
                        fg="red",
                        err=True
                        )
            sys.exit(1)
        except PermissionError:
            click.secho("No Permission To Delete Database File",
                        fg="red",
                        err=True
                        )
            sys.exit(1)
        except Exception as e:
            click.secho(
                f"Unexpected Error While Deleting Database File: {e}",
                fg="red",
                err=True
                )
            sys.exit(1)
    session = Session()
    try:
        start = time.perf_counter()
        click.secho("--- Step 1: Downloading raw data files ---", fg="blue")
        download_data()

        click.secho("\n--- Step 2: Processing raw data into JSON ---",
                    fg="blue")
        parse_jmdict()
        parse_jmnedict()
        parse_kanjidic()
        parse_tatoeba()

        click.secho("\n--- Step 3: Building SQLite database ---", fg="blue")
        create_database()
        populate_jmdict(session)
        populate_jmnedict(session)
        populate_kanjidic(session)
        populate_tatoeba(session)
        populate_jlpt(session)
        end = time.perf_counter()
        click.secho("\nDatabase build process complete.",
                    fg="green", bold=True)
        build_time_sec = end - start
        build_date = str(datetime.datetime.now())
        build_file_size_mb = (os.path.getsize(DATABASE_PATH) / 1024) / 1024
        build_log_txt = dedent(
            f"""BUILD_TIME={build_time_sec}
BUILD_DATE={build_date}
SIZE_MB={build_file_size_mb}
""")

        # Log Build Info (best effort: a failure here only warns)
        try:
            DB_BUILD_LOG_PATH.unlink(missing_ok=True)
            DB_BUILD_LOG_PATH.touch()
            DB_BUILD_LOG_PATH.write_text(build_log_txt)
        except Exception as e:
            click.secho(f"Couldn't write log: {e}",
                        fg="yellow")

        click.secho(f"\nBuild Time: {build_time_sec} seconds")
        click.secho(f"\nBuild Date: {build_date}")
        click.secho(f"\nFile Size: {build_file_size_mb} MB")
    finally:
        try:
            # Delete Raw Files. The directory may not exist if the build
            # failed very early; iterating it would then raise and mask
            # the original error.
            if RAW_DATA_DIR.exists():
                for p in RAW_DATA_DIR.iterdir():
                    p.unlink(missing_ok=True)
            # Delete JSON Files
            for processed_file in (JMDICT_PATH,
                                   JMNEDICT_PATH,
                                   KANJIDIC2_PATH,
                                   TATOEBA_PATH):
                processed_file.unlink(missing_ok=True)
        finally:
            # Close Session even when the file cleanup above fails.
            session.close()

`create_database()` ¶

Click command helper which creates the database and all tables.

Source code in kotobase/src/kotobase/db_builder/build_database.py

def create_database() -> None:
    """
    Click command helper that recreates the database file from scratch.

    Removes any existing file at `DATABASE_PATH`, creates an empty one,
    drops and recreates every table registered on `Base.metadata`, and
    then adds the text indexes on the `jmdict_kana` and `jmdict_kanji`
    tables.
    """
    # Always start from an empty file, even when one already exists.
    DATABASE_PATH.unlink(missing_ok=True)
    DATABASE_PATH.touch()

    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)

    index_statements = (
        "CREATE INDEX IF NOT EXISTS idx_kana_text ON jmdict_kana(text)",
        "CREATE INDEX IF NOT EXISTS idx_kanji_text ON jmdict_kanji(text)",
    )
    with engine.begin() as connection:
        for statement in index_statements:
            connection.exec_driver_sql(statement)

    click.echo("Database Created Successfully")

`populate_jlpt(session)` ¶

Click command helper which populates JLPT tables in the database.

Parameters:

Name	Type	Description	Default
`session`	`Session`	SQLAlchemy Session object	required

Source code in kotobase/src/kotobase/db_builder/build_database.py

def populate_jlpt(session: SessionType) -> None:
    """
    Click command helper that loads the JLPT JSON files into the database.

    Every `*.json` file in `JLPT_FOLDER` whose stem contains `n<digit>` is
    read; the digit becomes the `level` of each row. Files whose name
    contains "vocab", "kanji" or "grammar" (checked in that order) are
    inserted into the matching JLPT table; other files are ignored.

    Args:
      session (Session): SQLAlchemy Session object
    """

    click.echo("Populating JLPT Tables ...")
    level_regex = re.compile(r'n(\d)')
    source_files = list(JLPT_FOLDER.glob("*.json"))

    with click.progressbar(source_files,
                           label="Processing JLPT Files -> "
                           ) as bar:

        for path in bar:
            found = level_regex.search(path.stem)
            if not found:
                # No level marker in the file name: nothing to import.
                continue
            level = int(found.group(1))

            with open(path, 'r', encoding='utf-8') as f:
                rows = json.load(f)

            file_name = path.name
            if "vocab" in file_name:
                model = JlptVocab
                for row in rows:
                    row['level'] = level
            elif "kanji" in file_name:
                model = JlptKanji
                for row in rows:
                    row['level'] = level
                    # Rename the reading keys to the names used for insert.
                    row['on_yomi'] = row.pop('on')
                    row['kun_yomi'] = row.pop('kun')
            elif "grammar" in file_name:
                model = JlptGrammar
                for row in rows:
                    row['level'] = level
                    # Examples are stored as one newline-separated string.
                    row['examples'] = "\n".join(row['examples'])
            else:
                continue

            session.bulk_insert_mappings(model, rows)

    session.commit()
    click.echo("\nJLPT Tables Populated")

`populate_jmdict(session)` ¶

Click command helper which populates JMDict tables in the database.

Parameters:

Name	Type	Description	Default
`session`	`Session`	SQLAlchemy Session object	required

Source code in kotobase/src/kotobase/db_builder/build_database.py

def populate_jmdict(session: SessionType) -> None:
    """
    Click command helper that loads the processed JMDict JSON into the
    JMDict tables.

    Reads `JMDICT_PATH`, builds row mappings for entries, kanji forms,
    kana forms and senses (skipping any entry id already seen), then
    bulk-inserts them and commits.

    Args:
      session (Session): SQLAlchemy Session object
    """
    click.echo("Populating JMDict Tables ...")
    with open(JMDICT_PATH, 'r', encoding='utf-8') as f:
        records = json.load(f)

    entry_rows = []
    kanji_rows = []
    kana_rows = []
    sense_rows = []
    known_ids: set[int] = set()
    with click.progressbar(records,
                           label="\nPreparing JMDict Data -> "
                           ) as bar:
        for record in bar:
            entry_id = record['id']

            # Only the first occurrence of an id is imported.
            if entry_id in known_ids:
                continue
            known_ids.add(entry_id)

            entry_rows.append({'id': entry_id,
                               'rank': record.get('rank', 99)})

            # Kanji forms
            kanji_rows.extend(
                {'entry_id': entry_id,
                 'order': form.get('order', 0),
                 'text': form['text']}
                for form in record.get('kanji', [])
                )

            # Kana forms
            kana_rows.extend(
                {'entry_id': entry_id,
                 'order': form.get('order', 0),
                 'text': form['text']}
                for form in record.get('kana', [])
                )

            # Senses: part-of-speech tags and glosses are flattened into
            # comma-separated strings.
            sense_rows.extend(
                {'entry_id': entry_id,
                 'order': sense.get('order'),
                 'pos': ", ".join(sense.get('pos', [])),
                 'gloss': ", ".join(sense.get('gloss', []))}
                for sense in record.get('senses', [])
                )

    click.echo("\nInserting JMDict Data -> ")
    for model, rows in ((JMDictEntry, entry_rows),
                        (JMDictKanji, kanji_rows),
                        (JMDictKana, kana_rows),
                        (JMDictSense, sense_rows)):
        session.bulk_insert_mappings(model, rows)
    session.commit()
    click.echo("\nJMDict Tables Populated")

`populate_jmnedict(session)` ¶

Click command helper which populates JMNeDict tables in the database.

Parameters:

Name	Type	Description	Default
`session`	`Session`	SQLAlchemy Session object	required

Source code in kotobase/src/kotobase/db_builder/build_database.py

def populate_jmnedict(session: SessionType) -> None:
    """
    Click command helper that loads the processed JMnedict JSON into the
    JMnedict table.

    Reads `JMNEDICT_PATH` and stores one row per entry, with kanji forms,
    kana forms, translation types and translations each flattened into a
    comma-separated string.

    Args:
      session (Session): SQLAlchemy Session object
    """

    click.echo("Populating JMnedict Table ...")
    with open(JMNEDICT_PATH, 'r', encoding='utf-8') as f:
        records = json.load(f)

    rows = []
    with click.progressbar(records,
                           label="\nPreparing JMnedict Data -> "
                           ) as bar:
        for record in bar:
            # Flatten the per-translation lists into single lists.
            type_tags = [
                tag
                for group in record['translations']
                for tag in group['type']
                ]
            glosses = [
                text
                for group in record['translations']
                for text in group['translation']
                ]
            rows.append({
                'id': record['id'],
                'kanji': ", ".join([form['text'] for form in record['kanji']]),
                'kana': ", ".join([form['text'] for form in record['kana']]),
                'translation_type': ", ".join(type_tags),
                'translation': ", ".join(glosses),
                })

    click.echo("\nInserting JMnedict Entries -> ")
    session.bulk_insert_mappings(JMnedictEntry, rows)
    session.commit()
    click.echo("\nJMnedict Table Populated")

`populate_kanjidic(session)` ¶

Click command helper which populates KANJIDIC table in the database.

Parameters:

Name	Type	Description	Default
`session`	`Session`	SQLAlchemy Session object	required

Source code in kotobase/src/kotobase/db_builder/build_database.py

def populate_kanjidic(session: SessionType) -> None:
    """
    Click command helper that loads the processed KANJIDIC JSON into the
    Kanjidic table.

    Reads `KANJIDIC2_PATH` and stores one row per character. Only `ja_on`
    and `ja_kun` readings and `en` meanings are kept, each joined into a
    comma-separated string; the first listed stroke count is used.

    Args:
      session (Session): SQLAlchemy Session object
    """

    click.echo("Populating Kanjidic Table ...")
    with open(KANJIDIC2_PATH, 'r', encoding='utf-8') as f:
        characters = json.load(f)

    rows = []
    with click.progressbar(characters,
                           label="Preparing Kanjidic Data ->"
                           ) as bar:
        for char in bar:
            reading_meaning = char['reading_meaning']
            readings = reading_meaning['readings']

            on_yomi = [r['value'] for r in readings if r['type'] == 'ja_on']
            kun_yomi = [r['value'] for r in readings if r['type'] == 'ja_kun']
            english = [
                m['value'] for m in reading_meaning['meanings']
                if m['lang'] == 'en'
                ]

            stroke_counts = char['stroke_count']
            first_stroke_count = stroke_counts[0] if stroke_counts else None

            rows.append({
                'literal': char['literal'],
                'grade': char.get('grade'),
                'stroke_count': first_stroke_count,
                'jlpt': char.get('jlpt'),
                'on_readings': ", ".join(on_yomi),
                'kun_readings': ", ".join(kun_yomi),
                'meanings': ", ".join(english),
                })

    click.echo("\nInserting Kanjidic Entries -> ")
    session.bulk_insert_mappings(Kanjidic, rows)
    session.commit()
    click.echo("\nKanjidic Table Populated")

`populate_tatoeba(session)` ¶

Click command helper which populates Tatoeba table in the database.

Parameters:

Name	Type	Description	Default
`session`	`Session`	SQLAlchemy Session object	required

Source code in kotobase/src/kotobase/db_builder/build_database.py

def populate_tatoeba(session: SessionType) -> None:
    """
    Click command helper that loads the processed Tatoeba JSON into the
    Tatoeba sentence table.

    Reads `TATOEBA_PATH` and inserts the `id` and `text` of every
    sentence; any other keys in the JSON records are dropped.

    Args:
      session (Session): SQLAlchemy Session object
    """

    click.echo("Populating Tatoeba Table ...")
    with open(TATOEBA_PATH, 'r', encoding='utf-8') as f:
        sentences = json.load(f)

    with click.progressbar(sentences,
                           label="\nPreparing Tatoeba Data -> "
                           ) as bar:
        rows = [
            {'id': sentence['id'], 'text': sentence['text']}
            for sentence in bar
            ]

    click.echo("\nInserting Tatoeba Entries ->")
    session.bulk_insert_mappings(TatoebaSentence, rows)
    session.commit()
    click.echo("Tatoeba Table Populated.")

`download` ¶

This module defines the helper function which downloads the zipped source files and extracts their content for processing.

`download_and_extract(url, output_filename)` ¶

Click command helper which downloads a file from a URL and extracts it if it is compressed.

Parameters:

Name	Type	Description	Default
`url`	`str`	File download URL	required
`output_filename`	`str`	Path of where to save the file.	required

Source code in kotobase/src/kotobase/db_builder/download.py

def download_and_extract(url: str,
                         output_filename: str
                         ) -> None:
    """
    Click command helper which downloads a file
    from a URL and extracts it if it is compressed.

    The file is streamed into `RAW_DATA_DIR / output_filename`. When the
    URL ends in `.gz` or `.bz2` the download is decompressed next to it
    (same name without the last suffix) and the archive is deleted.
    Any failure is reported on stderr and terminates the process with
    exit status 1.

    Args:
      url (str): File download URL
      output_filename (str): Path of where to save the file.
    """
    # Local import: only needed here, for streamed decompression.
    import shutil

    try:
        output_path = RAW_DATA_DIR / output_filename
        # Delete if it already exists
        output_path.unlink(missing_ok=True)
        click.secho(f"Downloading '{url}' ...",
                    fg="blue")

        # A timeout keeps a stalled server from hanging the build forever;
        # the context manager releases the connection when done.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        click.secho("  Download Successful",
                    fg="green")

        # Pick the decompressor from the URL suffix.
        if url.endswith(".gz"):
            open_archive, done_color = gzip.open, "green"
        elif url.endswith(".bz2"):
            open_archive, done_color = bz2.open, "bright_green"
        else:
            # Not a compressed download: nothing left to do.
            return

        uncompressed_path = output_path.with_suffix('')
        # Delete if it already exists
        uncompressed_path.unlink(missing_ok=True)
        click.secho(f"  Extracting to '{uncompressed_path}' ...",
                    fg="blue")
        # Copy in chunks instead of loading the whole decompressed file
        # into memory at once.
        with open_archive(output_path, "rb") as f_in, \
                open(uncompressed_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        output_path.unlink()
        click.secho("  Done!",
                    fg=done_color)
    except Exception as e:
        click.secho(f"Error while downloading '{url}' : {e}",
                    fg="red",
                    err=True
                    )
        sys.exit(1)

`main()` ¶

Main download function to download all data sources from URLs specified in the config module.

Source code in kotobase/src/kotobase/db_builder/download.py

def main() -> None:
    """
    Download every raw data source used to build the database.

    Ensures `RAW_DATA_DIR` exists, then fetches each source URL from the
    `config` module in turn via `download_and_extract`.
    """
    RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)

    # (download URL, file name to save the archive under)
    sources = (
        (JMDICT_URL, "JMdict_e.xml.gz"),
        (JMNEDICT_URL, "JMnedict.xml.gz"),
        (KANJIDIC2_URL, "kanjidic2.xml.gz"),
        (TATOEBA_URL, "jpn_sentences.tsv.bz2"),
    )
    for source_url, archive_name in sources:
        download_and_extract(source_url, archive_name)

`process_jmdict` ¶

This module defines the helper function which processes the raw JMDict XML file into a JSON file using XSLT transform for performance.

`parse_jmdict()` ¶

Click helper function which parses JMdict_e.xml and saves it as a JSON file using an embedded XSLT.

Source code in kotobase/src/kotobase/db_builder/process_jmdict.py

def parse_jmdict() -> None:
    """
    Click helper function which parses JMdict_e.xml
    and saves it as a JSON file using an embedded XSLT.

    The XSLT output is consumed as one line per entry with five
    `|`-separated fields: id, kanji forms, kana forms, priority tags and
    senses. Inside a field `~` separates items; senses are separated by
    `^`, and within a sense `;` separates the glosses from the
    part-of-speech tags. Lines that do not have exactly five fields are
    skipped. The result is written to `JMDICT_PATH`.
    """

    raw_path = RAW_JMDICT_PATH
    processed_path = JMDICT_PATH
    # Delete if it already exists
    processed_path.unlink(missing_ok=True)

    processed_path.parent.mkdir(parents=True, exist_ok=True)

    click.echo(f"Parsing '{raw_path.name}' with embedded XSLT ...")

    xml_doc = etree.parse(str(raw_path))
    xslt_doc = etree.parse(BytesIO(XSLT_TRANSFORM))
    transform = etree.XSLT(xslt_doc)

    result_tree = transform(xml_doc)

    entries = []
    lines = str(result_tree).splitlines()

    with click.progressbar(lines,
                           label="Assembling JSON -> ",
                           item_show_func=lambda x: "") as bar:
        for line in bar:
            parts = line.split('|')
            if len(parts) != 5:
                continue

            entry_id, kanji_str, kana_str, pri_str, senses_str = parts
            pri_list = [p for p in pri_str.split('~') if p]
            entry_rank = _rank(pri_list)

            senses = []
            for sense_order, sense_part in enumerate(senses_str.split('^')):
                if not sense_part:
                    continue
                # Without a ';' the whole part is glosses and pos is empty.
                gloss_str, _, pos_str = sense_part.partition(';')
                senses.append({
                    "order": sense_order,
                    "gloss": [g for g in gloss_str.split('~') if g],
                    "pos": [p for p in pos_str.split('~') if p]
                })

            # Append exactly once per entry. This used to sit inside the
            # sense loop, which wrote every entry once per sense and
            # inflated both the JSON file and the reported entry count.
            entries.append({
                "id": int(entry_id),
                "rank": entry_rank,
                "kanji": [
                    {"text": text, "order": order}
                    for order, text in enumerate(kanji_str.split('~'))
                    if text
                    ],
                "kana": [
                    {"text": text, "order": order}
                    for order, text in enumerate(kana_str.split('~'))
                    if text
                    ],
                "senses": senses
                })

    click.echo(
        f"\nWriting {len(entries)} entries to '{processed_path.name}' ..."
        )
    with open(processed_path, 'w', encoding='utf-8') as f:
        json.dump(entries, f, ensure_ascii=False)

    click.secho("Successfully Processed JMDict.", fg="green")

`process_jmnedict` ¶

This module defines the helper function which processes the raw JMNeDict XML file into a JSON file using XSLT transform for performance.

`parse_jmnedict()` ¶

Click helper function which parses JMNedict_e.xml and saves it as a JSON file using an embedded XSLT.

Source code in kotobase/src/kotobase/db_builder/process_jmnedict.py

def parse_jmnedict():
    """
    Click helper function that converts the raw JMnedict XML file into
    JSON using an embedded XSLT.

    The XSLT output is consumed as one line per entry with four
    `|`-separated fields: id, kanji forms, kana forms and translations.
    Inside a field `~` separates items; translations are separated by
    `^`, and within a translation `;` separates the type tags from the
    translation texts. Lines that do not have exactly four fields are
    skipped. The result is written to `JMNEDICT_PATH`.
    """

    source_path = RAW_JMNEDICT_PATH
    target_path = JMNEDICT_PATH
    # Start from a clean output file.
    target_path.unlink(missing_ok=True)

    target_path.parent.mkdir(parents=True, exist_ok=True)

    click.echo(f"Parsing '{source_path.name}' with embedded XSLT ...")

    # The XML comes from disk, the stylesheet from the embedded bytes.
    source_tree = etree.parse(str(source_path))
    stylesheet_tree = etree.parse(BytesIO(XSLT_TRANSFORM))
    apply_stylesheet = etree.XSLT(stylesheet_tree)

    # Let the XSLT engine do the heavy lifting.
    flattened = apply_stylesheet(source_tree)

    # Turn the simplified text output into JSON-ready dictionaries.
    entries = []
    text_lines = str(flattened).splitlines()

    with click.progressbar(text_lines,
                           label="Assembling JSON -> ",
                           item_show_func=lambda x: "") as bar:
        for text_line in bar:
            fields = text_line.split('|')
            if len(fields) != 4:
                continue

            entry_id, kanji_str, kana_str, trans_str = fields

            translations = []
            for chunk in trans_str.split('^'):
                if not chunk:
                    continue
                # Without a ';' the whole chunk is type tags.
                type_str, _, detail_str = chunk.partition(';')
                translations.append({
                    "type": [t for t in type_str.split('~') if t],
                    "translation": [d for d in detail_str.split('~') if d]
                })

            entries.append({
                "id": int(entry_id),
                "kanji": [{"text": k} for k in kanji_str.split('~') if k],
                "kana": [{"text": k} for k in kana_str.split('~') if k],
                "translations": translations
            })

    click.echo(
        f"\nWriting {len(entries)} entries to '{target_path.name}' ...")
    with open(target_path, 'w', encoding='utf-8') as f:
        json.dump(entries, f, ensure_ascii=False)

    click.secho("Successfully Processed JMnedict.", fg="green")

`process_kanjidic` ¶

This module defines the helper function which processes the raw KANJIDIC2 XML file into a JSON file.

`parse_kanjidic()` ¶

Click helper function which parses kanjidic.xml and saves it as a JSON file.

Source code in kotobase/src/kotobase/db_builder/process_kanjidic.py

def parse_kanjidic() -> None:
    """
    Click helper function which parses kanjidic.xml
    and saves it as a JSON file.

    Each `character` element of `RAW_KANJIDIC2_PATH` becomes one
    dictionary holding its literal, codepoints, radicals, misc data
    (grade, stroke counts, variants, frequency, JLPT level), dictionary
    references, query codes, readings and meanings. Meanings without an
    `m_lang` attribute are recorded as `en`. The list of characters is
    written to `KANJIDIC2_PATH`.
    """

    raw_path = RAW_KANJIDIC2_PATH
    processed_path = KANJIDIC2_PATH
    # Delete if it already exists
    processed_path.unlink(missing_ok=True)
    processed_path.parent.mkdir(parents=True, exist_ok=True)

    click.echo(f"Parsing '{raw_path.name}' ...")

    characters = []
    # Use iterparse for memory-efficient parsing. The path is passed as a
    # string, like the other parsers in this package do.
    with click.progressbar(etree.iterparse(str(raw_path), tag='character'),
                           label="Processing characters -> ",
                           item_show_func=lambda x: ""
                           ) as bar:
        for _, element in bar:
            character = {
                "literal": element.findtext('literal'),
                "codepoint": [
                    {"type": cp.get('cp_type'), "value": cp.text}
                    for cp in element.findall('codepoint/cp_value')
                ],
                "radical": [
                    {"type": rad.get('rad_type'), "value": rad.text}
                    for rad in element.findall('radical/rad_value')
                ],
                "grade": element.findtext('misc/grade'),
                "stroke_count": [
                    sc.text for sc in element.findall('misc/stroke_count')],
                "variants": [
                    {"type": var.get('var_type'), "value": var.text}
                    for var in element.findall('misc/variant')
                ],
                "freq": element.findtext('misc/freq'),
                "jlpt": element.findtext('misc/jlpt'),
                "dic_number": [
                    {"type": dr.get('dr_type'),
                     "m_vol": dr.get('m_vol'),
                     "m_page": dr.get('m_page'),
                     "value": dr.text}
                    for dr in element.findall('dic_number/dic_ref')
                ],
                "query_code": [
                    {"type": qc.get('qc_type'),
                     "skip_misclass": qc.get('skip_misclass'),
                     "value": qc.text}
                    for qc in element.findall('query_code/q_code')
                ],
                "reading_meaning": {
                    "readings": [
                        {"type": r.get('r_type'),
                         "on_type": r.get('on_type'),
                         "r_status": r.get('r_status'),
                         "value": r.text}
                        for r in element.findall(
                            'reading_meaning/rmgroup/reading')
                    ],
                    "meanings": [
                        {"lang": m.get('m_lang', 'en'),
                         "value": m.text}
                        for m in element.findall(
                            'reading_meaning/rmgroup/meaning')
                    ]
                }
            }

            characters.append(character)
            # Free up memory: drop this element's content and every
            # already-processed sibling still attached to the parent.
            element.clear()
            while element.getprevious() is not None:
                del element.getparent()[0]

    # Build the message on one logical line; a backslash continuation
    # inside the literal would embed the indentation in the output.
    click.echo(
        f"\nWriting {len(characters)} characters "
        f"to '{processed_path.name}' ..."
        )
    with open(processed_path, 'w', encoding='utf-8') as f:
        json.dump(characters, f, ensure_ascii=False)

    click.secho("Successfully Processed Kanjidic2.", fg="green")

`process_tatoeba` ¶

This module defines the helper function which processes the raw Tatoeba Examples tsv file into a JSON file.

`parse_tatoeba()` ¶

Click helper function which parses tatoeba.tsv and saves it as a JSON file.

Source code in kotobase/src/kotobase/db_builder/process_tatoeba.py

def parse_tatoeba() -> None:
    """
    Click helper function which parses tatoeba.tsv
    and saves it as a JSON file.

    Reads `RAW_TATOEBA_PATH` as tab-separated text without quote
    handling. Rows with exactly three columns are kept as
    `{"id", "lang", "text"}` records; all other rows are skipped. The
    records are written to `TATOEBA_PATH`.
    """
    raw_path = RAW_TATOEBA_PATH
    processed_path = TATOEBA_PATH
    # Delete if it already exists
    processed_path.unlink(missing_ok=True)
    processed_path.parent.mkdir(parents=True, exist_ok=True)

    click.echo(f"Parsing {raw_path.name}...")

    sentences = []
    # newline='' lets the csv module do its own line-ending handling, as
    # its documentation asks for.
    with open(raw_path, 'r', encoding='utf-8', newline='') as f:
        # Get total number of lines for progress bar
        total_lines = sum(1 for _ in f)
        f.seek(0)

        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        with click.progressbar(reader, length=total_lines,
                               label="Processing sentences -> ") as bar:
            for row in bar:
                if len(row) == 3:
                    sentences.append({
                        "id": int(row[0]),
                        "lang": row[1],
                        "text": row[2]
                    })

    # Build the message on one logical line; a backslash continuation
    # inside the literal would embed the indentation in the output.
    click.echo(
        f"\nWriting {len(sentences)} sentences "
        f"to '{processed_path.name}' ..."
        )
    with open(processed_path, 'w', encoding='utf-8') as f:
        json.dump(sentences, f, ensure_ascii=False)

    click.secho("Successfully Processed Tatoeba Sentences.", fg="green")

`pull` ¶

This module defines the click command which pulls a pre-built database from a public Google Drive folder.

`pull_db(force)` ¶

Downloads the latest Kotobase database from Google Drive.

Source code in kotobase/src/kotobase/db_builder/pull.py

@click.command('pull-db')
@click.option('--force',
              is_flag=True,
              help="Force re-download even if the file exists."
              )
def pull_db(force):
    """
    Downloads the latest Kotobase database from Google Drive.
    """
    # NOTE: click uses the docstring above as the command's help text, so
    # implementation notes live in comments instead.
    #
    # `force` is True when `--force` was passed: an existing database file
    # (and its build log) is then deleted before downloading again.

    if DATABASE_PATH.exists():
        if not force:
            click.echo(
                "Database file already exists. Use --force to re-download.")
            return
        try:
            DATABASE_PATH.unlink()
            DB_BUILD_LOG_PATH.unlink(missing_ok=True)
            click.secho("Deleted Old Database File", fg="green")

        except FileNotFoundError:
            # The file vanished between the exists() check and unlink().
            click.secho("Database File Doesn't Exist, Remove '--force' flag.",
                        fg="red",
                        err=True
                        )
            sys.exit(1)
        except PermissionError:
            click.secho("No Permission To Delete Database File",
                        fg="red",
                        err=True
                        )
            sys.exit(1)
        except Exception as e:
            click.secho(
                f"Unexpected Error While Deleting Database File: {e}",
                fg="red",
                err=True
                )
            sys.exit(1)

    click.secho("Pulling Latest From Drive...",
                fg="blue")

    try:
        # Use the file ID directly
        gdown.download(id=DRIVE_FILE_ID,
                       output=str(DATABASE_PATH),
                       quiet=False)
        click.secho("Pulling Build Log...")
        # Also pull build log
        gdown.download(id=DRIVE_LOG_FILE_ID,
                       output=str(DB_BUILD_LOG_PATH),
                       quiet=False)
        click.secho("Database downloaded successfully.", fg="green")
    except Exception as e:
        # Report on stderr and exit non-zero, like the other failure
        # paths of this command, so scripts can detect a failed pull.
        click.secho(f"An error occurred: {e}", fg="red", err=True)
        # Adjacent literals instead of a backslash continuation, which
        # would embed the indentation in the message.
        click.echo("Please try building the "
                   "database manually with 'kotobase build'.",
                   err=True)
        sys.exit(1)