{ "cells": [ { "metadata": {}, "cell_type": "markdown", "source": [ "# Analyzing Text Content of All Ontologies\n", "\n", "This notebook runs `oaklib.utilities.metrics.text_content_analysis` over every `.db` file in `~/repos/semantic-sql/db/`, writes the results to `output/ontologies-tc.tsv`, and loads that table with pandas for inspection." ], "id": "91379b6d59f5c8ed" }, { "metadata": { "ExecuteTime": { "end_time": "2024-12-20T03:24:26.298342Z", "start_time": "2024-12-20T03:24:26.266440Z" } }, "cell_type": "code", "source": "", "id": "fbf2be05abef4f73", "outputs": [], "execution_count": 14 }, { "metadata": { "ExecuteTime": { "end_time": "2024-12-19T19:27:33.099743Z", "start_time": "2024-12-19T19:27:32.938042Z" } }, "cell_type": "code", "source": "!mkdir -p output", "id": "6a36717659364a16", "outputs": [], "execution_count": 1 }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": "!python -m oaklib.utilities.metrics.text_content_analysis ~/repos/semantic-sql/db/*.db > output/ontologies-tc.tsv", "id": "2edefc15b805bbcc" }, { "metadata": { "ExecuteTime": { "end_time": "2024-12-19T19:28:39.154652Z", "start_time": "2024-12-19T19:28:38.613182Z" } }, "cell_type": "code", "source": [ "import pandas as pd\n", "df = pd.read_csv('output/ontologies-tc.tsv', sep='\\t')\n", "df" ], "id": "4fd30caaf6c167cd", "outputs": [ { "data": { "text/plain": [ " ontology text_content\n", "0 /Users/cjm/repos/semantic-sql/db/aao.db NaN\n", "1 /Users/cjm/repos/semantic-sql/db/ado.db 0.945262\n", "2 /Users/cjm/repos/semantic-sql/db/adw.db NaN\n", "3 /Users/cjm/repos/semantic-sql/db/aeo.db 0.879694\n", "4 /Users/cjm/repos/semantic-sql/db/aero.db NaN\n", ".. ... ...\n", "145 /Users/cjm/repos/semantic-sql/db/go-lego.db 0.589013\n", "146 /Users/cjm/repos/semantic-sql/db/go-nucleus.db 0.846068\n", "147 /Users/cjm/repos/semantic-sql/db/go-plus.db 0.500000\n", "148 /Users/cjm/repos/semantic-sql/db/go.db 0.604229\n", "149 /Users/cjm/repos/semantic-sql/db/goa_uniprot_a... NaN\n", "\n", "[150 rows x 2 columns]" ], "text/html": [ "
\n", " | ontology | \n", "text_content | \n", "
---|---|---|
0 | \n", "/Users/cjm/repos/semantic-sql/db/aao.db | \n", "NaN | \n", "
1 | \n", "/Users/cjm/repos/semantic-sql/db/ado.db | \n", "0.945262 | \n", "
2 | \n", "/Users/cjm/repos/semantic-sql/db/adw.db | \n", "NaN | \n", "
3 | \n", "/Users/cjm/repos/semantic-sql/db/aeo.db | \n", "0.879694 | \n", "
4 | \n", "/Users/cjm/repos/semantic-sql/db/aero.db | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "
145 | \n", "/Users/cjm/repos/semantic-sql/db/go-lego.db | \n", "0.589013 | \n", "
146 | \n", "/Users/cjm/repos/semantic-sql/db/go-nucleus.db | \n", "0.846068 | \n", "
147 | \n", "/Users/cjm/repos/semantic-sql/db/go-plus.db | \n", "0.500000 | \n", "
148 | \n", "/Users/cjm/repos/semantic-sql/db/go.db | \n", "0.604229 | \n", "
149 | \n", "/Users/cjm/repos/semantic-sql/db/goa_uniprot_a... | \n", "NaN | \n", "
150 rows × 2 columns
\n", "\n", " | ontology | \n", "text_content | \n", "
---|---|---|
0 | \n", "aao | \n", "NaN | \n", "
1 | \n", "ado | \n", "0.945262 | \n", "
2 | \n", "adw | \n", "NaN | \n", "
3 | \n", "aeo | \n", "0.879694 | \n", "
4 | \n", "aero | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "
145 | \n", "go-lego | \n", "0.589013 | \n", "
146 | \n", "go-nucleus | \n", "0.846068 | \n", "
147 | \n", "go-plus | \n", "0.500000 | \n", "
148 | \n", "go | \n", "0.604229 | \n", "
149 | \n", "goa_uniprot_all | \n", "NaN | \n", "
150 rows × 2 columns
\n", "\n", " | ontology | \n", "text_content | \n", "
---|---|---|
1 | \n", "ado | \n", "0.945262 | \n", "
3 | \n", "aeo | \n", "0.879694 | \n", "
5 | \n", "agro | \n", "0.801219 | \n", "
6 | \n", "aio | \n", "0.940620 | \n", "
7 | \n", "aism | \n", "0.684825 | \n", "
... | \n", "... | \n", "... | \n", "
144 | \n", "go-amigo | \n", "0.592075 | \n", "
145 | \n", "go-lego | \n", "0.589013 | \n", "
146 | \n", "go-nucleus | \n", "0.846068 | \n", "
147 | \n", "go-plus | \n", "0.500000 | \n", "
148 | \n", "go | \n", "0.604229 | \n", "
126 rows × 2 columns
\n", "