{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Examples of pyesgf.search usage" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Prelude:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pyesgf.search import SearchConnection\n", "conn = SearchConnection('https://esgf.ceda.ac.uk/esg-search', \n", " distrib=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Warning**: don't use default search with `facets=*`.\n", "\n", "This behavior is kept for backward-compatibility, but ESGF indexes might not\n", "successfully perform a distributed search when this option is used, so some\n", "results may be missing. For full results, it is recommended to pass a list of\n", "facets of interest when instantiating a context object. For example,\n", "\n", " ctx = conn.new_context(facets='project,experiment_id')\n", "\n", "Only the facets that you specify will be present in the `facet_counts` dictionary.\n", "\n", "This warning is displayed when a distributed search is performed while using the\n", "`facets=*` default, a maximum of once per context object. 
To suppress this warning,\n", "set the environment variable `ESGF_PYCLIENT_NO_FACETS_STAR_WARNING` to any value\n", "or explicitly use `conn.new_context(facets='*')`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "facets='project,experiment_family'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Find how many datasets contain *humidity*, with a count for each experiment family:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ctx = conn.new_context(project='CMIP5', query='humidity', facets=facets)\n", "ctx.hit_count" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ctx.facet_counts['experiment_family']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Search using a partial ESGF dataset ID (and get first download URL):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conn = SearchConnection('https://esgf.ceda.ac.uk/esg-search', distrib=False)\n", "ctx = conn.new_context(facets=facets)\n", "dataset_id_pattern = \"cmip5.output1.MOHC.HadGEM2-CC.historical.mon.atmos.Amon.*\"\n", "results = ctx.search(query=\"id:%s\" % dataset_id_pattern)\n", "len(results)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "files = results[0].file_context().search()\n", "len(files)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "download_url = files[0].download_url\n", "print(download_url)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Find the OpenDAP URL for an aggregated dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conn = SearchConnection('http://esgf-data.dkrz.de/esg-search', distrib=False)\n", "ctx = conn.new_context(project='CMIP5', model='MPI-ESM-LR', experiment='decadal2000', 
time_frequency='day')\n", "print('Hits: {}, Realms: {}, Ensembles: {}'.format(\n", " ctx.hit_count, \n", " ctx.facet_counts['realm'], \n", " ctx.facet_counts['ensemble']))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ctx = ctx.constrain(realm='atmos', ensemble='r1i1p1')\n", "ctx.hit_count" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result = ctx.search()[0]\n", "agg_ctx = result.aggregation_context()\n", "agg = agg_ctx.search()[0]\n", "print(agg.opendap_url)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Find download URLs for all files in a dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conn = SearchConnection('http://esgf-data.dkrz.de/esg-search', distrib=False)\n", "ctx = conn.new_context(project='obs4MIPs')\n", "ctx.hit_count\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = ctx.search()[0]\n", "files = ds.file_context().search()\n", "len(files)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for f in files:\n", " print(f.download_url)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define a search for datasets that includes a temporal range:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conn = SearchConnection('https://esgf.ceda.ac.uk/esg-search', distrib=False)\n", "ctx = conn.new_context(\n", " project=\"CMIP5\", model=\"HadGEM2-ES\",\n", " time_frequency=\"mon\", realm=\"atmos\", ensemble=\"r1i1p1\", latest=True,\n", " from_timestamp=\"2100-12-30T23:23:59Z\", to_timestamp=\"2200-01-01T00:00:00Z\")\n", "ctx.hit_count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Or do the same thing by searching without temporal constraints and then applying the constraint:" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "ctx = conn.new_context(\n", " project=\"CMIP5\", model=\"HadGEM2-ES\",\n", " time_frequency=\"mon\", realm=\"atmos\", ensemble=\"r1i1p1\", latest=True)\n", "ctx.hit_count" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ctx = ctx.constrain(from_timestamp = \"2100-12-30T23:23:59Z\", to_timestamp = \"2200-01-01T00:00:00Z\")\n", "ctx.hit_count" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.2" }, "nbTranslate": { "displayLangs": [ "*" ], "hotkey": "alt-t", "langInMainMenu": true, "sourceLang": "en", "targetLang": "fr", "useGoogleTranslate": true } }, "nbformat": 4, "nbformat_minor": 4 }