Skip to content

Commit

Permalink
Fugue notebook extension (#156)
Browse files Browse the repository at this point in the history
* fugue_notebook

* update

* update

* update

* update

* update

* update

* update

* update notebook test

* update
  • Loading branch information
goodwanghan authored Jan 26, 2021
1 parent 3e19abe commit 658a192
Show file tree
Hide file tree
Showing 11 changed files with 655 additions and 2 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/testnotebook.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# This workflow installs the development dependencies and runs the notebook extension test on Python 3.7
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Test Notebook Experience

on:
push:
branches: [ master ]
pull_request:
branches: [ master ]

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: make devenv
- name: Test
run: make testnotebook
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ package:
python3 setup.py bdist_wheel

jupyter:
pip install .
jupyter nbextension install --py fugue_notebook
jupyter nbextension enable fugue_notebook --py
jupyter notebook --port=8888 --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password=''

test:
Expand All @@ -73,6 +76,12 @@ testdask:
testsql:
python3 -bb -m pytest tests/fugue_sql

testnotebook:
pip install .
jupyter nbextension install --user --py fugue_notebook
jupyter nbextension enable fugue_notebook --py
jupyter nbconvert --execute --clear-output tests/fugue_notebook/test_notebook.ipynb

sql:
java -Xmx500M -jar bin/antlr-4.9-complete.jar -Dlanguage=Python3 -visitor -no-listener fugue_sql/_antlr/fugue_sql.g4
rm fugue_sql/_antlr/*.interp
Expand Down
21 changes: 21 additions & 0 deletions fugue_notebook/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# flake8: noqa
from typing import Any

from fugue_version import __version__

from fugue_notebook.env import NotebookSetup, setup_fugue_notebook


def load_ipython_extension(ip: Any) -> None:
    """Set up the Fugue notebook environment on the given IPython shell.

    The function name follows IPython's extension hook convention. No custom
    setup object is supplied, so ``setup_fugue_notebook`` falls back to its
    default behavior.

    :param ip: the IPython shell instance to configure
    """
    setup_fugue_notebook(ip, setup_obj=None)


def _jupyter_nbextension_paths():
return [
{
"section": "notebook",
"src": "nbextension",
"dest": "fugue_notebook",
"require": "fugue_notebook/main",
}
]
121 changes: 121 additions & 0 deletions fugue_notebook/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import html
import json
from typing import Any, Callable, Dict, List

import fugue_sql
import pandas as pd
from fugue import (
ExecutionEngine,
NativeExecutionEngine,
Yielded,
make_execution_engine,
register_execution_engine,
)
from fugue.extensions._builtins.outputters import Show
from IPython.core.magic import Magics, cell_magic, magics_class
from IPython.display import HTML, display
from triad import ParamDict, Schema
from triad.utils.convert import get_caller_global_local_vars, to_instance


@magics_class
class FugueSQLMagics(Magics):
    """IPython magics that run Fugue SQL written in notebook cells.

    :param shell: the IPython shell the magics are attached to
    :param pre_conf: configs applied first; values given on the magic line
        override them
    :param post_conf: configs enforced last; a conflicting value coming from
        ``pre_conf`` or the magic line raises ``ValueError``
    """

    def __init__(self, shell, pre_conf, post_conf):
        # You must call the parent constructor
        super().__init__(shell)
        self._pre_conf = pre_conf
        self._post_conf = post_conf
        # Dataframes yielded by previously executed cells, keyed by name, so
        # that later cells can refer to them.
        self._yields: Dict[str, Yielded] = {}

    @cell_magic("fsql")
    def fsql(self, line: str, cell: str) -> None:
        """Run the cell content as Fugue SQL (``%%fsql`` cell magic).

        :param line: text after ``%%fsql``, see :meth:`get_engine` for the
            accepted forms
        :param cell: the Fugue SQL code to execute
        """
        gc, lc = get_caller_global_local_vars(start=-2, end=-6)
        # Precedence, lowest to highest: caller globals, caller locals,
        # previously yielded dataframes.
        gc.update(lc)
        gc.update(self._yields)
        # NOTE(review): presumably dropped so the caller's module name is not
        # exposed as a variable to Fugue SQL -- confirm.
        gc.pop("__name__", None)
        dag = fugue_sql.fsql(cell, gc)
        dag.run(self.get_engine(line, gc))
        self._yields.update(dag.yields)

    def get_engine(self, line: str, lc: Dict[str, Any]) -> ExecutionEngine:
        """Build the execution engine described by the magic line.

        Accepted forms of ``line``:

        * ``<engine>``: the engine name only
        * ``<engine> {json}``: the engine name followed by an inline JSON
          object of configs
        * ``<engine> <variable>``: the engine name followed by the name of a
          variable in ``lc`` holding the configs

        :param line: the text following ``%%fsql``
        :param lc: variables used to look up ``<variable>``
        :raises KeyError: if ``<variable>`` is not found in ``lc``
        :raises ValueError: if a config conflicts with the enforced post
            configs
        :return: the engine created by ``make_execution_engine``
        """
        line = line.strip()
        p = line.find("{")
        if p >= 0:
            engine = line[:p].strip()
            conf = json.loads(line[p:])
        else:
            # Split on any run of whitespace so that extra spaces or tabs
            # between the engine and the variable name do not end up inside
            # the variable name (which previously caused a KeyError).
            parts = line.split(None, 1)
            engine = parts[0] if parts else ""
            conf = ParamDict(None if len(parts) < 2 else lc[parts[1]])
        cf = dict(self._pre_conf)
        cf.update(conf)
        for k, v in self._post_conf.items():
            if k in cf and cf[k] != v:
                raise ValueError(
                    f"{k} must be {v}, but you set to {cf[k]}, you may unset it"
                )
            cf[k] = v
        return make_execution_engine(engine, cf)


def default_pretty_print(
    schema: Schema,
    head_rows: List[List[Any]],
    title: Any,
    rows: int,
    count: int,
):
    """Render a dataframe preview in the notebook.

    Displays, in order: the optional title, the head rows as a pandas
    dataframe, the optional total count and the schema.

    :param schema: schema of the dataframe; its ``names`` become the column
        names of the preview
    :param head_rows: the rows to show
    :param title: title shown above the preview, skipped when ``None``; any
        object is accepted and converted with ``str``
    :param rows: not used by this implementation
    :param count: total row count, shown only when non-negative
    """
    components: List[Any] = []
    if title is not None:
        # ``title`` is typed as Any, so convert before escaping;
        # ``html.escape`` only accepts strings.
        components.append(HTML(f"<h3>{html.escape(str(title))}</h3>"))
    pdf = pd.DataFrame(head_rows, columns=list(schema.names))
    components.append(pdf)
    if count >= 0:
        components.append(HTML(f"<strong>total count: {count}</strong>"))
    # Escape the schema expression so characters such as ``<`` or ``&`` in
    # column names or types are shown literally instead of parsed as markup.
    components.append(HTML(f"<small>schema: {html.escape(str(schema))}</small>"))
    display(*components)


class NotebookSetup(object):
    """Hooks that customize how the Fugue notebook environment is set up.

    Subclass and override the methods to change the configs, the pretty
    printer or the registered execution engines.
    """

    def get_pre_conf(self) -> Dict[str, Any]:
        """Return the pre configs, empty by default."""
        return dict()

    def get_post_conf(self) -> Dict[str, Any]:
        """Return the post configs, empty by default."""
        return dict()

    def get_pretty_print(self) -> Callable:
        """Return the function used to display dataframes."""
        return default_pretty_print

    def register_execution_engines(self):
        """Register the ``native`` engine, plus ``spark`` and ``dask`` when
        their packages can be imported.
        """

        def _make_native(conf, **kwargs):
            return NativeExecutionEngine(conf=conf)

        register_execution_engine("native", _make_native)

        # Spark support is optional: skip silently when it is not installed.
        try:
            import pyspark  # noqa: F401
            from fugue_spark import SparkExecutionEngine

            def _make_spark(conf, **kwargs):
                return SparkExecutionEngine(conf=conf)

            register_execution_engine("spark", _make_spark)
        except ImportError:
            pass

        # Dask support is optional as well.
        try:
            import dask.dataframe  # noqa: F401
            from fugue_dask import DaskExecutionEngine

            def _make_dask(conf, **kwargs):
                return DaskExecutionEngine(conf=conf)

            register_execution_engine("dask", _make_dask)
        except ImportError:
            pass


def setup_fugue_notebook(ipython: Any, setup_obj: Any) -> None:
    """Register the Fugue SQL magics, execution engines and display hook.

    :param ipython: the IPython shell on which the magics are registered
    :param setup_obj: ``None`` to use a plain ``NotebookSetup``, otherwise
        something ``to_instance`` can convert into a ``NotebookSetup``
    """
    if setup_obj is None:
        setup = NotebookSetup()
    else:
        setup = to_instance(setup_obj, NotebookSetup)
    # Copy the configs so the magics never share dicts with the setup object.
    pre_conf = dict(setup.get_pre_conf())
    post_conf = dict(setup.get_post_conf())
    ipython.register_magics(FugueSQLMagics(ipython, pre_conf, post_conf))
    setup.register_execution_engines()
    Show.set_hook(setup.get_pretty_print())
4 changes: 4 additions & 0 deletions fugue_notebook/nbextension/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Fugue Notebook Extension

* Add `%%fsql` magic to run Fugue SQL
* Add Fugue SQL highlight in code cells for `%%fsql`
9 changes: 9 additions & 0 deletions fugue_notebook/nbextension/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def _jupyter_nbextension_paths():
return [
{
"section": "notebook",
"src": "nbextension",
"dest": "fugue_notebook",
"require": "fugue_notebook/main",
}
]
7 changes: 7 additions & 0 deletions fugue_notebook/nbextension/description.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Type: Jupyter Notebook Extension
Compatibility: 3.x, 4.x, 5.x, 6.x
Name: Fugue
Main: main.js
Link: README.md
Description: |
Fugue Jupyter extension
57 changes: 57 additions & 0 deletions fugue_notebook/nbextension/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Jupyter notebook extension that registers a "Fugue SQL" CodeMirror MIME type
// and highlights code cells starting with the %%fsql magic accordingly.
define([
    'require',
    'jquery',
    'base/js/namespace',
    'notebook/js/cell',
    'notebook/js/codecell',
    'codemirror/lib/codemirror'
], function (
    requirejs,
    $,
    Jupyter,
    cell,
    codecell,
    CodeMirror
) {
    "use strict";

    // Turn a space separated word list into a lookup object {word: true},
    // the shape the CodeMirror sql mode expects for its word sets.
    function set(str) {
        var obj = {}, words = str.split(" ");
        for (var i = 0; i < words.length; ++i) obj[words[i]] = true;
        return obj;
    }

    // Keywords specific to Fugue SQL, added on top of the SQL keywords below.
    var fugue_keywords = "fill hash rand even presort persist broadcast params process output outtransform rowcount concurrency prepartition zip print title save append parquet csv json single checkpoint weak strong deterministic yield connect sample seed";

    // Re-run highlight mode detection on every code cell currently in the notebook.
    function highlight_code_cells() {
        Jupyter.notebook.get_cells().forEach(function (c) {
            if (c.cell_type == 'code') { c.auto_highlight(); }
        });
    }

    function load_extension() {

        CodeMirror.defineMIME("text/x-fsql", {
            name: "sql",
            // "data" replaces the misspelled "datata" of the original list.
            keywords: set(fugue_keywords + " add after all alter analyze and anti archive array as asc at between bucket buckets by cache cascade case cast change clear cluster clustered codegen collection column columns comment commit compact compactions compute concatenate cost create cross cube current current_date current_timestamp database databases data dbproperties defined delete delimited deny desc describe dfs directories distinct distribute drop else end escaped except exchange exists explain export extended external false fields fileformat first following for format formatted from full function functions global grant group grouping having if ignore import in index indexes inner inpath inputformat insert intersect interval into is items join keys last lateral lazy left like limit lines list load local location lock locks logical macro map minus msck natural no not null nulls of on optimize option options or order out outer outputformat over overwrite partition partitioned partitions percent preceding principals purge range recordreader recordwriter recover reduce refresh regexp rename repair replace reset restrict revoke right rlike role roles rollback rollup row rows schema schemas select semi separated serde serdeproperties set sets show skewed sort sorted start statistics stored stratify struct table tables tablesample tblproperties temp temporary terminated then to touch transaction transactions transform true truncate unarchive unbounded uncache union unlock unset use using values view when where window with"),
            builtin: set("tinyint smallint int bigint boolean float double string binary timestamp decimal array map struct uniontype delimited serde sequencefile textfile rcfile inputformat outputformat"),
            atoms: set("false true null unknown"),
            operatorChars: /^[*\/+\-%<>!=&|^\/#@?~]/,
            dateSQL: set("datetime date time timestamp"),
            support: set("ODBCdotTable doubleQuote binaryNumber hexNumber commentSlashSlash commentHash")
        });

        // Learned from: https://github.com/AmokHuginnsson/huginn/blob/86a5710f3a2495a0ebe38a95710d000349f9b965/src/codemirror.js
        CodeMirror.modeInfo.push({
            name: "Fugue SQL",
            mime: "text/x-fsql",
            mode: "sql"
        });

        Jupyter.notebook.config.loaded.then(function () {
            // codecell is already injected by define(), so no nested require is needed.
            codecell.CodeCell.options_default.highlight_modes['magic_text/x-fsql'] = {'reg': [/%%fsql/]};
            Jupyter.notebook.events.on('kernel_ready.Kernel', highlight_code_cells);
            // Also highlight right away: if the kernel became ready before this
            // extension was loaded, the event above will not fire again.
            highlight_code_cells();
        }).catch(function on_error(reason) { console.error('fugue_notebook', 'error loading:', reason); });
    }

    return {load_ipython_extension: load_extension};
});
13 changes: 11 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,19 @@ def get_version() -> str:
"sql": ["antlr4-python3-runtime", "jinja2"],
"spark": ["pyspark"],
"dask": ["qpd[dask]"],
"all": ["antlr4-python3-runtime", "jinja2", "pyspark", "qpd[dask]"],
"notebook": ["notebook", "jupyterlab"],
"all": [
"antlr4-python3-runtime",
"jinja2",
"pyspark",
"qpd[dask]",
"notebook",
"jupyterlab",
],
},
classifiers=[
# "3 - Alpha", "4 - Beta" or "5 - Production/Stable"
"Development Status :: 3 - Alpha",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
"License :: OSI Approved :: Apache Software License",
Expand All @@ -54,4 +62,5 @@ def get_version() -> str:
"Programming Language :: Python :: 3 :: Only",
],
python_requires=">=3.6",
package_data={"fugue_notebook": ["nbextension/*"]},
)
Empty file.
Loading

0 comments on commit 658a192

Please sign in to comment.