diff --git a/.gitignore b/.gitignore
index 33a3f6e0..abdd0c48 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,6 +130,7 @@ ipython_config.py
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
+poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
diff --git a/cli.py b/cli.py
index 04387197..5f1be95e 100644
--- a/cli.py
+++ b/cli.py
@@ -10,9 +10,9 @@ def ip_access_list_analyzer(**args):
import ip_access_list_analyzer.ip_acl_analyzer as analyzer
analyzer.main(args)
-def sql_migration_assistant(**args):
+def sql_migration_assistant(**kwargs):
from sql_migration_assistant import hello
- hello()
+ hello(**kwargs)
MAPPING = {
"ip-access-list-analyzer": ip_access_list_analyzer,
diff --git a/sql-migration-assistant/.gitignore b/sql-migration-assistant/.gitignore
new file mode 100644
index 00000000..b1ecc211
--- /dev/null
+++ b/sql-migration-assistant/.gitignore
@@ -0,0 +1,2 @@
+.databrickscfg
+.databricks
diff --git a/sql_migration_assistant/README.md b/sql-migration-assistant/README.md
similarity index 77%
rename from sql_migration_assistant/README.md
rename to sql-migration-assistant/README.md
index 6e3ac747..14db0732 100644
--- a/sql_migration_assistant/README.md
+++ b/sql-migration-assistant/README.md
@@ -15,34 +15,33 @@ tags:
# Project Legion - SQL Migration Assistant
Legion is a Databricks field project to accelerate migrations on to Databricks leveraging the platform’s generative AI
-capabilities. It uses an LLM for code conversion and intent summarisation, presented to users in a front end web
+capabilities. It uses an LLM for code conversion and intent summarisation, presented to users in a front end web
application.
-Legion provides a chatbot interface to users for translating input code (for example T-SQL to Databricks SQL) and
+Legion provides a chatbot interface to users for translating input code (for example T-SQL to Databricks SQL) and
summarising the intent and business purpose of the code. This intent is then embedded for serving in a Vector Search
index for finding similar pieces of code. This presents an opportunity for increased collaboration (find out who is
-working on similar projects), rationalisation (identify duplicates based on intent) and discoverability (semantic search).
+working on similar projects), rationalisation (identify duplicates based on intent) and discoverability (semantic
+search).
-Legion is a solution accelerator - it is *not* a fully baked solution. This is something for you the customer to take
-on and own. This allows you to present a project to upskill your employees, leverage GenAI for a real use case,
+Legion is a solution accelerator - it is *not* a fully baked solution. This is something for you, the customer, to
+take on and own. This allows you to present a project to upskill your employees, leverage GenAI for a real use case,
customise the application to their needs and entirely own the IP.
## Installation Videos
-
https://github.com/user-attachments/assets/e665bcf4-265f-4a47-81eb-60845a72c798
https://github.com/user-attachments/assets/fa622f96-a78c-40b8-9eb9-f6671c4d7b47
https://github.com/user-attachments/assets/1a58a1b5-2dcf-4624-b93f-214735162584
-
-
Setting Legion up is a simple and automated process. Ensure you have the
[Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/index.html) installed and configured with the correct workspace.
-Once the Databricks CLI has been installed and configured, run the following command to install the Databricks Labs
+Once the Databricks CLI has been installed and configured, run the following command to install the Databricks Labs
Sandbox and the SQL Migration Assistant.
+
```bash
databricks labs install sandbox && databricks labs sandbox sql-migration-assistant
```
@@ -50,10 +49,10 @@ databricks labs install sandbox && databricks labs sandbox sql-migration-assista
### What Legion needs - during setup above you will create or choose existing resources for the following:
- A no-isolation shared cluster to host the front end application.
-- A catalog and schema in Unity Catalog.
+- A catalog and schema in Unity Catalog.
- A table to store the code intent statements and their embeddings.
-- A vector search endpoint and an embedding model: see docs
-https://docs.databricks.com/en/generative-ai/vector-search.html#how-to-set-up-vector-search
-- A chat LLM. Pay Per Token is recomended where available, but the set up will also allow for creation of
-a provisioned throughput endpoint.
+- A vector search endpoint and an embedding model: see docs
+ https://docs.databricks.com/en/generative-ai/vector-search.html#how-to-set-up-vector-search
+- A chat LLM. Pay Per Token is recommended where available, but the setup will also allow for creation of
+ a provisioned throughput endpoint.
- A PAT stored in a secret scope chosen by you, under the key `sql-migration-pat`.
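The PAT in the last bullet can be stored with the Python SDK; a minimal sketch, assuming a scope name of your choosing (the key must be `sql-migration-pat`):

```python
# Minimal sketch with databricks-sdk; "legion" is an illustrative scope name.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()
w.secrets.create_scope(scope="legion")
w.secrets.put_secret(scope="legion", key="sql-migration-pat", string_value="<your-pat>")
```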
diff --git a/sql_migration_assistant/docs/Makefile b/sql-migration-assistant/docs/Makefile
similarity index 100%
rename from sql_migration_assistant/docs/Makefile
rename to sql-migration-assistant/docs/Makefile
diff --git a/sql_migration_assistant/docs/_static/css/custom.css b/sql-migration-assistant/docs/_static/css/custom.css
similarity index 100%
rename from sql_migration_assistant/docs/_static/css/custom.css
rename to sql-migration-assistant/docs/_static/css/custom.css
diff --git a/sql_migration_assistant/docs/conf.py b/sql-migration-assistant/docs/conf.py
similarity index 99%
rename from sql_migration_assistant/docs/conf.py
rename to sql-migration-assistant/docs/conf.py
index e338c42a..de230214 100644
--- a/sql_migration_assistant/docs/conf.py
+++ b/sql-migration-assistant/docs/conf.py
@@ -13,7 +13,6 @@
import os
import sys
-
sys.path.insert(0, os.path.abspath("../../python"))
sys.path.append(os.path.abspath("./_theme"))
# -- Project information -----------------------------------------------------
diff --git a/sql_migration_assistant/docs/images/intent_generation.png b/sql-migration-assistant/docs/images/intent_generation.png
similarity index 100%
rename from sql_migration_assistant/docs/images/intent_generation.png
rename to sql-migration-assistant/docs/images/intent_generation.png
diff --git a/sql_migration_assistant/docs/images/legion_logo.png b/sql-migration-assistant/docs/images/legion_logo.png
similarity index 100%
rename from sql_migration_assistant/docs/images/legion_logo.png
rename to sql-migration-assistant/docs/images/legion_logo.png
diff --git a/sql_migration_assistant/docs/images/similar_code.png b/sql-migration-assistant/docs/images/similar_code.png
similarity index 100%
rename from sql_migration_assistant/docs/images/similar_code.png
rename to sql-migration-assistant/docs/images/similar_code.png
diff --git a/sql_migration_assistant/docs/images/translation_prompt.png b/sql-migration-assistant/docs/images/translation_prompt.png
similarity index 100%
rename from sql_migration_assistant/docs/images/translation_prompt.png
rename to sql-migration-assistant/docs/images/translation_prompt.png
diff --git a/sql_migration_assistant/docs/images/translation_screen.png b/sql-migration-assistant/docs/images/translation_screen.png
similarity index 100%
rename from sql_migration_assistant/docs/images/translation_screen.png
rename to sql-migration-assistant/docs/images/translation_screen.png
diff --git a/sql_migration_assistant/docs/index.rst b/sql-migration-assistant/docs/index.rst
similarity index 100%
rename from sql_migration_assistant/docs/index.rst
rename to sql-migration-assistant/docs/index.rst
diff --git a/sql_migration_assistant/docs/reload.py b/sql-migration-assistant/docs/reload.py
similarity index 100%
rename from sql_migration_assistant/docs/reload.py
rename to sql-migration-assistant/docs/reload.py
diff --git a/sql_migration_assistant/docs/requirements.txt b/sql-migration-assistant/docs/requirements.txt
similarity index 100%
rename from sql_migration_assistant/docs/requirements.txt
rename to sql-migration-assistant/docs/requirements.txt
diff --git a/sql_migration_assistant/docs/usage/installation.rst b/sql-migration-assistant/docs/usage/installation.rst
similarity index 100%
rename from sql_migration_assistant/docs/usage/installation.rst
rename to sql-migration-assistant/docs/usage/installation.rst
diff --git a/sql_migration_assistant/docs/usage/usage.rst b/sql-migration-assistant/docs/usage/usage.rst
similarity index 100%
rename from sql_migration_assistant/docs/usage/usage.rst
rename to sql-migration-assistant/docs/usage/usage.rst
diff --git a/sql_migration_assistant/app/__init__.py b/sql-migration-assistant/jobs/__init__.py
similarity index 100%
rename from sql_migration_assistant/app/__init__.py
rename to sql-migration-assistant/jobs/__init__.py
diff --git a/sql_migration_assistant/jobs/bronze_to_silver.py b/sql-migration-assistant/jobs/bronze_to_silver.py
similarity index 97%
rename from sql_migration_assistant/jobs/bronze_to_silver.py
rename to sql-migration-assistant/jobs/bronze_to_silver.py
index d8d97191..3413dc11 100644
--- a/sql_migration_assistant/jobs/bronze_to_silver.py
+++ b/sql-migration-assistant/jobs/bronze_to_silver.py
@@ -1,28 +1,22 @@
# Databricks notebook source
# DBTITLE 1,get params
import json
+
from pyspark.sql.types import (
- ArrayType,
StructType,
StructField,
StringType,
MapType,
- IntegerType,
- TimestampType,
)
-import pyspark.sql.functions as f
-from pyspark.sql.functions import udf, pandas_udf
agent_configs = json.loads(dbutils.widgets.get("agent_configs"))
app_configs = json.loads(dbutils.widgets.get("app_configs"))
-
# COMMAND ----------
checkpoint_dir = app_configs["VOLUME_NAME_CHECKPOINT_PATH"]
volume_path = app_configs["VOLUME_NAME_INPUT_PATH"]
-
# COMMAND ----------
bronze_raw_code = f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.bronze_raw_code'
@@ -70,7 +64,6 @@
"""
)
-
silver_llm_responses = (
f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.silver_llm_responses'
)
@@ -87,7 +80,6 @@
"""
)
-
gold_table = (
f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.gold_transformed_notebooks'
)
@@ -104,7 +96,6 @@
"""
)
-
# COMMAND ----------
# DBTITLE 1,convert agent_configs input string to a dataframe
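For reference, the shape of the two widget payloads this notebook parses is set by `execute_workflow()` in `frontend/callbacks.py` later in this diff; a trimmed, illustrative sketch:

```python
# Trimmed sketch of the widget payloads (values illustrative): agent_configs
# is a JSON list of single-agent config maps, app_configs a flat JSON object.
import json

agent_configs = json.loads("""[[{"translation_agent": {
    "system_prompt": "Translate this code to Spark SQL.",
    "endpoint": "my-llm-endpoint", "max_tokens": 3500, "temperature": 0.0}}]]""")
app_configs = json.loads('{"CATALOG": "main", "SCHEMA": "sql_migration"}')

bronze_raw_code = f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.bronze_raw_code'
```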
diff --git a/sql_migration_assistant/jobs/call_agents.py b/sql-migration-assistant/jobs/call_agents.py
similarity index 95%
rename from sql_migration_assistant/jobs/call_agents.py
rename to sql-migration-assistant/jobs/call_agents.py
index 598b16b7..aba81979 100644
--- a/sql_migration_assistant/jobs/call_agents.py
+++ b/sql-migration-assistant/jobs/call_agents.py
@@ -1,19 +1,14 @@
# Databricks notebook source
+import json
+
+import pyspark.sql.functions as f
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import ChatMessage, ChatMessageRole
-import json
-import os
+from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import (
- ArrayType,
- StructType,
- StructField,
StringType,
MapType,
- IntegerType,
- TimestampType,
)
-import pyspark.sql.functions as f
-from pyspark.sql.functions import udf, pandas_udf
# COMMAND ----------
diff --git a/sql_migration_assistant/jobs/silver_to_gold.py b/sql-migration-assistant/jobs/silver_to_gold.py
similarity index 99%
rename from sql_migration_assistant/jobs/silver_to_gold.py
rename to sql-migration-assistant/jobs/silver_to_gold.py
index 7228aa06..b57129b6 100644
--- a/sql_migration_assistant/jobs/silver_to_gold.py
+++ b/sql-migration-assistant/jobs/silver_to_gold.py
@@ -1,10 +1,11 @@
# Databricks notebook source
import base64
+import json
+
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.workspace import ImportFormat, Language
from pyspark.sql import functions as f
from pyspark.sql.types import *
-import json
# COMMAND ----------
@@ -34,6 +35,7 @@
prompt_id = dbutils.jobs.taskValues.get(taskKey="ingest_to_holding", key="promptID")
output_volume_path = app_configs["VOLUME_NAME_OUTPUT_PATH"]
+
# COMMAND ----------
@@ -110,7 +112,6 @@ def write_notebook_code(llm_responses):
gold_df.display()
-
# COMMAND ----------
temp_table_name = "gold_temp"
diff --git a/sql-migration-assistant/requirements.txt b/sql-migration-assistant/requirements.txt
new file mode 100644
index 00000000..49230bde
--- /dev/null
+++ b/sql-migration-assistant/requirements.txt
@@ -0,0 +1,11 @@
+databricks-sdk==0.30.0
+pyyaml
+databricks-labs-blueprint==0.8.2
+databricks-labs-lsql==0.9.0
+gradio==5.5.0
+aiohttp==3.10.5
+fastapi
+pydantic==2.8.2
+dbtunnel==0.14.6
+mlflow
+openai
\ No newline at end of file
diff --git a/sql_migration_assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py
similarity index 68%
rename from sql_migration_assistant/run_app_from_databricks_notebook.py
rename to sql-migration-assistant/run_app_from_databricks_notebook.py
index c3613b33..6cc9ad3f 100644
--- a/sql_migration_assistant/run_app_from_databricks_notebook.py
+++ b/sql-migration-assistant/run_app_from_databricks_notebook.py
@@ -7,17 +7,13 @@
# MAGIC If you want to share the app with users outside of Databricks, for example so non technical SMEs can contribute to LLM prompt development, the notebook needs to run on a no isolation shared cluster.
# COMMAND ----------
-pip install databricks-sdk -U -q
+%pip install .
# COMMAND ----------
-pip install gradio==4.27.0 pyyaml aiohttp==3.10.5 databricks-labs-blueprint==0.8.2 databricks-labs-lsql==0.9.0 -q
+dbutils.library.restartPython()
# COMMAND ----------
-pip install fastapi==0.112.2 pydantic==2.8.2 dbtunnel==0.14.6 openai -q
-# COMMAND ----------
-dbutils.library.restartPython()
+from sql_migration_assistant.utils.runindatabricks import run_app
-# COMMAND ----------
-from utils.runindatabricks import run_app
-run_app()
\ No newline at end of file
+run_app()
diff --git a/sql-migration-assistant/setup.py b/sql-migration-assistant/setup.py
new file mode 100644
index 00000000..4cb68723
--- /dev/null
+++ b/sql-migration-assistant/setup.py
@@ -0,0 +1,26 @@
+from setuptools import setup, find_packages
+
+
+# Read the requirements.txt file
+def load_requirements(filename="requirements.txt"):
+ with open(filename, "r") as file:
+ return file.read().splitlines()
+
+
+setup(
+ name="sql_migration_assistant",
+ version="0.1",
+ packages=find_packages(where="src"), # Specify src as the package directory
+ package_dir={"": "src"},
+ include_package_data=True, # Include files specified in MANIFEST.in
+ package_data={
+ "sql_migration_assistant": ["config.yml"], # Include YAML file
+ },
+ classifiers=[
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+ ],
+ install_requires=load_requirements(),
+ python_requires=">=3.10",
+)
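`find_packages(where="src")` relies on the src layout introduced by the renames in this PR; roughly:

```python
# Sketch of the layout setup.py expects, per this PR's renames:
#
#   sql-migration-assistant/
#     setup.py
#     requirements.txt
#     src/
#       sql_migration_assistant/
#         __init__.py
#         config.py
#         frontend/ ...
#
# With package_dir={"": "src"}, `pip install .` (or `%pip install .` in the
# notebook above) installs sql_migration_assistant with config.yml bundled.
from setuptools import find_packages

print(find_packages(where="src"))  # e.g. ['sql_migration_assistant', ...]
```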
diff --git a/sql_migration_assistant/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/__init__.py
similarity index 79%
rename from sql_migration_assistant/__init__.py
rename to sql-migration-assistant/src/sql_migration_assistant/__init__.py
index c0d899ca..02138bad 100644
--- a/sql_migration_assistant/__init__.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/__init__.py
@@ -1,12 +1,18 @@
-from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant
-from databricks.sdk import WorkspaceClient
-from databricks.labs.blueprint.tui import Prompts
-import yaml
from pathlib import Path
+import yaml
+from databricks.labs.blueprint.tui import Prompts
+from databricks.sdk import WorkspaceClient
+
+from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant
+
-def hello():
- w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1")
+def hello(**kwargs):
+ w = WorkspaceClient(
+ product="sql_migration_assistant",
+ product_version="0.0.1",
+ profile=kwargs.get("profile"),
+ )
p = Prompts()
setter_upper = SetUpMigrationAssistant()
setter_upper.check_cloud(w)
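With the profile forwarded, `hello()` can target a specific workspace from `~/.databrickscfg`; a usage sketch (profile name illustrative):

```python
# Usage sketch: profile selects an entry in ~/.databrickscfg; omitting it
# leaves profile=None, so the SDK's default credential chain applies.
from sql_migration_assistant import hello

hello(profile="DEV")  # authenticate with the [DEV] profile
hello()               # default auth resolution
```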
diff --git a/sql_migration_assistant/infra/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/app/__init__.py
similarity index 100%
rename from sql_migration_assistant/infra/__init__.py
rename to sql-migration-assistant/src/sql_migration_assistant/app/__init__.py
diff --git a/sql_migration_assistant/app/llm.py b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py
similarity index 91%
rename from sql_migration_assistant/app/llm.py
rename to sql-migration-assistant/src/sql_migration_assistant/app/llm.py
index 71e31caf..6769d89d 100644
--- a/sql_migration_assistant/app/llm.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py
@@ -1,8 +1,5 @@
import gradio as gr
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.serving import ChatMessage, ChatMessageRole
-
class LLMCalls:
def __init__(self, openai_client, foundation_llm_name):
@@ -44,7 +41,7 @@ def call_llm(self, messages, max_tokens, temperature):
def llm_translate(self, system_prompt, input_code, max_tokens, temperature):
messages = [
{"role": "system", "content": system_prompt},
- {"role": "user", "content": input_code}
+ {"role": "user", "content": input_code},
]
# call the LLM end point.
@@ -58,7 +55,7 @@ def llm_translate(self, system_prompt, input_code, max_tokens, temperature):
def llm_intent(self, system_prompt, input_code, max_tokens, temperature):
messages = [
{"role": "system", "content": system_prompt},
- {"role": "user", "content": input_code}
+ {"role": "user", "content": input_code},
]
# call the LLM end point.
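`LLMCalls` now talks to an OpenAI-compatible client instead of the SDK serving classes; a sketch of the call `call_llm` presumably wraps, mirroring the client wiring in `frontend/callbacks.py` (host, token, and model name illustrative):

```python
# Assumed shape of the underlying call: Databricks model serving exposes an
# OpenAI-compatible endpoint, so the stock openai client works against it.
from openai import OpenAI

openai_client = OpenAI(api_key="<token>", base_url="https://<host>/serving-endpoints")
response = openai_client.chat.completions.create(
    model="my-foundation-model",  # FOUNDATION_MODEL_NAME in config.py
    messages=[
        {"role": "system", "content": "Translate this code to Spark SQL."},
        {"role": "user", "content": "SELECT TOP 5 * FROM dbo.sales;"},
    ],
    max_tokens=3500,
    temperature=0.0,
)
print(response.choices[0].message.content)
```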
diff --git a/sql_migration_assistant/app/prompt_helper.py b/sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py
similarity index 99%
rename from sql_migration_assistant/app/prompt_helper.py
rename to sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py
index ecc16f1d..d2b040e4 100644
--- a/sql_migration_assistant/app/prompt_helper.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py
@@ -1,4 +1,6 @@
import gradio as gr
+
+
class PromptHelper:
def __init__(self, see, catalog, schema, prompt_table):
self.see = see
diff --git a/sql_migration_assistant/app/similar_code.py b/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py
similarity index 100%
rename from sql_migration_assistant/app/similar_code.py
rename to sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py
index 45c33887..4bef3911 100644
--- a/sql_migration_assistant/app/similar_code.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py
@@ -1,5 +1,5 @@
-from databricks.sdk import WorkspaceClient
from databricks.labs.lsql.core import StatementExecutionExt
+from databricks.sdk import WorkspaceClient
class SimilarCode:
diff --git a/sql-migration-assistant/src/sql_migration_assistant/config.py b/sql-migration-assistant/src/sql_migration_assistant/config.py
new file mode 100644
index 00000000..eb44faf0
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/config.py
@@ -0,0 +1,16 @@
+import os
+
+FOUNDATION_MODEL_NAME = os.environ.get("SERVED_FOUNDATION_MODEL_NAME")
+SQL_WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID")
+VECTOR_SEARCH_ENDPOINT_NAME = os.environ.get("VECTOR_SEARCH_ENDPOINT_NAME")
+VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME")
+CODE_INTENT_TABLE_NAME = os.environ.get("CODE_INTENT_TABLE_NAME")
+CATALOG = os.environ.get("CATALOG")
+SCHEMA = os.environ.get("SCHEMA")
+VOLUME_NAME = os.environ.get("VOLUME_NAME")
+DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST")
+TRANSFORMATION_JOB_ID = os.environ.get("TRANSFORMATION_JOB_ID")
+WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION")
+VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH")
+PROMPT_HISTORY_TABLE_NAME = os.environ.get("PROMPT_HISTORY_TABLE_NAME")
+DATABRICKS_TOKEN = os.environ.get("DATABRICKS_TOKEN")
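These names are resolved once at import time, so the hosting cluster must export the environment variables before the app process starts; a local smoke-test sketch (values illustrative):

```python
# Smoke-test sketch: export a couple of the variables before first import.
import os

os.environ["CATALOG"] = "main"
os.environ["SCHEMA"] = "sql_migration"

from sql_migration_assistant import config

print(config.CATALOG, config.SCHEMA)  # main sql_migration
```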
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py
new file mode 100644
index 00000000..f8318e3f
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py
@@ -0,0 +1,113 @@
+import gradio as gr
+
+from sql_migration_assistant.frontend.Tabs.BatchInputCodeTab import BatchInputCodeTab
+from sql_migration_assistant.frontend.Tabs.BatchOutputTab import BatchOutputTab
+from sql_migration_assistant.frontend.Tabs.CodeExplanationTab import CodeExplanationTab
+from sql_migration_assistant.frontend.Tabs.InstructionsTab import InstructionsTab
+from sql_migration_assistant.frontend.Tabs.InteractiveInputCodeTab import (
+ InteractiveInputCodeTab,
+)
+from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import (
+ InteractiveOutputTab,
+)
+from sql_migration_assistant.frontend.Tabs.SimilarCodeTab import SimilarCodeTab
+from sql_migration_assistant.frontend.Tabs.TranslationTab import TranslationTab
+from sql_migration_assistant.frontend.callbacks import (
+ read_code_file,
+ produce_preview,
+    execute_workflow,
+ save_intent_wrapper,
+)
+
+
+class GradioFrontend:
+ intro = """
+
+# Databricks Legion Migration Accelerator
+"""
+
+ def __init__(self):
+ with gr.Blocks(theme=gr.themes.Soft()) as self.app:
+ self.intro_markdown = gr.Markdown(self.intro)
+ self.instructions_tab = InstructionsTab()
+
+ self.interactive_input_code_tab = InteractiveInputCodeTab()
+ self.batch_input_code_tab = BatchInputCodeTab()
+ self.code_explanation_tab = CodeExplanationTab()
+ self.translation_tab = TranslationTab()
+ self.similar_code_tab = SimilarCodeTab()
+ self.batch_output_tab = BatchOutputTab()
+ self.interactive_output_tab = InteractiveOutputTab()
+
+ self.similar_code_tab.submit.click(
+ save_intent_wrapper,
+ inputs=[
+ self.translation_tab.translation_input_code,
+ self.code_explanation_tab.explained,
+ ],
+ )
+ self.batch_output_tab.execute.click(
+            execute_workflow,
+ inputs=[
+ self.code_explanation_tab.intent_system_prompt,
+ self.code_explanation_tab.intent_temperature,
+ self.code_explanation_tab.intent_max_tokens,
+ self.translation_tab.translation_system_prompt,
+ self.translation_tab.translation_temperature,
+ self.translation_tab.translation_max_tokens,
+ ],
+ outputs=self.batch_output_tab.run_status,
+ )
+ self.interactive_output_tab.produce_preview_button.click(
+ produce_preview,
+ inputs=[
+ self.code_explanation_tab.explained,
+ self.translation_tab.translated,
+ ],
+ outputs=self.interactive_output_tab.preview,
+ )
+ self.add_logic_loading_batch_mode()
+ self.add_logic_loading_interactive_mode()
+ self.change_tabs_based_on_operation_mode()
+
+ def add_logic_loading_batch_mode(self):
+ for output in [
+ self.batch_input_code_tab.selected_file,
+ self.translation_tab.translation_input_code,
+ self.code_explanation_tab.intent_input_code,
+ self.similar_code_tab.similar_code_input,
+ ]:
+ self.batch_input_code_tab.select_code_file.select(
+ fn=read_code_file,
+ inputs=[
+ self.batch_input_code_tab.volume_path,
+ self.batch_input_code_tab.select_code_file,
+ ],
+ outputs=output,
+ )
+
+ def add_logic_loading_interactive_mode(self):
+ for output in [
+ self.translation_tab.translation_input_code,
+ self.code_explanation_tab.intent_input_code,
+ self.similar_code_tab.similar_code_input,
+ ]:
+ self.interactive_input_code_tab.interactive_code_button.click(
+ fn=lambda x: gr.update(value=x),
+ inputs=self.interactive_input_code_tab.interactive_code,
+ outputs=output,
+ )
+
+ def change_tabs_based_on_operation_mode(self):
+ for tab in [self.batch_input_code_tab, self.batch_output_tab]:
+ self.instructions_tab.operation.change(
+ lambda x: (gr.update(visible=(x != "Interactive mode"))),
+ self.instructions_tab.operation,
+ tab.tab,
+ )
+ for tab in [self.interactive_input_code_tab, self.interactive_output_tab]:
+ self.instructions_tab.operation.change(
+ lambda x: (gr.update(visible=(x == "Interactive mode"))),
+ self.instructions_tab.operation,
+ tab.tab,
+ )
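`GradioFrontend` leaves a fully wired `gr.Blocks` in `self.app`, so serving it is one call; a minimal launch sketch, assuming the config environment variables are already set; host and port are illustrative, and `utils/runindatabricks.run_app` (used by the notebook earlier in this diff) presumably wraps something similar via dbtunnel:

```python
# Minimal sketch: construct the Blocks app and serve it.
from sql_migration_assistant.frontend.GradioFrontend import GradioFrontend

frontend = GradioFrontend()
frontend.app.launch(server_name="0.0.0.0", server_port=8080)
```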
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py
new file mode 100644
index 00000000..06739b49
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py
@@ -0,0 +1,37 @@
+import gradio as gr
+
+from sql_migration_assistant.config import (
+ DATABRICKS_HOST,
+ CATALOG,
+ SCHEMA,
+ VOLUME_NAME,
+ VOLUME_NAME_INPUT_PATH,
+)
+from sql_migration_assistant.frontend.callbacks import list_files
+
+
+class BatchInputCodeTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Select code", visible=False) as tab:
+ self.tab = tab
+ self.header = gr.Markdown(
+ f"""## Select a file to test your agents on.
+
+ Legion can batch process a Volume of files to generate Databricks notebooks. The files to translate must be
+ added to the *Input Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME}).
+
+            Here you can select a file to fine-tune your agent prompts against.
+ """
+ )
+ self.volume_path = gr.Textbox(value=VOLUME_NAME_INPUT_PATH, visible=False)
+
+ self.load_files = gr.Button("Load Files from Volume")
+ self.select_code_file = gr.Radio(label="Select Code File")
+ self.selected_file = gr.Code(
+ label="Selected Code File", language="sql-msSQL"
+ )
+
+ self.load_files.click(list_files, self.volume_path, self.select_code_file)
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py
new file mode 100644
index 00000000..a8be9ea1
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py
@@ -0,0 +1,34 @@
+import gradio as gr
+
+from sql_migration_assistant.config import WORKSPACE_LOCATION
+from sql_migration_assistant.frontend.callbacks import make_status_box_visible
+
+
+class BatchOutputTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Execute Job", visible=False) as tab:
+ self.tab = tab
+ self.header = gr.Markdown(
+ """ ## Execute Job
+
+            This tab is for executing the job to convert the code files in the Unity Catalog Volume to Databricks
+            Notebooks. Once you are happy with your system prompts and the explanation and translation outputs,
+ click the execute button below.
+
+ This will kick off a Workflow which will ingest the code files, write them to a Delta Table, apply the AI
+ agents, and output a Databricks Notebook per input code file. This notebook will have the intent at the top
+ of the notebook in a markdown cell, and the translated code in the cell below. These notebooks are found in
+ the workspace at *{WORKSPACE_LOCATION}/outputNotebooks* and in the *Output Code* folder in the UC Volume
+
+ The intent will also be stored in a Unity Catalog table and vector search index for finding similar code.
+ """
+ )
+ self.execute = gr.Button(
+ value="EXECUTE CODE TRANSFORMATION",
+ size="lg",
+ )
+ self.run_status = gr.Markdown(label="Job Status Page", visible=False)
+
+ self.execute.click(fn=make_status_box_visible, outputs=self.run_status)
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py
new file mode 100644
index 00000000..cb5ceb31
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py
@@ -0,0 +1,148 @@
+import gradio as gr
+
+from sql_migration_assistant.frontend.callbacks import (
+ llm_intent_wrapper,
+ get_prompt_details,
+ prompt_helper,
+)
+
+
+class CodeExplanationTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Code Explanation") as tab:
+ self.tab = tab
+ self.header = gr.Markdown(
+ """
+ ## An AI tool to generate the intent of your code.
+
+ In this panel you need to iterate on the system prompt to refine the intent the AI generates for your code.
+ This intent will be stored in Unity Catalog, and can be used for finding similar code, for documentation,
+ and to help with writing new code in Databricks to achieve the same goal.
+ """
+ )
+ with gr.Accordion(label="Advanced Intent Settings", open=True):
+ gr.Markdown(
+ """ ### Advanced settings for the generating the intent of the input code.
+
+ The *Temperature* paramater controls the randomness of the AI's response. Higher values will result in
+ more creative responses, while lower values will result in more predictable responses.
+ """
+ )
+
+ with gr.Row():
+ self.intent_temperature = gr.Number(
+ label="Temperature. Float between 0.0 and 1.0", value=0.0
+ )
+ self.intent_max_tokens = gr.Number(
+ label="Max tokens. Check your LLM docs for limit.", value=3500
+ )
+ with gr.Row():
+ self.intent_system_prompt = gr.Textbox(
+ label="System prompt of the LLM to generate the intent.",
+ placeholder="Add your system prompt here, for example:\n"
+ "Explain the intent of this code with an example use case.",
+ lines=3,
+ )
+ # these bits relate to saving and loading of prompts
+ with gr.Row():
+ self.save_intent_prompt = gr.Button("Save intent prompt")
+ self.load_intent_prompt = gr.Button("Load intent prompt")
+ # hidden button and display box for saved prompts, made visible when the load button is clicked
+ self.intent_prompt_id_to_load = gr.Textbox(
+ label="Prompt ID to load",
+ visible=False,
+ placeholder="Enter the ID of the prompt to load from the table below.",
+ )
+ self.loaded_intent_prompts = gr.Dataframe(
+ label="Saved prompts.",
+ visible=False,
+ headers=[
+ "id",
+ "Prompt",
+ "Temperature",
+ "Max Tokens",
+ "Save Datetime",
+ ],
+ interactive=False,
+ wrap=True,
+ )
+ # get the prompts and populate the table and make it visible
+ self.load_intent_prompt.click(
+ fn=lambda: gr.update(
+ visible=True,
+ value=prompt_helper.get_prompts("intent_agent"),
+ ),
+ inputs=None,
+ outputs=[self.loaded_intent_prompts],
+ )
+ # make the input box for the prompt id visible
+ self.load_intent_prompt.click(
+ fn=lambda: gr.update(visible=True),
+ inputs=None,
+ outputs=[self.intent_prompt_id_to_load],
+ )
+
+ self.intent_prompt_id_to_load.change(
+ fn=get_prompt_details,
+ inputs=[
+ self.intent_prompt_id_to_load,
+ self.loaded_intent_prompts,
+ ],
+ outputs=[
+ self.intent_system_prompt,
+ self.intent_temperature,
+ self.intent_max_tokens,
+ ],
+ )
+ # save the prompt
+ self.save_intent_prompt.click(
+ fn=prompt_helper.save_prompt,
+ inputs=[
+ gr.Textbox("intent_agent"),
+ self.intent_system_prompt,
+ self.intent_temperature,
+ self.intent_max_tokens,
+ ],
+ outputs=None,
+ )
+
+ with gr.Accordion(label="Intent Pane", open=True):
+ gr.Markdown(
+ """ ## AI generated intent of what your code aims to do.
+ """
+ )
+ self.explain_button = gr.Button("Explain")
+ with gr.Row():
+ with gr.Column():
+ gr.Markdown(""" ## Input Code.""")
+
+ # input box for SQL code with nice formatting
+ self.intent_input_code = gr.Code(
+ label="Input SQL",
+ language="sql-msSQL",
+ )
+
+ with gr.Column():
+ # divider subheader
+ gr.Markdown(""" ## Code intent""")
+                    # output box for the AI generated intent
+ self.explained = gr.Textbox(
+ label="AI generated intent of your code."
+ )
+
+            # generate the intent of the input code
+ self.explain_button.click(
+ fn=llm_intent_wrapper,
+ inputs=[
+ self.intent_system_prompt,
+ self.intent_input_code,
+ self.intent_max_tokens,
+ self.intent_temperature,
+ ],
+ outputs=self.explained,
+ )
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py
new file mode 100644
index 00000000..82bbf7cb
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py
@@ -0,0 +1,19 @@
+import gradio as gr
+
+
+class FeedbackTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Instructions") as self.tab:
+ self.header = gr.Markdown(
+ """
+ ## Comments? Feature Suggestions? Bugs?
+
+ Below is the link to the Legion Github repo for you to raise an issue.
+
+ On the right hand side of the Issue page, please assign it to **robertwhiffin**, and select the project **Legion**.
+ Raise the issue on the Github repo for Legion [here](https://github.com/databrickslabs/sandbox/issues/new).
+ """
+ )
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py
new file mode 100644
index 00000000..e1de6c08
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py
@@ -0,0 +1,40 @@
+import gradio as gr
+
+
+class InstructionsTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Instructions") as self.tab:
+ self.header = gr.Markdown(
+ """
+            Legion is an AI-powered tool that aims to accelerate the migration of code to Databricks at low cost and effort. It
+            does this by using AI to translate, explain, and make your code discoverable.
+
+            This interface is the Legion Control Panel. Here you can configure the AI agents for translation and explanation
+            to fit your needs, incorporating your expertise and knowledge of the codebase by adjusting the AI agents' instructions.
+
+ Legion can work in a batch or interactive fashion.
+
+ *Interactive operation*
+            Fine-tune the AI agents on a single file and output the result as a Databricks notebook.
+ Use this UI to adjust the system prompts and instructions for the AI agents to generate the best translation and intent.
+
+ *Batch operation*
+            Process a Volume of files to generate Databricks notebooks. Use this UI to fine-tune your agent prompts against selected
+ files before executing a Workflow to transform all files in the Volume, outputting Databricks notebooks with the AI
+ generated intent and translation.
+
+
+ Please select your mode of operation to get started.
+
+ """
+ )
+ self.operation = gr.Radio(
+ label="Select operation mode",
+ choices=["Interactive mode", "Batch mode"],
+ value="Interactive mode",
+ type="value",
+ interactive=True,
+ )
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py
new file mode 100644
index 00000000..d3bdb479
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py
@@ -0,0 +1,20 @@
+import gradio as gr
+
+
+class InteractiveInputCodeTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Input code", visible=True) as tab:
+ self.header = gr.Markdown(
+ f"""## Paste in some code to test your agents on.
+ """
+ )
+ self.interactive_code_button = gr.Button("Ingest code")
+ self.interactive_code = gr.Code(
+ label="Paste your code in here", language="sql-msSQL"
+ )
+ self.interactive_code_button.click(fn=lambda: gr.Info("Code ingested!"))
+
+ self.tab = tab
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py
new file mode 100644
index 00000000..cf23ad73
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py
@@ -0,0 +1,52 @@
+import gradio as gr
+
+from sql_migration_assistant.config import DATABRICKS_HOST, CATALOG, SCHEMA, VOLUME_NAME
+from sql_migration_assistant.frontend.callbacks import write_adhoc_to_workspace
+
+
+class InteractiveOutputTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Write file to Workspace") as tab:
+ self.tab = tab
+ self.header = gr.Markdown(
+ f""" ## Write to Workspace
+
+ Write out your explained and translated file to a notebook in the workspace.
+ You must provide a filename for the notebook. The notebook will be written to the workspace, saved to the
+ Output Code location in the Unity Catalog Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME})
+ , and the intent will be saved to the intent table.
+ """
+ )
+ template = """
+ -- Databricks notebook source
+ -- MAGIC %md
+ -- MAGIC # This notebook was AI generated. AI can make mistakes. This is provided as a tool to accelerate your migration.
+ -- MAGIC
+ -- MAGIC ### AI Generated Intent
+ -- MAGIC
+ -- MAGIC INTENT_GOES_HERE
+
+ -- COMMAND ----------
+
+ TRANSLATED_CODE_GOES_HERE
+ """
+ with gr.Row():
+ self.produce_preview_button = gr.Button("Produce Preview")
+ with gr.Column():
+ self.file_name = gr.Textbox(label="Filename for the notebook")
+ self.write_to_workspace_button = gr.Button("Write to Workspace")
+ self.adhoc_write_output = gr.Markdown(
+ label="Notebook output location"
+ )
+
+ self.preview = gr.Code(label="Preview", language="python")
+
+ # write file to notebook
+ self.write_to_workspace_button.click(
+ fn=write_adhoc_to_workspace,
+ inputs=[self.file_name, self.preview],
+ outputs=self.adhoc_write_output,
+ )
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py
new file mode 100644
index 00000000..796b3419
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py
@@ -0,0 +1,46 @@
+import gradio as gr
+
+from sql_migration_assistant.frontend.callbacks import similar_code_helper
+
+
+class SimilarCodeTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Find Similar Code") as tab:
+ self.tab = tab
+ self.header = gr.Markdown(
+ """
+ # ** Work in Progress **
+ ## An AI tool to find similar code.
+ """
+ )
+ with gr.Accordion(label="Similar Code Pane", open=True):
+ gr.Markdown(
+ """ ## Similar code
+
+ This code is thought to be similar to what you are doing, based on comparing the intent of your code with the intent of this code.
+ """
+ )
+ # a button
+ self.find_similar_code = gr.Button("Find similar code")
+ # a row with an code and text box to show the similar code
+ with gr.Row():
+ self.similar_code_input = gr.Code(
+ label="Input Code.", language="sql-sparkSQL"
+ )
+ self.similar_code_output = gr.Code(
+ label="Similar code to yours.", language="sql-sparkSQL"
+ )
+ self.similar_intent = gr.Textbox(label="The similar codes intent.")
+
+ # a button
+ self.submit = gr.Button("Save code and intent")
+
+ # assign actions to buttons when clicked.
+ self.find_similar_code.click(
+ fn=similar_code_helper.get_similar_code,
+ inputs=self.similar_code_input,
+ outputs=[self.similar_code_output, self.similar_intent],
+ )
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py
new file mode 100644
index 00000000..48552680
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py
@@ -0,0 +1,143 @@
+import gradio as gr
+
+from sql_migration_assistant.frontend.callbacks import (
+ llm_translate_wrapper,
+ prompt_helper,
+ get_prompt_details,
+)
+
+
+class TranslationTab:
+ header: gr.Markdown
+ tab: gr.Tab
+
+ def __init__(self):
+ with gr.Tab(label="Translation") as tab:
+ self.tab = tab
+ self.header = gr.Markdown(
+ """
+ ## An AI tool to translate your code.
+
+ In this panel you need to iterate on the system prompt to refine the translation the AI generates for your code.
+
+ """
+ )
+ with gr.Accordion(label="Translation Advanced Settings", open=True):
+ gr.Markdown(
+ """ ### Advanced settings for the translating the input code.
+
+ The *Temperature* paramater controls the randomness of the AI's response. Higher values will result in
+ more creative responses, while lower values will result in more predictable responses.
+ """
+ )
+ with gr.Row():
+ self.translation_temperature = gr.Number(
+ label="Temperature. Float between 0.0 and 1.0", value=0.0
+ )
+ self.translation_max_tokens = gr.Number(
+ label="Max tokens. Check your LLM docs for limit.", value=3500
+ )
+ with gr.Row():
+ self.translation_system_prompt = gr.Textbox(
+ label="Instructions for the LLM translation tool.",
+ placeholder="Add your system prompt here, for example:\n"
+ "Translate this code to Spark SQL.",
+ lines=3,
+ )
+ with gr.Row():
+ self.save_translation_prompt = gr.Button("Save translation prompt")
+ self.load_translation_prompt = gr.Button("Load translation prompt")
+ # hidden button and display box for saved prompts, made visible when the load button is clicked
+ self.translation_prompt_id_to_load = gr.Textbox(
+ label="Prompt ID to load",
+ visible=False,
+ placeholder="Enter the ID of the prompt to load from the table below.",
+ )
+ self.loaded_translation_prompts = gr.Dataframe(
+ label="Saved prompts.",
+ visible=False,
+ headers=[
+ "id",
+ "Prompt",
+ "Temperature",
+ "Max Tokens",
+ "Save Datetime",
+ ],
+ interactive=False,
+ wrap=True,
+ )
+ # get the prompts and populate the table and make it visible
+ self.load_translation_prompt.click(
+ fn=lambda: gr.update(
+ visible=True,
+ value=prompt_helper.get_prompts("translation_agent"),
+ ),
+ inputs=None,
+ outputs=[self.loaded_translation_prompts],
+ )
+ # make the input box for the prompt id visible
+ self.load_translation_prompt.click(
+ fn=lambda: gr.update(visible=True),
+ inputs=None,
+ outputs=[self.translation_prompt_id_to_load],
+ )
+            # retrieve the row from the table and populate the system prompt, temperature, and max tokens
+ self.translation_prompt_id_to_load.change(
+ fn=get_prompt_details,
+ inputs=[
+ self.translation_prompt_id_to_load,
+ self.loaded_translation_prompts,
+ ],
+ outputs=[
+ self.translation_system_prompt,
+ self.translation_temperature,
+ self.translation_max_tokens,
+ ],
+ )
+ self.save_translation_prompt.click(
+ fn=lambda prompt, temp, tokens: prompt_helper.save_prompt(
+ "translation_agent", prompt, temp, tokens
+ ),
+ inputs=[
+ self.translation_system_prompt,
+ self.translation_temperature,
+ self.translation_max_tokens,
+ ],
+ outputs=None,
+ )
+
+ with gr.Accordion(label="Translation Pane", open=True):
+ gr.Markdown(""" ### Source code for translation to Spark-SQL.""")
+ # a button labelled translate
+ self.translate_button = gr.Button("Translate")
+ with gr.Row():
+ with gr.Column():
+ gr.Markdown(""" ## Input code.""")
+
+ # input box for SQL code with nice formatting
+ self.translation_input_code = gr.Code(
+ label="Input SQL",
+ language="sql-msSQL",
+ )
+
+ with gr.Column():
+ # divider subheader
+ gr.Markdown(""" ## Translated Code""")
+ # output box of the T-SQL translated to Spark SQL
+ self.translated = gr.Code(
+ label="Your code translated to Spark SQL",
+ language="sql-sparkSQL",
+ )
+
+            # do the translation
+ self.translate_button.click(
+ fn=llm_translate_wrapper,
+ inputs=[
+ self.translation_system_prompt,
+ self.translation_input_code,
+ self.translation_max_tokens,
+ self.translation_temperature,
+ ],
+ outputs=self.translated,
+ )
diff --git a/sql_migration_assistant/jobs/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py
similarity index 100%
rename from sql_migration_assistant/jobs/__init__.py
rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py
diff --git a/sql_migration_assistant/tests/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/__init__.py
similarity index 100%
rename from sql_migration_assistant/tests/__init__.py
rename to sql-migration-assistant/src/sql_migration_assistant/frontend/__init__.py
diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py
new file mode 100644
index 00000000..040cb5e9
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py
@@ -0,0 +1,207 @@
+import base64
+import datetime
+import json
+import os
+
+import gradio as gr
+from databricks.labs.lsql.core import StatementExecutionExt
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.workspace import ImportFormat, Language
+from openai import OpenAI
+
+from sql_migration_assistant.app.llm import LLMCalls
+from sql_migration_assistant.app.prompt_helper import PromptHelper
+from sql_migration_assistant.app.similar_code import SimilarCode
+from sql_migration_assistant.config import (
+ FOUNDATION_MODEL_NAME,
+ SQL_WAREHOUSE_ID,
+ CATALOG,
+ SCHEMA,
+ CODE_INTENT_TABLE_NAME,
+ VECTOR_SEARCH_ENDPOINT_NAME,
+ VS_INDEX_NAME,
+ DATABRICKS_HOST,
+ TRANSFORMATION_JOB_ID,
+ WORKSPACE_LOCATION,
+ VOLUME_NAME,
+ DATABRICKS_TOKEN,
+ PROMPT_HISTORY_TABLE_NAME,
+)
+
+openai_client = OpenAI(
+ api_key=DATABRICKS_TOKEN, base_url=f"{DATABRICKS_HOST}/serving-endpoints"
+)
+
+w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1")
+see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID)
+translation_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME)
+intent_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME)
+
+prompt_helper = PromptHelper(
+ see=see, catalog=CATALOG, schema=SCHEMA, prompt_table=PROMPT_HISTORY_TABLE_NAME
+)
+similar_code_helper = SimilarCode(
+ workspace_client=w,
+ see=see,
+ catalog=CATALOG,
+ schema=SCHEMA,
+ code_intent_table_name=CODE_INTENT_TABLE_NAME,
+ VS_index_name=VS_INDEX_NAME,
+ VS_endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME,
+)
+
+
+def list_files(path_to_volume):
+ file_infos = w.dbutils.fs.ls(path_to_volume)
+ file_names = [x.name for x in file_infos]
+ return file_names
+
+
+def make_status_box_visible():
+ return gr.Markdown(label="Job Run Status Page", visible=True)
+
+
+def read_code_file(volume_path, file_name):
+ file_name = os.path.join(volume_path, file_name)
+ file = w.files.download(file_name)
+ code = file.contents.read().decode("utf-8")
+ return code
+
+
+def llm_intent_wrapper(system_prompt, input_code, max_tokens, temperature):
+ intent = intent_llm.llm_intent(system_prompt, input_code, max_tokens, temperature)
+ return intent
+
+
+def llm_translate_wrapper(system_prompt, input_code, max_tokens, temperature):
+ translated_code = translation_llm.llm_translate(
+ system_prompt, input_code, max_tokens, temperature
+ )
+ return translated_code
+
+
+def produce_preview(explanation, translated_code):
+ template = """
+ -- Databricks notebook source
+ -- MAGIC %md
+ -- MAGIC # This notebook was AI generated. AI can make mistakes. This is provided as a tool to accelerate your migration.
+ -- MAGIC
+ -- MAGIC ### AI Generated Intent
+ -- MAGIC
+ -- MAGIC INTENT_GOES_HERE
+
+ -- COMMAND ----------
+
+ TRANSLATED_CODE_GOES_HERE
+ """
+ preview_code = template.replace("INTENT_GOES_HERE", explanation).replace(
+ "TRANSLATED_CODE_GOES_HERE", translated_code
+ )
+ return preview_code
+
+
+def write_adhoc_to_workspace(file_name, preview):
+ if len(file_name) == 0:
+ raise gr.Error("Please provide a filename")
+
+ notebook_path_root = f"{WORKSPACE_LOCATION}/outputNotebooks/{str(datetime.datetime.now()).replace(':', '_')}"
+ notebook_path = f"{notebook_path_root}/{file_name}"
+ content = preview
+ w.workspace.mkdirs(notebook_path_root)
+ w.workspace.import_(
+ content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
+ path=notebook_path,
+ format=ImportFormat.SOURCE,
+ language=Language.SQL,
+ overwrite=True,
+ )
+    status = w.workspace.get_status(notebook_path)
+    notebook_id = status.object_id
+    url = f"{w.config.host}/#notebook/{notebook_id}"
+ output_message = f"Notebook {file_name} written to Databricks [here]({url})"
+ return output_message
+
+
+def execute_workflow(
+ intent_prompt,
+ intent_temperature,
+ intent_max_tokens,
+ translation_prompt,
+ translation_temperature,
+ translation_max_tokens,
+):
+ gr.Info("Beginning code transformation workflow")
+ agent_config_payload = [
+ [
+ {
+ "translation_agent": {
+ "system_prompt": translation_prompt,
+ "endpoint": FOUNDATION_MODEL_NAME,
+ "max_tokens": translation_max_tokens,
+ "temperature": translation_temperature,
+ }
+ }
+ ],
+ [
+ {
+ "explanation_agent": {
+ "system_prompt": intent_prompt,
+ "endpoint": FOUNDATION_MODEL_NAME,
+ "max_tokens": intent_max_tokens,
+ "temperature": intent_temperature,
+ }
+ }
+ ],
+ ]
+
+ app_config_payload = {
+ "VOLUME_NAME_OUTPUT_PATH": os.environ.get("VOLUME_NAME_OUTPUT_PATH"),
+ "VOLUME_NAME_INPUT_PATH": os.environ.get("VOLUME_NAME_INPUT_PATH"),
+ "VOLUME_NAME_CHECKPOINT_PATH": os.environ.get("VOLUME_NAME_CHECKPOINT_PATH"),
+ "CATALOG": os.environ.get("CATALOG"),
+ "SCHEMA": os.environ.get("SCHEMA"),
+ "DATABRICKS_HOST": DATABRICKS_HOST,
+ "DATABRICKS_TOKEN_SECRET_SCOPE": os.environ.get(
+ "DATABRICKS_TOKEN_SECRET_SCOPE"
+ ),
+ "DATABRICKS_TOKEN_SECRET_KEY": os.environ.get("DATABRICKS_TOKEN_SECRET_KEY"),
+ "CODE_INTENT_TABLE_NAME": os.environ.get("CODE_INTENT_TABLE_NAME"),
+ "WORKSPACE_LOCATION": WORKSPACE_LOCATION,
+ }
+
+ app_configs = json.dumps(app_config_payload)
+ agent_configs = json.dumps(agent_config_payload)
+
+ response = w.jobs.run_now(
+ job_id=int(TRANSFORMATION_JOB_ID),
+ job_parameters={
+ "agent_configs": agent_configs,
+ "app_configs": app_configs,
+ },
+ )
+ run_id = response.run_id
+
+ job_url = f"{DATABRICKS_HOST}/jobs/{TRANSFORMATION_JOB_ID}"
+ textbox_message = (
+ f"Job run initiated. Click [here]({job_url}) to view the job status. "
+ f"You just executed the run with run_id: {run_id}\n"
+ f"Output notebooks will be written to the Workspace for immediate use at *{WORKSPACE_LOCATION}/outputNotebooks*"
+ f", and also in the *Output Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME})"
+ )
+ return textbox_message
+
+
+def save_intent_wrapper(input_code, explained):
+ gr.Info("Saving intent")
+ similar_code_helper.save_intent(input_code, explained)
+ gr.Info("Intent saved")
+
+
+# retrieve the row from the table and populate the system prompt, temperature, and max tokens
+def get_prompt_details(prompt_id, prompts):
+ prompt = prompts[prompts["id"] == prompt_id]
+ return [
+ prompt["Prompt"].values[0],
+ prompt["Temperature"].values[0],
+ prompt["Max Tokens"].values[0],
+ ]
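A worked example of `produce_preview` from above: the notebook template is a plain string with two placeholders, so the preview is just two replacements. Note the module builds its Databricks and OpenAI clients at import time, so the config environment variables must be set first.

```python
# Worked example (inputs illustrative).
from sql_migration_assistant.frontend.callbacks import produce_preview

preview = produce_preview(
    explanation="Computes daily revenue per store.",
    translated_code="SELECT store, SUM(amount) FROM sales GROUP BY store;",
)
print(preview)  # notebook source with the intent and translated code filled in
```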
diff --git a/sql_migration_assistant/utils/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/infra/__init__.py
similarity index 100%
rename from sql_migration_assistant/utils/__init__.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/__init__.py
diff --git a/sql_migration_assistant/infra/app_serving_cluster_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py
similarity index 88%
rename from sql_migration_assistant/infra/app_serving_cluster_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py
index 60d18ea5..e0e6386c 100644
--- a/sql_migration_assistant/infra/app_serving_cluster_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py
@@ -1,9 +1,9 @@
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.compute import ListClustersFilterBy, State, DataSecurityMode
-from databricks.labs.blueprint.tui import Prompts
-
import logging
+from databricks.labs.blueprint.tui import Prompts
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.compute import State, DataSecurityMode
+
class AppServingClusterInfra:
@@ -32,9 +32,7 @@ def choose_serving_cluster(self):
cluster_name = self.cluster_name
cluster_id = response.response.cluster_id
else:
- clusters = self.w.clusters.list(
- filter_by=ListClustersFilterBy(cluster_states=[State.RUNNING])
- )
+ clusters = self.w.clusters.list()
# get cluster name and id
clusters = {
@@ -49,6 +47,10 @@ def choose_serving_cluster(self):
question = "Choose a cluster:"
cluster_name = self.prompts.choice(question, clusters.keys())
cluster_id = clusters[cluster_name]
+ cluster = self.w.clusters.get(cluster_id)
+ if cluster.state not in (State.RUNNING, State.PENDING):
+ logging.info("Cluster is not running. Trying to start it")
+ self.w.clusters.start(cluster_id)
self.config["SERVING_CLUSTER_NAME"] = cluster_name
self.config["SERVING_CLUSTER_ID"] = cluster_id
diff --git a/sql_migration_assistant/infra/chat_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/chat_infra.py
similarity index 100%
rename from sql_migration_assistant/infra/chat_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/chat_infra.py
index 0b8f29f1..b30da579 100644
--- a/sql_migration_assistant/infra/chat_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/chat_infra.py
@@ -1,8 +1,8 @@
-from databricks.sdk import WorkspaceClient
+import logging
from databricks.labs.blueprint.tui import Prompts
+from databricks.sdk import WorkspaceClient
-import logging
from sql_migration_assistant.utils.uc_model_version import get_latest_model_version
diff --git a/sql_migration_assistant/infra/jobs_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py
similarity index 92%
rename from sql_migration_assistant/infra/jobs_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py
index 6ec43968..e9d26e94 100644
--- a/sql_migration_assistant/infra/jobs_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py
@@ -1,7 +1,5 @@
from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors.platform import BadRequest
-from databricks.labs.blueprint.tui import Prompts
-from databricks.labs.lsql.core import StatementExecutionExt
+from databricks.sdk.service import compute
from databricks.sdk.service.compute import DataSecurityMode
from databricks.sdk.service.jobs import (
Task,
@@ -11,8 +9,6 @@
JobCluster,
JobParameterDefinition,
)
-from databricks.sdk.service import jobs, compute
-import os
"""
Approach
@@ -58,7 +54,7 @@ def __init__(
]
self.job_name = "sql_migration_code_transformation"
- self.notebook_root_path = f"/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant/jobs/"
+ self.notebook_root_path = f"/Workspace/Users/{self.w.current_user.me().user_name}/.sql-migration-assistant/jobs/"
self.job_parameters = [
JobParameterDefinition("agent_configs", ""),
JobParameterDefinition("app_configs", ""),
diff --git a/sql_migration_assistant/infra/model_def.py b/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py
similarity index 99%
rename from sql_migration_assistant/infra/model_def.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py
index c263fa87..a8c1ff18 100644
--- a/sql_migration_assistant/infra/model_def.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py
@@ -1,18 +1,17 @@
+import os
+from operator import itemgetter
+
+import mlflow
from langchain_community.chat_models import ChatDatabricks
-from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough
+from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
ChatPromptTemplate,
MessagesPlaceholder,
)
-from langchain_core.messages import HumanMessage, AIMessage
-
-import mlflow
+from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough
from mlflow.tracking import MlflowClient
-import os
-from operator import itemgetter
-
def create_langchain_chat_model():
# ## Enable MLflow Tracing
diff --git a/sql_migration_assistant/infra/secrets_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/secrets_infra.py
similarity index 100%
rename from sql_migration_assistant/infra/secrets_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/secrets_infra.py
index bb49b0af..30f86434 100644
--- a/sql_migration_assistant/infra/secrets_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/secrets_infra.py
@@ -1,7 +1,7 @@
import logging
-from databricks.sdk import WorkspaceClient
from databricks.labs.blueprint.tui import Prompts
+from databricks.sdk import WorkspaceClient
class SecretsInfra:
diff --git a/sql_migration_assistant/infra/sql_warehouse_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py
similarity index 99%
rename from sql_migration_assistant/infra/sql_warehouse_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py
index 0c090f2c..9727c3e6 100644
--- a/sql_migration_assistant/infra/sql_warehouse_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py
@@ -1,9 +1,8 @@
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType
+import logging
from databricks.labs.blueprint.tui import Prompts
-
-import logging
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType
class SqlWarehouseInfra:
diff --git a/sql_migration_assistant/infra/unity_catalog_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py
similarity index 94%
rename from sql_migration_assistant/infra/unity_catalog_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py
index fca49263..26a36a12 100644
--- a/sql_migration_assistant/infra/unity_catalog_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py
@@ -1,12 +1,11 @@
import logging
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors.platform import BadRequest
from databricks.labs.blueprint.tui import Prompts
from databricks.labs.lsql.core import StatementExecutionExt
-from databricks.sdk.service.catalog import VolumeType
+from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import PermissionDenied
-import os
+from databricks.sdk.errors.platform import BadRequest
+from databricks.sdk.service.catalog import VolumeType
"""
Approach
@@ -40,8 +39,8 @@ def __init__(
# user cannot change these values
self.tables = {
- "code_intent" : f"(id BIGINT, code STRING, intent STRING) TBLPROPERTIES (delta.enableChangeDataFeed = true)",
- "prompt_history" : f"(id BIGINT GENERATED ALWAYS AS IDENTITY, agent STRING, prompt STRING, temperature DOUBLE, token_limit INT, save_time TIMESTAMP)",
+ "code_intent": f"(id BIGINT, code STRING, intent STRING) TBLPROPERTIES (delta.enableChangeDataFeed = true)",
+ "prompt_history": f"(id BIGINT GENERATED ALWAYS AS IDENTITY, agent STRING, prompt STRING, temperature DOUBLE, token_limit INT, save_time TIMESTAMP)",
}
self.volume_name = "sql_migration_assistant_volume"
self.volume_dirs = {
diff --git a/sql_migration_assistant/infra/vector_search_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py
similarity index 100%
rename from sql_migration_assistant/infra/vector_search_infra.py
rename to sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py
index 4ef8e7a0..36e98c2b 100644
--- a/sql_migration_assistant/infra/vector_search_infra.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py
@@ -1,4 +1,9 @@
+import logging
+import time
+
+from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors.platform import ResourceAlreadyExists, NotFound
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput
from databricks.sdk.service.vectorsearch import (
EndpointType,
@@ -7,13 +12,8 @@
EmbeddingSourceColumn,
VectorIndexType,
)
-from databricks.sdk.errors.platform import ResourceAlreadyExists, NotFound
-
-from databricks.labs.blueprint.tui import Prompts
-import logging
from sql_migration_assistant.utils.uc_model_version import get_latest_model_version
-import time
class VectorSearchInfra:
diff --git a/sql-migration-assistant/src/sql_migration_assistant/main.py b/sql-migration-assistant/src/sql_migration_assistant/main.py
new file mode 100644
index 00000000..6527e989
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/main.py
@@ -0,0 +1,15 @@
+import os
+
+from sql_migration_assistant.frontend.GradioFrontend import GradioFrontend
+
+
+def main():
+ frontend = GradioFrontend()
+ frontend.app.queue().launch(
+ server_name=os.getenv("GRADIO_SERVER_NAME", "localhost"),
+ server_port=int(os.getenv("GRADIO_SERVER_PORT", 3001)),
+ )
+
+
+if __name__ == "__main__":
+ main()
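
The new `main()` entry point reads the Gradio bind address and port from the environment, with local-friendly defaults. A minimal sketch of a local launch, assuming the package has been installed with `pip install .` (the host and port values here are hypothetical):

```python
# Hypothetical local launch of the packaged app; assumes `pip install .` was run.
import os

os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")  # hypothetical: listen on all interfaces
os.environ.setdefault("GRADIO_SERVER_PORT", "8080")     # hypothetical port

from sql_migration_assistant.main import main

main()  # blocks while the Gradio app serves requests
```
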
diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sql_migration_assistant/utils/configloader.py b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py
similarity index 69%
rename from sql_migration_assistant/utils/configloader.py
rename to sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py
index 82f6177e..abee7613 100644
--- a/sql_migration_assistant/utils/configloader.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py
@@ -1,6 +1,16 @@
-from databricks.sdk import WorkspaceClient
-import yaml
import os
+from importlib.resources import files
+
+import yaml
+from databricks.sdk import WorkspaceClient
+
+
+def load_config():
+ # Access the YAML file as a resource
+ config_path = files("sql_migration_assistant").joinpath("config.yml")
+ with config_path.open("r") as f:
+ config = yaml.safe_load(f)
+ return config
class ConfigLoader:
@@ -9,17 +19,16 @@ class ConfigLoader:
environment variables.
"""
- def read_yaml_to_env(self, file_path):
+ def read_yaml_to_env(self):
"""Reads a YAML file and sets environment variables based on its contents.
Args:
file_path (str): The path to the YAML file.
"""
- with open(file_path, "r") as file:
- data = yaml.safe_load(file)
- for key, value in data.items():
- os.environ[key] = str(value)
+ data = load_config()
+ for key, value in data.items():
+ os.environ[key] = str(value)
w = WorkspaceClient()
dbutils = w.dbutils
@@ -36,3 +45,4 @@ def read_yaml_to_env(self, file_path):
if DATABRICKS_HOST[-1] == "/":
DATABRICKS_HOST = DATABRICKS_HOST[:-1]
os.environ["DATABRICKS_HOST"] = DATABRICKS_HOST
+ return data
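
`load_config()` now resolves `config.yml` through `importlib.resources`, so the file is found inside the installed package rather than via a caller-supplied path (which is why `read_yaml_to_env` loses its `file_path` argument). The same pattern in isolation, as a sketch:

```python
# Reading a data file bundled inside an installed package.
from importlib.resources import files

import yaml

config_path = files("sql_migration_assistant").joinpath("config.yml")
with config_path.open("r") as f:
    config = yaml.safe_load(f)

print(config.get("CATALOG"))  # hypothetical key; actual contents of config.yml vary
```
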
diff --git a/sql_migration_assistant/utils/initialsetup.py b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py
similarity index 86%
rename from sql_migration_assistant/utils/initialsetup.py
rename to sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py
index 027fcae3..aafb353c 100644
--- a/sql_migration_assistant/utils/initialsetup.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py
@@ -1,20 +1,43 @@
+import logging
+import os
+from pathlib import Path
+from typing import Iterator
+
from databricks.labs.lsql.core import StatementExecutionExt
+from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import ResourceAlreadyExists, BadRequest
from databricks.sdk.errors.platform import PermissionDenied
-from sql_migration_assistant.infra.sql_warehouse_infra import SqlWarehouseInfra
-from sql_migration_assistant.infra.unity_catalog_infra import UnityCatalogInfra
-from sql_migration_assistant.infra.vector_search_infra import VectorSearchInfra
-from sql_migration_assistant.infra.chat_infra import ChatInfra
-from sql_migration_assistant.infra.secrets_infra import SecretsInfra
-from sql_migration_assistant.infra.jobs_infra import JobsInfra
+
from sql_migration_assistant.infra.app_serving_cluster_infra import (
AppServingClusterInfra,
)
-
-import logging
-import os
-from sql_migration_assistant.utils.upload_files_to_workspace import FileUploader
+from sql_migration_assistant.infra.chat_infra import ChatInfra
+from sql_migration_assistant.infra.jobs_infra import JobsInfra
+from sql_migration_assistant.infra.secrets_infra import SecretsInfra
+from sql_migration_assistant.infra.sql_warehouse_infra import SqlWarehouseInfra
+from sql_migration_assistant.infra.unity_catalog_infra import UnityCatalogInfra
+from sql_migration_assistant.infra.vector_search_infra import VectorSearchInfra
from sql_migration_assistant.utils.run_review_app import RunReviewApp
+from sql_migration_assistant.utils.upload_files_to_workspace import FileUploader
+
+
+def list_files_recursive(parent_path: str | Path, sub_path: str) -> Iterator[str]:
+ # Get absolute paths of both directories
+ dir_to_list = Path(parent_path, sub_path).resolve()
+ base_dir = Path(parent_path).resolve()
+ # List all files in dir_to_list and make paths relative to base_dir
+ for path in dir_to_list.rglob("*"): # Match all files and directories
+ # Exclude hidden files/folders, 'venv', and '.egg-info' folders
+ if (
+            any(part.startswith(".") for part in path.parts)  # exclude hidden files/folders
+            or "venv" in path.parts  # exclude 'venv'
+            or any(
+                part.endswith(".egg-info") for part in path.parts
+            )  # exclude '.egg-info'
+ ):
+ continue
+ if path.is_file(): # Only yield files
+ yield str(path.relative_to(base_dir))
class SetUpMigrationAssistant:
@@ -158,30 +181,19 @@ def setup_migration_assistant(self, w, p):
config = self.update_config(w, config)
return config
- def upload_files(self, w, path):
+ def upload_files(self, w: WorkspaceClient, path):
# all this nastiness becomes unnecessary with lakehouse apps, or if we upload a whl it simplifies things.
# But for now, this is the way.
# TODO - MAKE THIS NICE!!
+ project_path = Path(path).parent.parent
+ files_to_upload = list_files_recursive(project_path, ".")
+
logging.info("Uploading files to workspace")
print("\nUploading files to workspace")
uploader = FileUploader(w)
- files_to_upload = [
- "utils/runindatabricks.py",
- "utils/configloader.py",
- "utils/run_review_app.py",
- "jobs/bronze_to_silver.py",
- "jobs/call_agents.py",
- "jobs/silver_to_gold.py",
- "app/llm.py",
- "app/similar_code.py",
- "app/prompt_helper.py",
- "gradio_app.py",
- "run_app_from_databricks_notebook.py",
- "config.yml",
- ]
def inner(f):
- full_file_path = os.path.join(path, f)
+ full_file_path = os.path.join(project_path, f)
logging.info(
f"Uploading {full_file_path} to {uploader.installer.install_folder()}/{f}"
)
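
With the hard-coded file list removed, `upload_files` discovers everything under the project root through `list_files_recursive`. A hedged sketch of a dry run that prints what would be uploaded (the path resolution mirrors `upload_files`; run it from a file inside the project):

```python
# Hypothetical dry run: list the files the uploader would pick up.
from pathlib import Path

from sql_migration_assistant.utils.initialsetup import list_files_recursive

project_path = Path(__file__).parent.parent  # mirrors upload_files' project root resolution
for rel_path in list_files_recursive(project_path, "."):
    print(rel_path)  # hidden, venv, and .egg-info entries are already filtered out
```
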
diff --git a/sql_migration_assistant/utils/run_review_app.py b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py
similarity index 89%
rename from sql_migration_assistant/utils/run_review_app.py
rename to sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py
index 433d91b3..27087d5a 100644
--- a/sql_migration_assistant/utils/run_review_app.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py
@@ -1,10 +1,10 @@
import logging
+from urllib.parse import urlparse
+from databricks.labs.blueprint.commands import CommandExecutor
from databricks.sdk import WorkspaceClient
-from databricks.sdk.service import compute
from databricks.sdk.mixins.compute import ClustersExt
-from databricks.labs.blueprint.commands import CommandExecutor
-from urllib.parse import urlparse
+from databricks.sdk.service import compute
class RunReviewApp:
@@ -39,27 +39,17 @@ def _library_install(self):
for l in self.libraries:
self.executor.install_notebook_library(l)
+ self.executor.run("pip install .")
self.executor.run("dbutils.library.restartPython()")
- def _path_updates(self):
- self.executor.run(
- code=f"""
-import sys
-sys.path.insert(0, '/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant/utils')
-sys.path.insert(0, '/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant/app')
-import os
-path = '/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant'
-os.chdir(path)
-"""
- )
-
def _get_org_id(self):
return self.w.get_workspace_id()
def _launch_app(self):
self.executor.run(
- code="""
- from utils.runindatabricks import run_app
+ code=f"""
+ from sql_migration_assistant.utils.runindatabricks import run_app
+
run_app()
"""
)
@@ -121,7 +111,6 @@ def remove_lowest_subdomain_from_host(url):
def launch_review_app(self):
self._library_install()
- self._path_updates()
org_id = self._get_org_id()
proxy_url = self._get_proxy_url(org_id)
logging.info(
diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py
new file mode 100644
index 00000000..c0723118
--- /dev/null
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py
@@ -0,0 +1,16 @@
+# this is only run from within databricks, hence the import doesn't work in IDE
+from pathlib import Path
+
+from dbtunnel import dbtunnel
+
+from sql_migration_assistant.utils.configloader import ConfigLoader
+
+current_folder = Path(__file__).parent.resolve()
+
+
+def run_app():
+ cl = ConfigLoader()
+ cl.read_yaml_to_env()
+ dbtunnel.kill_port(8080)
+ app = str(Path(current_folder, "..", "main.py").absolute())
+ dbtunnel.gradio(path=app).run()
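
`run_app()` targets a Databricks cluster, where dbtunnel proxies the Gradio port. Outside Databricks, the same app can be started through the packaged entry point instead; a hypothetical local equivalent, assuming Databricks credentials are configured (since `read_yaml_to_env` constructs a `WorkspaceClient`):

```python
# Hypothetical local alternative to run_app(), skipping dbtunnel.
from sql_migration_assistant.main import main
from sql_migration_assistant.utils.configloader import ConfigLoader

ConfigLoader().read_yaml_to_env()  # populate env vars from the packaged config.yml
main()  # serves Gradio on GRADIO_SERVER_NAME:GRADIO_SERVER_PORT
```
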
diff --git a/sql_migration_assistant/utils/uc_model_version.py b/sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py
similarity index 100%
rename from sql_migration_assistant/utils/uc_model_version.py
rename to sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py
diff --git a/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py
similarity index 83%
rename from sql_migration_assistant/utils/upload_files_to_workspace.py
rename to sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py
index 94e6c392..d908cb6d 100644
--- a/sql_migration_assistant/utils/upload_files_to_workspace.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py
@@ -3,16 +3,16 @@
This uploads the config, runindatabricks.py, and gradio_app_backup.py files to the Databricks workspace.
"""
+from dataclasses import make_dataclass
+
from databricks.labs.blueprint.installation import Installation
from databricks.sdk import WorkspaceClient
-from dataclasses import make_dataclass
-import os
class FileUploader:
def __init__(self, workspace_client: WorkspaceClient):
self.w = workspace_client
- self.installer = Installation(ws=self.w, product="sql_migration_assistant")
+ self.installer = Installation(ws=self.w, product="sql-migration-assistant")
def upload(
self,
@@ -34,4 +34,6 @@ def save_config(self, config):
config_class = X(**config)
- self.installer.save(config_class, filename="config.yml")
+ self.installer.save(
+ config_class, filename="src/sql_migration_assistant/config.yml"
+ )
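
`save_config` builds a throwaway dataclass so `Installation.save` can serialise an arbitrary config dict. The shape of that trick in isolation, as a sketch (values are hypothetical; the `Installation` call is commented out because it needs a live workspace):

```python
# Turning a plain dict into a dataclass instance for serialisation.
from dataclasses import make_dataclass

config = {"CATALOG": "main", "SCHEMA": "sql_migration"}  # hypothetical values
X = make_dataclass("X", fields=config.keys())
config_class = X(**config)
print(config_class)  # X(CATALOG='main', SCHEMA='sql_migration')
# installer.save(config_class, filename="src/sql_migration_assistant/config.yml")
```
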
diff --git a/sql-migration-assistant/tests/__init__.py b/sql-migration-assistant/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sql_migration_assistant/.gitignore b/sql_migration_assistant/.gitignore
deleted file mode 100644
index 3a1de4ad..00000000
--- a/sql_migration_assistant/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-.databrickscfg
\ No newline at end of file
diff --git a/sql_migration_assistant/gradio_app.py b/sql_migration_assistant/gradio_app.py
deleted file mode 100644
index aeef802a..00000000
--- a/sql_migration_assistant/gradio_app.py
+++ /dev/null
@@ -1,716 +0,0 @@
-import json
-import os
-import datetime
-from databricks.labs.lsql.core import StatementExecutionExt
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.workspace import ImportFormat, Language
-import base64
-import gradio as gr
-
-from openai import OpenAI
-from app.llm import LLMCalls
-from app.similar_code import SimilarCode
-from app.prompt_helper import PromptHelper
-import logging # For printing translation attempts in console (debugging)
-
-# Setting up logger
-logging.basicConfig()
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-
-
-# # personal access token necessary for authenticating API requests. Stored using a secret
-
-FOUNDATION_MODEL_NAME = os.environ.get("SERVED_FOUNDATION_MODEL_NAME")
-MAX_TOKENS = os.environ.get("MAX_TOKENS")
-SQL_WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID")
-VECTOR_SEARCH_ENDPOINT_NAME = os.environ.get("VECTOR_SEARCH_ENDPOINT_NAME")
-VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME")
-CODE_INTENT_TABLE_NAME = os.environ.get("CODE_INTENT_TABLE_NAME")
-PROMPT_HISTORY_TABLE_NAME = os.environ.get("PROMPT_HISTORY_TABLE_NAME")
-CATALOG = os.environ.get("CATALOG")
-SCHEMA = os.environ.get("SCHEMA")
-VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH")
-VOLUME_NAME = os.environ.get("VOLUME_NAME")
-DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST")
-DATABRICKS_TOKEN = os.environ.get('DATABRICKS_TOKEN')
-TRANSFORMATION_JOB_ID = os.environ.get("TRANSFORMATION_JOB_ID")
-WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION")
-w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1")
-openai_client = OpenAI(
- api_key=DATABRICKS_TOKEN,
- base_url=f"{DATABRICKS_HOST}/serving-endpoints"
-)
-
-see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID)
-translation_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME)
-intent_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME)
-similar_code_helper = SimilarCode(
- workspace_client=w,
- see=see,
- catalog=CATALOG,
- schema=SCHEMA,
- code_intent_table_name=CODE_INTENT_TABLE_NAME,
- VS_index_name=VS_INDEX_NAME,
- VS_endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME,
-)
-prompt_helper = PromptHelper(see=see, catalog=CATALOG, schema=SCHEMA, prompt_table=PROMPT_HISTORY_TABLE_NAME)
-
-
-################################################################################
-################################################################################
-
-# this is the app UI. it uses gradio blocks https://www.gradio.app/docs/gradio/blocks
-# each gr.{} call adds a new element to UI, top to bottom.
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
- # title with Databricks image
- gr.Markdown(
- """
-
-# Databricks Legion Migration Accelerator
-""")
- with gr.Tab("Instructions"):
- gr.Markdown("""
-    Legion is an AI-powered tool that aims to accelerate the migration of code to Databricks at low cost and effort. It
-    does this by using AI to translate your code, explain it, and make it discoverable.
-
- This interface is the Legion Control Panel. Here you are able to configure the AI agents for translation and explanation
- to fit your needs, incorporating your expertise and knowledge of the codebase by adjusting the AI agents' instructions.
-
- Legion can work in a batch or interactive fashion.
-
- *Interactive operation*
- Fine tune the AI agents on a single file and output the result as a Databricks notebook.
- Use this UI to adjust the system prompts and instructions for the AI agents to generate the best translation and intent.
-
- *Batch operation*
- Process a Volume of files to generate Databricks notebooks. Use this UI to fine tune your agent prompts against selected
- files before executing a Workflow to transform all files in the Volume, outputting Databricks notebooks with the AI
- generated intent and translation.
-
-
- Please select your mode of operation to get started.
-
- """
- )
-
- operation = gr.Radio(
- label="Select operation mode",
- choices=["Interactive mode", "Batch mode"],
- value="Interactive mode",
- type="value",
- interactive=True,
- )
- ################################################################################
- #### STORAGE SETTINGS TAB
- ################################################################################
-
- with gr.Tab(label="Input code", visible=True) as interactive_input_code_tab:
-
- gr.Markdown(
- f"""## Paste in some code to test your agents on.
- """
- )
- interactive_code_button = gr.Button("Ingest code")
- interactive_code = gr.Code(
- label="Paste your code in here", language="sql-msSQL"
- )
- interactive_code_button.click(fn=lambda: gr.Info("Code ingested!"))
-
- with gr.Tab(label="Select code", visible=False) as batch_input_code_tab:
-
- gr.Markdown(
- f"""## Select a file to test your agents on.
-
- Legion can batch process a Volume of files to generate Databricks notebooks. The files to translate must be
- added to the *Input Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME}).
-
- Here you can select a file to fine tune your agent prompts against.
- """
- )
- volume_path = gr.Textbox(value=VOLUME_NAME_INPUT_PATH, visible=False)
-
- load_files = gr.Button("Load Files from Volume")
- select_code_file = gr.Radio(label="Select Code File")
- selected_file = gr.Code(label="Selected Code File", language="sql-msSQL")
-
- def list_files(path_to_volume):
- file_infos = w.dbutils.fs.ls(path_to_volume)
- file_names = [x.name for x in file_infos]
- file_name_radio = gr.Radio(label="Select Code File", choices=file_names)
- return file_name_radio
-
- load_files.click(list_files, volume_path, select_code_file)
-
- def read_code_file(volume_path, file_name):
- file_name = os.path.join(volume_path, file_name)
- file = w.files.download(file_name)
- code = file.contents.read().decode("utf-8")
- return code
-
- ################################################################################
- #### EXPLANATION TAB
- ################################################################################
- with gr.Tab(label="Code Explanation"):
- gr.Markdown(
- """
- ## An AI tool to generate the intent of your code.
-
- In this panel you need to iterate on the system prompt to refine the intent the AI generates for your code.
- This intent will be stored in Unity Catalog, and can be used for finding similar code, for documentation,
- and to help with writing new code in Databricks to achieve the same goal.
- """
- )
- with gr.Accordion(label="Advanced Intent Settings", open=True):
- gr.Markdown(
- """ ### Advanced settings for the generating the intent of the input code.
-
- The *Temperature* parameter controls the randomness of the AI's response. Higher values will result in
- more creative responses, while lower values will result in more predictable responses.
- """
- )
-
- with gr.Row():
- intent_temperature = gr.Number(
- label="Temperature. Float between 0.0 and 1.0", value=0.0
- )
- intent_max_tokens = gr.Number(
- label="Max tokens. Check your LLM docs for limit.", value=3500
- )
- with gr.Row():
- intent_system_prompt = gr.Textbox(
- label="System prompt of the LLM to generate the intent.",
- placeholder="Add your system prompt here, for example:\n"
- "Explain the intent of this code with an example use case.",
- lines=3
- )
- # these bits relate to saving and loading of prompts
- with gr.Row():
- save_intent_prompt = gr.Button("Save intent prompt")
- load_intent_prompt = gr.Button("Load intent prompt")
- # hidden button and display box for saved prompts, made visible when the load button is clicked
- intent_prompt_id_to_load = gr.Textbox(
- label="Prompt ID to load",
- visible=False,
- placeholder="Enter the ID of the prompt to load from the table below."
- )
- loaded_intent_prompts = gr.Dataframe(
- label='Saved prompts.',
- visible=False,
- headers=["id", "Prompt", "Temperature", "Max Tokens", "Save Datetime"],
- interactive=False,
- wrap=True
- )
- # get the prompts and populate the table and make it visible
- load_intent_prompt.click(
- fn=lambda : gr.update(visible=True, value=prompt_helper.get_prompts("intent_agent")),
- inputs=None,
- outputs=[loaded_intent_prompts],
- )
- # make the input box for the prompt id visible
- load_intent_prompt.click(
- fn=lambda : gr.update(visible=True),
- inputs=None,
- outputs=[intent_prompt_id_to_load],
- )
-            # retrieve the row from the table and populate the system prompt, temperature, and max tokens
- def get_prompt_details(prompt_id, prompts):
- prompt = prompts[prompts["id"] == prompt_id]
- return [prompt["Prompt"].values[0], prompt["Temperature"].values[0], prompt["Max Tokens"].values[0]]
-
- intent_prompt_id_to_load.change(
- fn=get_prompt_details,
- inputs=[intent_prompt_id_to_load, loaded_intent_prompts],
- outputs=[intent_system_prompt, intent_temperature, intent_max_tokens]
- )
- # save the prompt
- save_intent_prompt.click(
- fn=lambda prompt, temp, tokens: prompt_helper.save_prompt("intent_agent", prompt, temp, tokens),
- inputs=[intent_system_prompt, intent_temperature, intent_max_tokens],
- outputs=None
- )
-
-
- with gr.Accordion(label="Intent Pane", open=True):
- gr.Markdown(
- """ ## AI generated intent of what your code aims to do.
- """
- )
- explain_button = gr.Button("Explain")
- with gr.Row():
- with gr.Column():
- gr.Markdown(""" ## Input Code.""")
-
- # input box for SQL code with nice formatting
- intent_input_code = gr.Code(
- label="Input SQL",
- language="sql-msSQL",
- )
-
- with gr.Column():
- # divider subheader
- gr.Markdown(""" ## Code intent""")
- # output box of the T-SQL translated to Spark SQL
- explained = gr.Textbox(label="AI generated intent of your code.")
-
- def llm_intent_wrapper(system_prompt, input_code, max_tokens, temperature):
- # call the LLM to translate the code
- intent = intent_llm.llm_intent(
- system_prompt, input_code, max_tokens, temperature
- )
- return intent
-
- # reset hidden chat history and prompt
- # do translation
- explain_button.click(
- fn=llm_intent_wrapper,
- inputs=[
- intent_system_prompt,
- intent_input_code,
- intent_max_tokens,
- intent_temperature,
- ],
- outputs=explained,
- )
- ################################################################################
- #### TRANSLATION TAB
- ################################################################################
- with gr.Tab(label="Translation"):
- gr.Markdown(
- """
- ## An AI tool to translate your code.
-
- In this panel you need to iterate on the system prompt to refine the translation the AI generates for your code.
-
- """
- )
- with gr.Accordion(label="Translation Advanced Settings", open=True):
- gr.Markdown(
- """ ### Advanced settings for the translating the input code.
-
-            The *Temperature* parameter controls the randomness of the AI's response. Higher values will result in
- more creative responses, while lower values will result in more predictable responses.
- """
- )
- with gr.Row():
- translation_temperature = gr.Number(
- label="Temperature. Float between 0.0 and 1.0", value=0.0
- )
- translation_max_tokens = gr.Number(
- label="Max tokens. Check your LLM docs for limit.", value=3500
- )
- with gr.Row():
- translation_system_prompt = gr.Textbox(
- label="Instructions for the LLM translation tool.",
- placeholder="Add your system prompt here, for example:\n"
- "Translate this code to Spark SQL.",
- lines=3
- )
- with gr.Row():
- save_translation_prompt = gr.Button("Save translation prompt")
- load_translation_prompt = gr.Button("Load translation prompt")
- # hidden button and display box for saved prompts, made visible when the load button is clicked
- translation_prompt_id_to_load = gr.Textbox(
- label="Prompt ID to load",
- visible=False,
- placeholder="Enter the ID of the prompt to load from the table below."
- )
- loaded_translation_prompts = gr.Dataframe(
- label='Saved prompts.',
- visible=False,
- headers=["id", "Prompt", "Temperature", "Max Tokens", "Save Datetime"],
- interactive=False,
- wrap=True
- )
- # get the prompts and populate the table and make it visible
- load_translation_prompt.click(
- fn=lambda : gr.update(visible=True, value=prompt_helper.get_prompts("translation_agent")),
- inputs=None,
- outputs=[loaded_translation_prompts],
- )
- # make the input box for the prompt id visible
- load_translation_prompt.click(
- fn=lambda : gr.update(visible=True),
- inputs=None,
- outputs=[translation_prompt_id_to_load],
- )
-            # retrieve the row from the table and populate the system prompt, temperature, and max tokens
- translation_prompt_id_to_load.change(
- fn=get_prompt_details,
- inputs=[translation_prompt_id_to_load, loaded_translation_prompts],
- outputs=[translation_system_prompt, translation_temperature, translation_max_tokens]
- )
-
- save_translation_prompt.click(
- fn=lambda prompt, temp, tokens: prompt_helper.save_prompt("translation_agent", prompt, temp, tokens),
- inputs=[translation_system_prompt, translation_temperature, translation_max_tokens],
- outputs=None
- )
-
-
- with gr.Accordion(label="Translation Pane", open=True):
- gr.Markdown(""" ### Source code for translation to Spark-SQL.""")
- # a button labelled translate
- translate_button = gr.Button("Translate")
- with gr.Row():
- with gr.Column():
- gr.Markdown(""" ## Input code.""")
-
- # input box for SQL code with nice formatting
- translation_input_code = gr.Code(
- label="Input SQL",
- language="sql-msSQL",
- )
-
- with gr.Column():
- # divider subheader
- gr.Markdown(""" ## Translated Code""")
- # output box of the T-SQL translated to Spark SQL
- translated = gr.Code(
- label="Your code translated to Spark SQL",
- language="sql-sparkSQL",
- )
-
- # helper function to take the output from llm_translate and return outputs for chatbox and textbox
- # chatbox input is a list of lists, each list is a message from the user and the response from the LLM
- # textbox input is a string
- def llm_translate_wrapper(
- system_prompt, input_code, max_tokens, temperature
- ):
- # call the LLM to translate the code
- translated_code = translation_llm.llm_translate(
- system_prompt, input_code, max_tokens, temperature
- )
- return translated_code
-
- # reset hidden chat history and prompt
- # do translation
- translate_button.click(
- fn=llm_translate_wrapper,
- inputs=[
- translation_system_prompt,
- translation_input_code,
- translation_max_tokens,
- translation_temperature,
- ],
- outputs=translated,
- )
-
- ################################################################################
- #### SIMILAR CODE TAB
- ################################################################################
- with gr.Tab(label="Find Similar Code"):
- gr.Markdown(
- """
- # ** Work in Progress **
- ## An AI tool to find similar code.
- """
- )
- with gr.Accordion(label="Similar Code Pane", open=True):
- gr.Markdown(
- """ ## Similar code
-
- This code is thought to be similar to what you are doing, based on comparing the intent of your code with the intent of this code.
- """
- )
- # a button
- find_similar_code = gr.Button("Find similar code")
- # a row with an code and text box to show the similar code
- with gr.Row():
- similar_code_input = gr.Code(
- label="Input Code.", language="sql-sparkSQL"
- )
- similar_code_output = gr.Code(
- label="Similar code to yours.", language="sql-sparkSQL"
- )
-            similar_intent = gr.Textbox(label="The similar code's intent.")
-
- # a button
- submit = gr.Button("Save code and intent")
-
- # assign actions to buttons when clicked.
- find_similar_code.click(
- fn=similar_code_helper.get_similar_code,
- inputs=similar_code_input,
- outputs=[similar_code_output, similar_intent],
- )
-
- def save_intent_wrapper(input_code, explained):
- gr.Info("Saving intent")
- similar_code_helper.save_intent(input_code, explained)
- gr.Info("Intent saved")
-
- submit.click(save_intent_wrapper, inputs=[translation_input_code, explained])
-
- ################################################################################
- #### EXECUTE JOB TAB
- ################################################################################
- with gr.Tab(label="Execute Job", visible=False) as batch_output_tab:
- gr.Markdown(
- """ ## Execute Job
-
-            This tab is for executing the job to convert the code files in the Unity Catalog Volume to Databricks
-            Notebooks. Once you are happy with your system prompts and the explanation and translation outputs,
- click the execute button below.
-
- This will kick off a Workflow which will ingest the code files, write them to a Delta Table, apply the AI
- agents, and output a Databricks Notebook per input code file. This notebook will have the intent at the top
- of the notebook in a markdown cell, and the translated code in the cell below. These notebooks are found in
- the workspace at *{WORKSPACE_LOCATION}/outputNotebooks* and in the *Output Code* folder in the UC Volume
-
- The intent will also be stored in a Unity Catalog table and vector search index for finding similar code.
- """
- )
- execute = gr.Button(
- value="EXECUTE CODE TRANSFORMATION",
- size="lg",
- )
- run_status = gr.Markdown(label="Job Status Page", visible=False)
-
-        def execute_workflow(
- intent_prompt,
- intent_temperature,
- intent_max_tokens,
- translation_prompt,
- translation_temperature,
- translation_max_tokens,
- ):
- gr.Info("Beginning code transformation workflow")
- agent_config_payload = [
- [
- {
- "translation_agent": {
- "system_prompt": translation_prompt,
- "endpoint": FOUNDATION_MODEL_NAME,
- "max_tokens": translation_max_tokens,
- "temperature": translation_temperature,
- }
- }
- ],
- [
- {
- "explanation_agent": {
- "system_prompt": intent_prompt,
- "endpoint": FOUNDATION_MODEL_NAME,
- "max_tokens": intent_max_tokens,
- "temperature": intent_temperature,
- }
- }
- ],
- ]
-
- app_config_payload = {
- "VOLUME_NAME_OUTPUT_PATH": os.environ.get("VOLUME_NAME_OUTPUT_PATH"),
- "VOLUME_NAME_INPUT_PATH": os.environ.get("VOLUME_NAME_INPUT_PATH"),
- "VOLUME_NAME_CHECKPOINT_PATH": os.environ.get(
- "VOLUME_NAME_CHECKPOINT_PATH"
- ),
- "CATALOG": os.environ.get("CATALOG"),
- "SCHEMA": os.environ.get("SCHEMA"),
- "DATABRICKS_HOST": DATABRICKS_HOST,
- "DATABRICKS_TOKEN_SECRET_SCOPE": os.environ.get(
- "DATABRICKS_TOKEN_SECRET_SCOPE"
- ),
- "DATABRICKS_TOKEN_SECRET_KEY": os.environ.get(
- "DATABRICKS_TOKEN_SECRET_KEY"
- ),
- "CODE_INTENT_TABLE_NAME": os.environ.get("CODE_INTENT_TABLE_NAME"),
- "WORKSPACE_LOCATION": WORKSPACE_LOCATION,
- }
-
- app_configs = json.dumps(app_config_payload)
- agent_configs = json.dumps(agent_config_payload)
-
- response = w.jobs.run_now(
- job_id=int(TRANSFORMATION_JOB_ID),
- job_parameters={
- "agent_configs": agent_configs,
- "app_configs": app_configs,
- },
- )
- run_id = response.run_id
-
- job_url = f"{DATABRICKS_HOST}/jobs/{TRANSFORMATION_JOB_ID}"
- textbox_message = (
- f"Job run initiated. Click [here]({job_url}) to view the job status. "
- f"You just executed the run with run_id: {run_id}\n"
- f"Output notebooks will be written to the Workspace for immediate use at *{WORKSPACE_LOCATION}/outputNotebooks*"
- f", and also in the *Output Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME})"
- )
- return textbox_message
-
- def make_status_box_visible():
- return gr.Markdown(label="Job Run Status Page", visible=True)
-
- execute.click(fn=make_status_box_visible, outputs=run_status)
- execute.click(
-            execute_workflow,
- inputs=[
- intent_system_prompt,
- intent_temperature,
- intent_max_tokens,
- translation_system_prompt,
- translation_temperature,
- translation_max_tokens,
- ],
- outputs=run_status,
- )
-
- with gr.Tab(label="Write file to Workspace") as interactive_output_tab:
- gr.Markdown(
- f""" ## Write to Workspace
-
- Write out your explained and translated file to a notebook in the workspace.
- You must provide a filename for the notebook. The notebook will be written to the workspace, saved to the
- Output Code location in the Unity Catalog Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME})
- , and the intent will be saved to the intent table.
- """
- )
- template = """
--- Databricks notebook source
--- MAGIC %md
--- MAGIC # This notebook was AI generated. AI can make mistakes. This is provided as a tool to accelerate your migration.
--- MAGIC
--- MAGIC ### AI Generated Intent
--- MAGIC
--- MAGIC INTENT_GOES_HERE
-
--- COMMAND ----------
-
-TRANSLATED_CODE_GOES_HERE
- """
- with gr.Row():
- produce_preview_button = gr.Button("Produce Preview")
- with gr.Column():
- file_name = gr.Textbox(label="Filename for the notebook")
- write_to_workspace_button = gr.Button("Write to Workspace")
- adhoc_write_output = gr.Markdown(label="Notebook output location")
-
- def produce_preview(explanation, translated_code):
- preview_code = template.replace("INTENT_GOES_HERE", explanation).replace(
- "TRANSLATED_CODE_GOES_HERE", translated_code
- )
- return preview_code
-
- def write_adhoc_to_workspace(file_name, preview):
-
- if len(file_name) == 0:
- raise gr.Error("Please provide a filename")
-
- notebook_path_root = f"{WORKSPACE_LOCATION}/outputNotebooks/{str(datetime.datetime.now()).replace(':', '_')}"
- notebook_path = f"{notebook_path_root}/{file_name}"
- content = preview
- w.workspace.mkdirs(notebook_path_root)
- w.workspace.import_(
- content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
- path=notebook_path,
- format=ImportFormat.SOURCE,
- language=Language.SQL,
- overwrite=True,
- )
- _ = w.workspace.get_status(notebook_path)
- id = _.object_id
- url = f"{w.config.host}/#notebook/{id}"
- output_message = f"Notebook {file_name} written to Databricks [here]({url})"
- return output_message
-
- preview = gr.Code(label="Preview", language="python")
- produce_preview_button.click(
- produce_preview, inputs=[explained, translated], outputs=preview
- )
-
- # write file to notebook
- write_to_workspace_button.click(
- fn=write_adhoc_to_workspace,
- inputs=[file_name, preview],
- outputs=adhoc_write_output,
- )
-
- with gr.Tab(label="Feedback"):
- gr.Markdown(
- """
- ## Comments? Feature Suggestions? Bugs?
-
- Below is the link to the Legion Github repo for you to raise an issue.
-
- On the right hand side of the Issue page, please assign it to **robertwhiffin**, and select the project **Legion**.
-
- Raise the issue on the Github repo for Legion [here](https://github.com/databrickslabs/sandbox/issues/new).
- """
- )
-
- # this handles the code loading for batch mode
- # read the selected code file and put it into the other panes
- for output in [
- selected_file,
- translation_input_code,
- intent_input_code,
- similar_code_input,
- ]:
- select_code_file.select(
- fn=read_code_file, inputs=[volume_path, select_code_file], outputs=output
- )
-
-    # this handles the code loading for interactive mode
- for output in [
- translation_input_code,
- intent_input_code,
- similar_code_input,
- ]:
- interactive_code_button.click(
- fn=lambda x: gr.update(value=x), inputs=interactive_code, outputs=output
- )
-
- # change the input tabs based on the operation mode
- operation.change(
- lambda x: (
- gr.update(visible=False)
- if x == "Interactive mode"
- else gr.update(visible=True)
- ),
- operation,
- batch_input_code_tab,
- )
- operation.change(
- lambda x: (
- gr.update(visible=True)
- if x == "Interactive mode"
- else gr.update(visible=False)
- ),
- operation,
- interactive_input_code_tab,
- )
-
- # change the output tabs based on the operation mode
- operation.change(
- lambda x: (
- gr.update(visible=False)
- if x == "Interactive mode"
- else gr.update(visible=True)
- ),
- operation,
- batch_output_tab,
- )
- operation.change(
- lambda x: (
- gr.update(visible=True)
- if x == "Interactive mode"
- else gr.update(visible=False)
- ),
- operation,
- interactive_output_tab,
- )
-
-# for local dev
-try:
- if os.environ["LOCALE"] == "local_dev":
- demo.queue().launch()
-except KeyError:
- pass
-
-# this is necessary to get the app to run on databricks
-if __name__ == "__main__":
- demo.queue().launch(
- server_name=os.getenv("GRADIO_SERVER_NAME"),
- server_port=int(os.getenv("GRADIO_SERVER_PORT")),
- )
diff --git a/sql_migration_assistant/requirements.txt b/sql_migration_assistant/requirements.txt
deleted file mode 100644
index c4dcd90e..00000000
--- a/sql_migration_assistant/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-databricks-sdk==0.30.0
-pyyaml
-databricks-labs-blueprint==0.8.2
-databricks-labs-lsql==0.9.0
\ No newline at end of file
diff --git a/sql_migration_assistant/tests/test_llm.py b/sql_migration_assistant/tests/test_llm.py
deleted file mode 100644
index 1d6dc497..00000000
--- a/sql_migration_assistant/tests/test_llm.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import unittest
-from unittest.mock import patch, MagicMock
-from app.llm import LLMCalls
-
-
-class TestLLMCalls(unittest.TestCase):
-
- @patch("app.llm.OpenAI")
- def setUp(self, MockOpenAI):
- """
- Set up the test environment before each test method.
- Mocks the OpenAI client and initializes the LLMCalls object with mock dependencies.
- """
- # Create a mock client instance
- self.mock_client = MagicMock()
- # Ensure the OpenAI client constructor returns the mock client
- MockOpenAI.return_value = self.mock_client
- # Initialize the LLMCalls instance with dummy parameters
- self.llm = LLMCalls(
- databricks_host="dummy_host",
- databricks_token="dummy_token",
- model_name="dummy_model",
- max_tokens=100,
- )
-
- def test_call_llm(self):
- """
- Test the call_llm method of the LLMCalls class.
- Verifies that the method correctly calls the OpenAI client and returns the expected response.
- """
- # Setup mock response
- mock_response = MagicMock()
- mock_response.choices[0].message.content = "Test response"
- self.mock_client.chat.completions.create.return_value = mock_response
-
- # Test the call_llm method
- messages = [{"role": "user", "content": "Hello"}]
- response = self.llm.call_llm(messages)
-
- # Verify that the OpenAI client was called with the correct parameters
- self.mock_client.chat.completions.create.assert_called_once_with(
- messages=messages, model="dummy_model", max_tokens=100
- )
- # Check that the response matches the expected value
- self.assertEqual(response, "Test response")
-
- def test_convert_chat_to_llm_input(self):
- """
- Test the convert_chat_to_llm_input method to ensure it correctly formats the chat history.
- """
- system_prompt = "You are a helpful assistant."
- chat = [("Hello", "Hi there!"), ("How are you?", "I'm good, thank you!")]
- expected_output = [
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": "Hello"},
- {"role": "assistant", "content": "Hi there!"},
- {"role": "user", "content": "How are you?"},
- {"role": "assistant", "content": "I'm good, thank you!"},
- ]
-
- result = self.llm.convert_chat_to_llm_input(system_prompt, chat)
- # Assert that the formatted messages are as expected
- self.assertEqual(result, expected_output)
-
- # Test the LLM functions for translating code, chatting, and determining intent
- @patch.object(LLMCalls, "call_llm", return_value="Final answer:\nTranslated code")
- def test_llm_translate(self, mock_call_llm):
- system_prompt = "Translate this code"
- input_code = "SELECT * FROM table"
-
- response = self.llm.llm_translate(system_prompt, input_code)
- self.assertEqual(response, "Translated code")
-
- @patch.object(LLMCalls, "call_llm", return_value="Chat response")
- def test_llm_chat(self, mock_call_llm):
- system_prompt = "You are a helpful assistant."
- query = "What is the weather today?"
- chat_history = [("Hello", "Hi there!")]
-
- response = self.llm.llm_chat(system_prompt, query, chat_history)
- self.assertEqual(response, "Chat response")
-
- @patch.object(LLMCalls, "call_llm", return_value="Intent response")
- def test_llm_intent(self, mock_call_llm):
- system_prompt = "Determine the intent of this code"
- input_code = "SELECT * FROM table"
-
- response = self.llm.llm_intent(system_prompt, input_code)
- self.assertEqual(response, "Intent response")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/sql_migration_assistant/tests/test_similar_code.py b/sql_migration_assistant/tests/test_similar_code.py
deleted file mode 100644
index b34c266d..00000000
--- a/sql_migration_assistant/tests/test_similar_code.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import unittest
-from unittest.mock import MagicMock, patch
-from app.similar_code import (
- SimilarCode,
-) # replace 'your_module' with the actual name of your module
-
-
-class TestSimilarCode(unittest.TestCase):
- """
- Unit test class for testing the SimilarCode class.
- """
-
- @patch("app.similar_code.VectorSearchClient")
- def setUp(self, MockVectorSearchClient):
- """
- Sets up the test case by initializing an instance of SimilarCode with mock dependencies.
-
- Mocking the VectorSearchClient to isolate the functionality of SimilarCode from external dependencies.
- """
- self.mock_vsc_instance = MockVectorSearchClient.return_value
- self.similar_code = SimilarCode(
- databricks_token="test_token",
- databricks_host="test_host",
- vector_search_endpoint_name="test_endpoint",
- vs_index_fullname="test_index",
- intent_table="test_table",
- )
-
- def test_save_intent(self):
- """
- Tests the save_intent method of SimilarCode class.
-
- This test ensures that the SQL insert statement is correctly formed and executed with the provided parameters.
- """
- # Mock the database cursor
- mock_cursor = MagicMock()
- code = "sample code"
- intent = "sample intent"
- code_hash = hash(code)
-
- # Call the method to test
- self.similar_code.save_intent(code, intent, mock_cursor)
-
- # Assert that the execute method was called with the correct SQL statement
- mock_cursor.execute.assert_called_once_with(
- f'INSERT INTO test_table VALUES ({code_hash}, "{code}", "{intent}")'
- )
-
- def test_get_similar_code(self):
- """
- Tests the get_similar_code method of SimilarCode class.
-
- This test verifies that the method calls the VectorSearchClient with the correct parameters and
- returns the expected results.
- """
- # Sample chat history and mock result
- chat_history = [(1, "first intent"), (2, "second intent")]
- mock_result = {"result": {"data_array": [["sample code", "sample intent"]]}}
-
- # Mock the similarity_search method's return value
- self.mock_vsc_instance.get_index.return_value.similarity_search.return_value = (
- mock_result
- )
-
- # Call the method to test
- code, intent = self.similar_code.get_similar_code(chat_history)
-
- # Assert that get_index was called with the correct parameters
- self.mock_vsc_instance.get_index.assert_called_once_with(
- "test_endpoint", "test_index"
- )
-
- # Assert that similarity_search was called with the correct parameters
- self.mock_vsc_instance.get_index.return_value.similarity_search.assert_called_once_with(
- query_text="second intent", columns=["code", "intent"], num_results=1
- )
-
- # Assert that the returned values are as expected
- self.assertEqual(code, "sample code")
- self.assertEqual(intent, "sample intent")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/sql_migration_assistant/tests/test_sql_interface.py b/sql_migration_assistant/tests/test_sql_interface.py
deleted file mode 100644
index 94f05388..00000000
--- a/sql_migration_assistant/tests/test_sql_interface.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import unittest
-from unittest.mock import MagicMock, patch
-from app.sql_interface import (
- SQLInterface,
-) # replace 'your_module' with the actual name of your module
-
-
-class TestSQLInterface(unittest.TestCase):
- """
- Unit test class for testing the SQLInterface class.
- """
-
- @patch("app.sql_interface.sql.connect")
- def setUp(self, mock_sql_connect):
- """
- Sets up the test case by initializing an instance of SQLInterface with mock dependencies.
-
- Mocking the sql.connect method to isolate the functionality of SQLInterface from external dependencies.
- """
- # Mock the connection and cursor
- self.mock_connection = MagicMock()
- self.mock_cursor = MagicMock()
- self.mock_connection.cursor.return_value = self.mock_cursor
- mock_sql_connect.return_value = self.mock_connection
-
- # Initialize the SQLInterface instance with mock parameters
- self.sql_interface = SQLInterface(
- databricks_host="test_host",
- databricks_token="test_token",
- sql_warehouse_http_path="test_http_path",
- )
-
- def test_execute_sql(self):
- """
- Tests the execute_sql method of SQLInterface class.
-
- This test ensures that the SQL statement is executed and the fetched results are returned correctly.
- """
- # Mock the execute and fetchall methods
- self.mock_cursor.execute.return_value = None
- self.mock_cursor.fetchall.return_value = [("result1",), ("result2",)]
-
- # SQL statement to test
- sql_statement = "SELECT * FROM test_table"
-
- # Call the method to test
- results = self.sql_interface.execute_sql(self.mock_cursor, sql_statement)
-
- # Assert that the execute method was called with the correct SQL statement
- self.mock_cursor.execute.assert_called_once_with(sql_statement)
-
- # Assert that fetchall method was called and returned the expected results
- self.mock_cursor.fetchall.assert_called_once()
- self.assertEqual(results, [("result1",), ("result2",)])
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/sql_migration_assistant/utils/runindatabricks.py b/sql_migration_assistant/utils/runindatabricks.py
deleted file mode 100644
index d2c9d36b..00000000
--- a/sql_migration_assistant/utils/runindatabricks.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# this is only run from within databricks, hence the import doesn't work in IDE
-from utils.configloader import ConfigLoader
-from utils.run_review_app import RunReviewApp
-from dbtunnel import dbtunnel
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.runtime import *
-import threading
-import yaml
-
-
-def run_app():
- cl = ConfigLoader()
- cl.read_yaml_to_env("config.yml")
- dbtunnel.kill_port(8080)
- app = "gradio_app.py"
- dbtunnel.gradio(path=app).run()
-
-