From bc1dcc7efc0886cddd3020259a4e2baa5d6c245a Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Tue, 5 Nov 2024 22:40:53 +0100 Subject: [PATCH 01/19] Refactors Gradio Frontend into multiple Classes --- .gitignore | 1 + sql_migration_assistant/config.py | 15 + .../frontend/GradioFrontend.py | 125 ++++ .../frontend/Tabs/BatchInputCodeTab.py | 29 + .../frontend/Tabs/BatchOutputTab.py | 34 + .../frontend/Tabs/CodeExplanationTab.py | 79 +++ .../frontend/Tabs/InteractiveInputCodeTab.py | 20 + .../frontend/Tabs/InteractiveOutputTab.py | 50 ++ .../frontend/Tabs/SimilarCodeTab.py | 46 ++ sql_migration_assistant/frontend/Tabs/Tab.py | 20 + .../frontend/Tabs/TranslationTab.py | 104 +++ .../frontend/Tabs/__init__.py | 0 sql_migration_assistant/frontend/__init__.py | 0 sql_migration_assistant/frontend/callbacks.py | 172 +++++ sql_migration_assistant/gradio_app.py | 628 ------------------ sql_migration_assistant/gradio_app_backup.py | 298 --------- sql_migration_assistant/main.py | 13 + 17 files changed, 708 insertions(+), 926 deletions(-) create mode 100644 sql_migration_assistant/config.py create mode 100644 sql_migration_assistant/frontend/GradioFrontend.py create mode 100644 sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/BatchOutputTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/Tab.py create mode 100644 sql_migration_assistant/frontend/Tabs/TranslationTab.py create mode 100644 sql_migration_assistant/frontend/Tabs/__init__.py create mode 100644 sql_migration_assistant/frontend/__init__.py create mode 100644 sql_migration_assistant/frontend/callbacks.py delete mode 100644 sql_migration_assistant/gradio_app.py delete mode 100644 sql_migration_assistant/gradio_app_backup.py create mode 100644 sql_migration_assistant/main.py diff --git a/.gitignore b/.gitignore index 33a3f6e0..abdd0c48 100644 --- a/.gitignore +++ b/.gitignore @@ -130,6 +130,7 @@ ipython_config.py # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml +poetry.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ diff --git a/sql_migration_assistant/config.py b/sql_migration_assistant/config.py new file mode 100644 index 00000000..fac4b2ea --- /dev/null +++ b/sql_migration_assistant/config.py @@ -0,0 +1,15 @@ +import os + + +FOUNDATION_MODEL_NAME = os.environ.get("SERVED_FOUNDATION_MODEL_NAME") +SQL_WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID") +VECTOR_SEARCH_ENDPOINT_NAME = os.environ.get("VECTOR_SEARCH_ENDPOINT_NAME") +VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME") +CODE_INTENT_TABLE_NAME = os.environ.get("CODE_INTENT_TABLE_NAME") +CATALOG = os.environ.get("CATALOG") +SCHEMA = os.environ.get("SCHEMA") +VOLUME_NAME = os.environ.get("VOLUME_NAME") +DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST") +TRANSFORMATION_JOB_ID = os.environ.get("TRANSFORMATION_JOB_ID") +WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION") +VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH") \ No newline at end of file diff --git a/sql_migration_assistant/frontend/GradioFrontend.py b/sql_migration_assistant/frontend/GradioFrontend.py new file mode 100644 index 00000000..4bb91999 --- /dev/null +++ b/sql_migration_assistant/frontend/GradioFrontend.py @@ -0,0 +1,125 @@ +import gradio as gr + +from sql_migration_assistant.frontend.Tabs.BatchInputCodeTab import BatchInputCodeTab +from sql_migration_assistant.frontend.Tabs.CodeExplanationTab import CodeExplanationTab +from sql_migration_assistant.frontend.Tabs.BatchOutputTab import BatchOutputTab +from sql_migration_assistant.frontend.Tabs.InteractiveInputCodeTab import InteractiveInputCodeTab +from sql_migration_assistant.frontend.Tabs.SimilarCodeTab import SimilarCodeTab +from sql_migration_assistant.frontend.Tabs.TranslationTab import TranslationTab +from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import InteractiveOutputTab +from sql_migration_assistant.frontend.callbacks import ( + read_code_file, + produce_preview, + exectute_workflow, + save_intent_wrapper, +) + + +class GradioFrontend: + intro = """logo + +# Databricks Legion Migration Accelerator + +Legion is an AI powered tool that aims to accelerate the migration of code to Databricks for low cost and effort. It +does this by using AI to translate, explain, and make discoverable your code. + +This interface is the Legion Control Panel. Here you are able to configure the AI agents for translation and explanation +to fit your needs, incorporating your expertise and knowledge of the codebase by adjusting the AI agents' instructions. + +Legion can work in a batch or interactive fashion. + +*Interactive operation* +Fine tune the AI agents on a single file and output the result as a Databricks notebook. +Use this UI to adjust the system prompts and instructions for the AI agents to generate the best translation and intent. + +*Batch operation* +Process a Volume of files to generate Databricks notebooks. Use this UI to fine tune your agent prompts against selected + files before executing a Workflow to transform all files in the Volume, outputting Databricks notebooks with the AI + generated intent and translation. + + +Please select your mode of operation to get started. 
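For context, everything this control panel needs comes from sql_migration_assistant/config.py above, which resolves each setting with os.environ.get and therefore silently yields None for any variable that is not set. A minimal local-launch sketch follows; it assumes the variable names from config.py and the main() entry point added later in this patch, and the checked subset is illustrative rather than exhaustive:

```python
# Minimal local-launch sketch. Assumptions: the environment variable names match
# sql_migration_assistant/config.py, and main() is the entry point added in this patch.
import os

# Illustrative subset of the variables config.py reads; os.environ.get() returns None
# for any that are unset, so fail fast here instead of deep inside a callback.
required = [
    "SERVED_FOUNDATION_MODEL_NAME",
    "DATABRICKS_WAREHOUSE_ID",
    "CATALOG",
    "SCHEMA",
    "VOLUME_NAME",
    "DATABRICKS_HOST",
    "TRANSFORMATION_JOB_ID",
    "WORKSPACE_LOCATION",
    "VOLUME_NAME_INPUT_PATH",
]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError(f"Set these environment variables before launching: {missing}")

from sql_migration_assistant.main import main

main()  # serves the Gradio app; main.py defaults to localhost:3001
```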
+ +""" + + def __init__(self): + with gr.Blocks(theme=gr.themes.Soft()) as self.app: + self.intro_markdown = gr.Markdown(self.intro) + self.operation = gr.Radio( + label="Select operation mode", + choices=["Interactive mode", "Batch mode"], + value="Interactive mode", + type="value", + interactive=True, + ) + + self.interactive_input_code_tab = InteractiveInputCodeTab() + self.batch_input_code_tab = BatchInputCodeTab() + self.code_explanation_tab = CodeExplanationTab() + self.translation_tab = TranslationTab() + self.similar_code_tab = SimilarCodeTab() + self.batch_output_tab = BatchOutputTab() + self.interactive_output_tab = InteractiveOutputTab() + + self.similar_code_tab.submit.click(save_intent_wrapper, inputs=[self.translation_tab.translation_input_code, + self.code_explanation_tab.explained]) + self.batch_output_tab.execute.click( + exectute_workflow, + inputs=[ + self.code_explanation_tab.intent_system_prompt, + self.code_explanation_tab.intent_temperature, + self.code_explanation_tab.intent_max_tokens, + self.translation_tab.translation_system_prompt, + self.translation_tab.translation_temperature, + self.translation_tab.translation_max_tokens, + ], + outputs=self.batch_output_tab.run_status, + ) + self.interactive_output_tab.produce_preview_button.click( + produce_preview, inputs=[self.code_explanation_tab.explained, self.translation_tab.translated], + outputs=self.interactive_output_tab.preview + ) + self.add_logic_loading_batch_mode() + self.add_logic_loading_interactive_mode() + self.change_tabs_based_on_operation_mode() + + def add_logic_loading_batch_mode(self): + for output in [ + self.batch_input_code_tab.selected_file, + self.translation_tab.translation_input_code, + self.code_explanation_tab.intent_input_code, + self.similar_code_tab.similar_code_input, + ]: + self.batch_input_code_tab.select_code_file.select( + fn=read_code_file, + inputs=[self.batch_input_code_tab.volume_path, self.batch_input_code_tab.select_code_file], + outputs=output + ) + + def add_logic_loading_interactive_mode(self): + for output in [ + self.translation_tab.translation_input_code, + self.code_explanation_tab.intent_input_code, + self.similar_code_tab.similar_code_input, + ]: + self.interactive_input_code_tab.interactive_code_button.click( + fn=lambda x: gr.update(value=x), inputs=self.interactive_input_code_tab.interactive_code, outputs=output + ) + + def change_tabs_based_on_operation_mode(self): + for tab in [self.batch_input_code_tab, self.batch_output_tab]: + self.operation.change( + lambda x: ( + gr.update(visible=(x != "Interactive mode")) + ), + self.operation, + tab.tab, + ) + for tab in [self.interactive_input_code_tab, self.interactive_output_tab]: + self.operation.change( + lambda x: ( + gr.update(visible=(x == "Interactive mode")) + ), + self.operation, + tab.tab, + ) diff --git a/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py b/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py new file mode 100644 index 00000000..4a4d4f26 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py @@ -0,0 +1,29 @@ +import gradio as gr + +from sql_migration_assistant.config import DATABRICKS_HOST, CATALOG, SCHEMA, VOLUME_NAME, VOLUME_NAME_INPUT_PATH +from sql_migration_assistant.frontend.callbacks import list_files + + +class BatchInputCodeTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Select code", visible=False) as tab: + self.tab = tab + self.header = gr.Markdown( + f"""## Select a file to test your agents on. 
+ + Legion can batch process a Volume of files to generate Databricks notebooks. The files to translate must be + added to the *Input Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME}). + + Here you can select a file to fine tune your agent prompts against. + """ + ) + self.volume_path = gr.Textbox(value=VOLUME_NAME_INPUT_PATH, visible=False) + + self.load_files = gr.Button("Load Files from Volume") + self.select_code_file = gr.Radio(label="Select Code File") + self.selected_file = gr.Code(label="Selected Code File", language="sql-msSQL") + + self.load_files.click(list_files, self.volume_path, self.select_code_file) diff --git a/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py b/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py new file mode 100644 index 00000000..a8be9ea1 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py @@ -0,0 +1,34 @@ +import gradio as gr + +from sql_migration_assistant.frontend.callbacks import make_status_box_visible + + +class BatchOutputTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Execute Job", visible=False) as tab: + self.tab = tab + self.header = gr.Markdown( + """ ## Execute Job + + This tab is for executing the job to covert the code files in the Unity Catalog Volume to Databricks + Notebooks. Once you are happy with your system prompts and and the explanation and translation outputs, + click the execute button below. + + This will kick off a Workflow which will ingest the code files, write them to a Delta Table, apply the AI + agents, and output a Databricks Notebook per input code file. This notebook will have the intent at the top + of the notebook in a markdown cell, and the translated code in the cell below. These notebooks are found in + the workspace at *{WORKSPACE_LOCATION}/outputNotebooks* and in the *Output Code* folder in the UC Volume + + The intent will also be stored in a Unity Catalog table and vector search index for finding similar code. + """ + ) + self.execute = gr.Button( + value="EXECUTE CODE TRANSFORMATION", + size="lg", + ) + self.run_status = gr.Markdown(label="Job Status Page", visible=False) + + self.execute.click(fn=make_status_box_visible, outputs=self.run_status) diff --git a/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py new file mode 100644 index 00000000..00a1f218 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py @@ -0,0 +1,79 @@ +import gradio as gr + +from sql_migration_assistant.frontend.callbacks import llm_intent_wrapper + + +class CodeExplanationTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Code Explanation") as tab: + self.tab = tab + self.header = gr.Markdown( + """ + ## An AI tool to generate the intent of your code. + + In this panel you need to iterate on the system prompt to refine the intent the AI generates for your code. + This intent will be stored in Unity Catalog, and can be used for finding similar code, for documentation, + and to help with writing new code in Databricks to achieve the same goal. + """ + ) + with gr.Accordion(label="Advanced Intent Settings", open=True): + gr.Markdown( + """ ### Advanced settings for the generating the intent of the input code. + + The *Temperature* paramater controls the randomness of the AI's response. 
Higher values will result in + more creative responses, while lower values will result in more predictable responses. + """ + ) + + with gr.Row(): + self.intent_temperature = gr.Number( + label="Temperature. Float between 0.0 and 1.0", value=0.0 + ) + self.intent_max_tokens = gr.Number( + label="Max tokens. Check your LLM docs for limit.", value=3500 + ) + with gr.Row(): + self.intent_system_prompt = gr.Textbox( + label="System prompt of the LLM to generate the intent.", + value="""Your job is to explain intent of the provided SQL code. + """.strip(), + ) + + with gr.Accordion(label="Intent Pane", open=True): + gr.Markdown( + """ ## AI generated intent of what your code aims to do. + """ + ) + self.explain_button = gr.Button("Explain") + with gr.Row(): + with gr.Column(): + gr.Markdown(""" ## Input Code.""") + + # input box for SQL code with nice formatting + self.intent_input_code = gr.Code( + label="Input SQL", + language="sql-msSQL", + ) + # a button labelled translate + + with gr.Column(): + # divider subheader + gr.Markdown(""" ## Code intent""") + # output box of the T-SQL translated to Spark SQL + self.explained = gr.Textbox(label="AI generated intent of your code.") + + # reset hidden chat history and prompt + # do translation + self.explain_button.click( + fn=llm_intent_wrapper, + inputs=[ + self.intent_system_prompt, + self.intent_input_code, + self.intent_max_tokens, + self.intent_temperature, + ], + outputs=self.explained, + ) diff --git a/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py b/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py new file mode 100644 index 00000000..d3bdb479 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py @@ -0,0 +1,20 @@ +import gradio as gr + + +class InteractiveInputCodeTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Input code", visible=True) as tab: + self.header = gr.Markdown( + f"""## Paste in some code to test your agents on. + """ + ) + self.interactive_code_button = gr.Button("Ingest code") + self.interactive_code = gr.Code( + label="Paste your code in here", language="sql-msSQL" + ) + self.interactive_code_button.click(fn=lambda: gr.Info("Code ingested!")) + + self.tab = tab diff --git a/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py b/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py new file mode 100644 index 00000000..8555c470 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py @@ -0,0 +1,50 @@ +import gradio as gr + +from sql_migration_assistant.config import DATABRICKS_HOST, CATALOG, SCHEMA, VOLUME_NAME +from sql_migration_assistant.frontend.callbacks import write_adhoc_to_workspace + + +class InteractiveOutputTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Write file to Workspace") as tab: + self.tab = tab + self.header = gr.Markdown( + f""" ## Write to Workspace + + Write out your explained and translated file to a notebook in the workspace. + You must provide a filename for the notebook. The notebook will be written to the workspace, saved to the + Output Code location in the Unity Catalog Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME}) + , and the intent will be saved to the intent table. + """ + ) + template = """ + -- Databricks notebook source + -- MAGIC %md + -- MAGIC # This notebook was AI generated. AI can make mistakes. This is provided as a tool to accelerate your migration. 
+ -- MAGIC + -- MAGIC ### AI Generated Intent + -- MAGIC + -- MAGIC INTENT_GOES_HERE + + -- COMMAND ---------- + + TRANSLATED_CODE_GOES_HERE + """ + with gr.Row(): + self.produce_preview_button = gr.Button("Produce Preview") + with gr.Column(): + self.file_name = gr.Textbox(label="Filename for the notebook") + self.write_to_workspace_button = gr.Button("Write to Workspace") + self.adhoc_write_output = gr.Markdown(label="Notebook output location") + + self.preview = gr.Code(label="Preview", language="python") + + # write file to notebook + self.write_to_workspace_button.click( + fn=write_adhoc_to_workspace, + inputs=[self.file_name, self.preview], + outputs=self.adhoc_write_output, + ) diff --git a/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py b/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py new file mode 100644 index 00000000..796b3419 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py @@ -0,0 +1,46 @@ +import gradio as gr + +from sql_migration_assistant.frontend.callbacks import similar_code_helper + + +class SimilarCodeTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Find Similar Code") as tab: + self.tab = tab + self.header = gr.Markdown( + """ + # ** Work in Progress ** + ## An AI tool to find similar code. + """ + ) + with gr.Accordion(label="Similar Code Pane", open=True): + gr.Markdown( + """ ## Similar code + + This code is thought to be similar to what you are doing, based on comparing the intent of your code with the intent of this code. + """ + ) + # a button + self.find_similar_code = gr.Button("Find similar code") + # a row with an code and text box to show the similar code + with gr.Row(): + self.similar_code_input = gr.Code( + label="Input Code.", language="sql-sparkSQL" + ) + self.similar_code_output = gr.Code( + label="Similar code to yours.", language="sql-sparkSQL" + ) + self.similar_intent = gr.Textbox(label="The similar codes intent.") + + # a button + self.submit = gr.Button("Save code and intent") + + # assign actions to buttons when clicked. + self.find_similar_code.click( + fn=similar_code_helper.get_similar_code, + inputs=self.similar_code_input, + outputs=[self.similar_code_output, self.similar_intent], + ) diff --git a/sql_migration_assistant/frontend/Tabs/Tab.py b/sql_migration_assistant/frontend/Tabs/Tab.py new file mode 100644 index 00000000..c7a94f00 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/Tab.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod + +import gradio as gr + + +class Tab(ABC): + header: gr.Markdown + label: str + tab: gr.Tab + + def __init__(self, header: str, label: str, **kwargs): + with gr.Tab(label=label, *+kwargs) as tab: + self.header = gr.Markdown(header) + self.tab = tab + self.build() + + @abstractmethod + def build(self): + """Build your Tab components here. Use self. to store components you need again""" + pass diff --git a/sql_migration_assistant/frontend/Tabs/TranslationTab.py b/sql_migration_assistant/frontend/Tabs/TranslationTab.py new file mode 100644 index 00000000..75128cf0 --- /dev/null +++ b/sql_migration_assistant/frontend/Tabs/TranslationTab.py @@ -0,0 +1,104 @@ +import gradio as gr + +from sql_migration_assistant.frontend.callbacks import llm_translate_wrapper + + +class TranslationTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Translation") as tab: + self.tab = tab + self.header = gr.Markdown( + """ + ## An AI tool to translate your code. 
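Before the translation tab continues below, a brief illustration of the abstract Tab base class defined just above: a concrete tab is presumably meant to subclass it and create its components inside build(). This is a sketch only, assuming the constructor is intended to forward keyword arguments to gr.Tab as **kwargs (the patch text reads *+kwargs, which would raise a TypeError when a tab is constructed), and ExampleTab is a hypothetical class that is not part of this patch:

```python
# Sketch only: ExampleTab is hypothetical, and Tab.__init__ is assumed to forward
# keyword arguments to gr.Tab as **kwargs (the patch text shows *+kwargs).
import gradio as gr

from sql_migration_assistant.frontend.Tabs.Tab import Tab


class ExampleTab(Tab):
    def __init__(self):
        # header and label are required by Tab.__init__; visible is forwarded to gr.Tab.
        super().__init__(header="## Example tab", label="Example", visible=True)

    def build(self):
        # Called by Tab.__init__ inside the gr.Tab context, so components created here
        # render on this tab; keep references on self for event wiring elsewhere.
        self.input_code = gr.Code(label="Input SQL", language="sql-msSQL")
        self.run_button = gr.Button("Run")
```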
+ + In this panel you need to iterate on the system prompt to refine the translation the AI generates for your code. + + """ + ) + with gr.Accordion(label="Translation Advanced Settings", open=True): + gr.Markdown( + """ ### Advanced settings for the translating the input code. + + The *Temperature* paramater controls the randomness of the AI's response. Higher values will result in + more creative responses, while lower values will result in more predictable responses. + """ + ) + with gr.Row(): + self.translation_temperature = gr.Number( + label="Temperature. Float between 0.0 and 1.0", value=0.0 + ) + self.translation_max_tokens = gr.Number( + label="Max tokens. Check your LLM docs for limit.", value=3500 + ) + with gr.Row(): + self.translation_system_prompt = gr.Textbox( + label="Instructions for the LLM translation tool.", + value=""" + You are an expert in multiple SQL dialects. You only reply with SQL code and with no other text. + Your purpose is to translate the given SQL query to Databricks Spark SQL. + You must follow these rules: + - You must keep all original catalog, schema, table, and field names. + - Convert all dates to dd-MMM-yyyy format using the date_format() function. + - Subqueries must end with a semicolon. + - Ensure queries do not have # or @ symbols. + - ONLY if the original query uses temporary tables (e.g. "INTO #temptable"), re-write these as either CREATE OR REPLACE TEMPORARY VIEW or CTEs. . + - Square brackets must be replaced with backticks. + - Custom field names should be surrounded by backticks. + - Ensure queries do not have # or @ symbols. + - Only if the original query contains DECLARE and SET statements, re-write them according to the following format: + DECLARE VARIABLE variable TYPE DEFAULT value; For example: DECLARE VARIABLE number INT DEFAULT 9; + SET VAR variable = value; For example: SET VAR number = 9; + + Write an initial draft of the translated query. Then double check the output for common mistakes, including: + - Using NOT IN with NULL values + - Using UNION when UNION ALL should have been used + - Using BETWEEN for exclusive ranges + - Data type mismatch in predicates + - Properly quoting identifiers + - Using the correct number of arguments for functions + - Casting to the correct data type + - Using the proper columns for joins + + Return the final translated query only. Include comments. Include only SQL. 
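To make the rule list above concrete, here is a small illustrative input/output pair showing the kind of rewrite the prompt asks for. Both queries are hypothetical examples of the stated rules (square brackets replaced with backticks, a #temp table rewritten as a temporary view, dd-MMM-yyyy formatting via date_format(), original catalog/schema/table names kept); they are not text from the patch:

```python
# Hypothetical example of the rewrite the translation prompt above describes.
tsql_input = """
SELECT [order_id], [order_date]
INTO #recent_orders
FROM [sales].[dbo].[orders]
WHERE [order_date] >= '2024-01-01';
"""

# Expected Spark SQL: backticks instead of square brackets, the temp table becomes a
# temporary view, and the selected date is formatted as dd-MMM-yyyy with date_format().
expected_spark_sql = """
CREATE OR REPLACE TEMPORARY VIEW recent_orders AS
SELECT `order_id`, date_format(`order_date`, 'dd-MMM-yyyy') AS `order_date`
FROM `sales`.`dbo`.`orders`
WHERE `order_date` >= '2024-01-01';
"""
```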
+ """.strip(), + lines=20, + ) + + with gr.Accordion(label="Translation Pane", open=True): + gr.Markdown(""" ### Input your code here for translation to Spark-SQL.""") + # a button labelled translate + self.translate_button = gr.Button("Translate") + with gr.Row(): + with gr.Column(): + gr.Markdown(""" ## Input code.""") + + # input box for SQL code with nice formatting + self.translation_input_code = gr.Code( + label="Input SQL", + language="sql-msSQL", + ) + + with gr.Column(): + # divider subheader + gr.Markdown(""" ## Translated Code""") + # output box of the T-SQL translated to Spark SQL + self.translated = gr.Code( + label="Your code translated to Spark SQL", + language="sql-sparkSQL", + ) + + # reset hidden chat history and prompt + # do translation + self.translate_button.click( + fn=llm_translate_wrapper, + inputs=[ + self.translation_system_prompt, + self.translation_input_code, + self.translation_max_tokens, + self.translation_temperature, + ], + outputs=self.translated, + ) diff --git a/sql_migration_assistant/frontend/Tabs/__init__.py b/sql_migration_assistant/frontend/Tabs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sql_migration_assistant/frontend/__init__.py b/sql_migration_assistant/frontend/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sql_migration_assistant/frontend/callbacks.py b/sql_migration_assistant/frontend/callbacks.py new file mode 100644 index 00000000..7e1d9568 --- /dev/null +++ b/sql_migration_assistant/frontend/callbacks.py @@ -0,0 +1,172 @@ +import base64 +import datetime +import json +import os + +import gradio as gr +from app.llm import LLMCalls +from app.similar_code import SimilarCode +from config import ( + FOUNDATION_MODEL_NAME, + SQL_WAREHOUSE_ID, + CATALOG, + SCHEMA, + CODE_INTENT_TABLE_NAME, + VECTOR_SEARCH_ENDPOINT_NAME, + VS_INDEX_NAME, + DATABRICKS_HOST, + TRANSFORMATION_JOB_ID, + WORKSPACE_LOCATION, VOLUME_NAME, +) +from databricks.labs.lsql.core import StatementExecutionExt +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.workspace import ImportFormat, Language + +w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") +see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID) +translation_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME) +intent_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME) +similar_code_helper = SimilarCode( + workspace_client=w, + see=see, + catalog=CATALOG, + schema=SCHEMA, + code_intent_table_name=CODE_INTENT_TABLE_NAME, + VS_index_name=VS_INDEX_NAME, + VS_endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME, +) + + +def list_files(path_to_volume): + file_infos = w.dbutils.fs.ls(path_to_volume) + file_names = [x.name for x in file_infos] + return file_names + + +def make_status_box_visible(): + return gr.Markdown(label="Job Run Status Page", visible=True) + +def read_code_file(volume_path, file_name): + file_name = os.path.join(volume_path, file_name) + file = w.files.download(file_name) + code = file.contents.read().decode("utf-8") + return code + + +def llm_intent_wrapper(system_prompt, input_code, max_tokens, temperature): + intent = intent_llm.llm_intent(system_prompt, input_code, max_tokens, temperature) + return intent + + +def llm_translate_wrapper(system_prompt, input_code, max_tokens, temperature): + translated_code = translation_llm.llm_translate(system_prompt, input_code, max_tokens, temperature) + return translated_code + + +def produce_preview(explanation, translated_code): + template = """ + -- 
Databricks notebook source + -- MAGIC %md + -- MAGIC # This notebook was AI generated. AI can make mistakes. This is provided as a tool to accelerate your migration. + -- MAGIC + -- MAGIC ### AI Generated Intent + -- MAGIC + -- MAGIC INTENT_GOES_HERE + + -- COMMAND ---------- + + TRANSLATED_CODE_GOES_HERE + """ + preview_code = template.replace("INTENT_GOES_HERE", explanation).replace( + "TRANSLATED_CODE_GOES_HERE", translated_code + ) + return preview_code + + +def write_adhoc_to_workspace(file_name, preview): + if len(file_name) == 0: + raise gr.Error("Please provide a filename") + + notebook_path_root = f"{WORKSPACE_LOCATION}/outputNotebooks/{str(datetime.datetime.now()).replace(':', '_')}" + notebook_path = f"{notebook_path_root}/{file_name}" + content = preview + w.workspace.mkdirs(notebook_path_root) + w.workspace.import_( + content=base64.b64encode(content.encode("utf-8")).decode("utf-8"), + path=notebook_path, + format=ImportFormat.SOURCE, + language=Language.SQL, + overwrite=True, + ) + _ = w.workspace.get_status(notebook_path) + id = _.object_id + url = f"{w.config.host}/#notebook/{id}" + output_message = f"Notebook {file_name} written to Databricks [here]({url})" + return output_message + + +def exectute_workflow(intent_prompt, intent_temperature, intent_max_tokens, translation_prompt, translation_temperature, + translation_max_tokens): + gr.Info("Beginning code transformation workflow") + agent_config_payload = [ + [ + { + "translation_agent": { + "system_prompt": translation_prompt, + "endpoint": FOUNDATION_MODEL_NAME, + "max_tokens": translation_max_tokens, + "temperature": translation_temperature, + } + } + ], + [ + { + "explanation_agent": { + "system_prompt": intent_prompt, + "endpoint": FOUNDATION_MODEL_NAME, + "max_tokens": intent_max_tokens, + "temperature": intent_temperature, + } + } + ], + ] + + app_config_payload = { + "VOLUME_NAME_OUTPUT_PATH": os.environ.get("VOLUME_NAME_OUTPUT_PATH"), + "VOLUME_NAME_INPUT_PATH": os.environ.get("VOLUME_NAME_INPUT_PATH"), + "VOLUME_NAME_CHECKPOINT_PATH": os.environ.get("VOLUME_NAME_CHECKPOINT_PATH"), + "CATALOG": os.environ.get("CATALOG"), + "SCHEMA": os.environ.get("SCHEMA"), + "DATABRICKS_HOST": DATABRICKS_HOST, + "DATABRICKS_TOKEN_SECRET_SCOPE": os.environ.get("DATABRICKS_TOKEN_SECRET_SCOPE"), + "DATABRICKS_TOKEN_SECRET_KEY": os.environ.get("DATABRICKS_TOKEN_SECRET_KEY"), + "CODE_INTENT_TABLE_NAME": os.environ.get("CODE_INTENT_TABLE_NAME"), + "WORKSPACE_LOCATION": WORKSPACE_LOCATION, + } + + app_configs = json.dumps(app_config_payload) + agent_configs = json.dumps(agent_config_payload) + + response = w.jobs.run_now( + job_id=int(TRANSFORMATION_JOB_ID), + job_parameters={ + "agent_configs": agent_configs, + "app_configs": app_configs, + }, + ) + run_id = response.run_id + + job_url = f"{DATABRICKS_HOST}/jobs/{TRANSFORMATION_JOB_ID}" + textbox_message = ( + f"Job run initiated. Click [here]({job_url}) to view the job status. 
" + f"You just executed the run with run_id: {run_id}\n" + f"Output notebooks will be written to the Workspace for immediate use at *{WORKSPACE_LOCATION}/outputNotebooks*" + f", and also in the *Output Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME})" + ) + return textbox_message + + +def save_intent_wrapper(input_code, explained): + gr.Info("Saving intent") + similar_code_helper.save_intent(input_code, explained) + gr.Info("Intent saved") diff --git a/sql_migration_assistant/gradio_app.py b/sql_migration_assistant/gradio_app.py deleted file mode 100644 index b1b15d31..00000000 --- a/sql_migration_assistant/gradio_app.py +++ /dev/null @@ -1,628 +0,0 @@ -import json -import os -import datetime -from databricks.labs.lsql.core import StatementExecutionExt -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.workspace import ImportFormat, Language -import base64 -import gradio as gr - -from app.llm import LLMCalls -from app.similar_code import SimilarCode -import logging # For printing translation attempts in console (debugging) - -# Setting up logger -logging.basicConfig -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -# # personal access token necessary for authenticating API requests. Stored using a secret - -FOUNDATION_MODEL_NAME = os.environ.get("SERVED_FOUNDATION_MODEL_NAME") -MAX_TOKENS = os.environ.get("MAX_TOKENS") -SQL_WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID") -VECTOR_SEARCH_ENDPOINT_NAME = os.environ.get("VECTOR_SEARCH_ENDPOINT_NAME") -VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME") -CODE_INTENT_TABLE_NAME = os.environ.get("CODE_INTENT_TABLE_NAME") -CATALOG = os.environ.get("CATALOG") -SCHEMA = os.environ.get("SCHEMA") -VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH") -VOLUME_NAME = os.environ.get("VOLUME_NAME") -DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST") -TRANSFORMATION_JOB_ID = os.environ.get("TRANSFORMATION_JOB_ID") -WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION") -w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") - -see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID) -translation_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME) -intent_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME) -similar_code_helper = SimilarCode( - workspace_client=w, - see=see, - catalog=CATALOG, - schema=SCHEMA, - code_intent_table_name=CODE_INTENT_TABLE_NAME, - VS_index_name=VS_INDEX_NAME, - VS_endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME, -) - -################################################################################ -################################################################################ - -# this is the app UI. it uses gradio blocks https://www.gradio.app/docs/gradio/blocks -# each gr.{} call adds a new element to UI, top to bottom. -with gr.Blocks(theme=gr.themes.Soft()) as demo: - # title with Databricks image - gr.Markdown( - """logo - -# Databricks Legion Migration Accelerator - -Legion is an AI powered tool that aims to accelerate the migration of code to Databricks for low cost and effort. It -does this by using AI to translate, explain, and make discoverable your code. - -This interface is the Legion Control Panel. Here you are able to configure the AI agents for translation and explanation -to fit your needs, incorporating your expertise and knowledge of the codebase by adjusting the AI agents' instructions. - -Legion can work in a batch or interactive fashion. 
- -*Interactive operation* -Fine tune the AI agents on a single file and output the result as a Databricks notebook. -Use this UI to adjust the system prompts and instructions for the AI agents to generate the best translation and intent. - -*Batch operation* -Process a Volume of files to generate Databricks notebooks. Use this UI to fine tune your agent prompts against selected - files before executing a Workflow to transform all files in the Volume, outputting Databricks notebooks with the AI - generated intent and translation. - - -Please select your mode of operation to get started. - -""" - ) - operation = gr.Radio( - label="Select operation mode", - choices=["Interactive mode", "Batch mode"], - value="Interactive mode", - type="value", - interactive=True, - ) - ################################################################################ - #### STORAGE SETTINGS TAB - ################################################################################ - - with gr.Tab(label="Input code", visible=True) as interactive_input_code_tab: - - gr.Markdown( - f"""## Paste in some code to test your agents on. - """ - ) - interactive_code_button = gr.Button("Ingest code") - interactive_code = gr.Code( - label="Paste your code in here", language="sql-msSQL" - ) - interactive_code_button.click(fn=lambda: gr.Info("Code ingested!")) - - with gr.Tab(label="Select code", visible=False) as batch_input_code_tab: - - gr.Markdown( - f"""## Select a file to test your agents on. - - Legion can batch process a Volume of files to generate Databricks notebooks. The files to translate must be - added to the *Input Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME}). - - Here you can select a file to fine tune your agent prompts against. - """ - ) - volume_path = gr.Textbox(value=VOLUME_NAME_INPUT_PATH, visible=False) - - load_files = gr.Button("Load Files from Volume") - select_code_file = gr.Radio(label="Select Code File") - selected_file = gr.Code(label="Selected Code File", language="sql-msSQL") - - def list_files(path_to_volume): - file_infos = w.dbutils.fs.ls(path_to_volume) - file_names = [x.name for x in file_infos] - file_name_radio = gr.Radio(label="Select Code File", choices=file_names) - return file_name_radio - - load_files.click(list_files, volume_path, select_code_file) - - def read_code_file(volume_path, file_name): - file_name = os.path.join(volume_path, file_name) - file = w.files.download(file_name) - code = file.contents.read().decode("utf-8") - return code - - ################################################################################ - #### EXPLANATION TAB - ################################################################################ - with gr.Tab(label="Code Explanation"): - gr.Markdown( - """ - ## An AI tool to generate the intent of your code. - - In this panel you need to iterate on the system prompt to refine the intent the AI generates for your code. - This intent will be stored in Unity Catalog, and can be used for finding similar code, for documentation, - and to help with writing new code in Databricks to achieve the same goal. - """ - ) - with gr.Accordion(label="Advanced Intent Settings", open=True): - gr.Markdown( - """ ### Advanced settings for the generating the intent of the input code. - - The *Temperature* paramater controls the randomness of the AI's response. Higher values will result in - more creative responses, while lower values will result in more predictable responses. 
- """ - ) - - with gr.Row(): - intent_temperature = gr.Number( - label="Temperature. Float between 0.0 and 1.0", value=0.0 - ) - intent_max_tokens = gr.Number( - label="Max tokens. Check your LLM docs for limit.", value=3500 - ) - with gr.Row(): - intent_system_prompt = gr.Textbox( - label="System prompt of the LLM to generate the intent.", - value="""Your job is to explain intent of the provided SQL code. - """.strip(), - ) - with gr.Accordion(label="Intent Pane", open=True): - gr.Markdown( - """ ## AI generated intent of what your code aims to do. - """ - ) - explain_button = gr.Button("Explain") - with gr.Row(): - with gr.Column(): - gr.Markdown(""" ## Input Code.""") - - # input box for SQL code with nice formatting - intent_input_code = gr.Code( - label="Input SQL", - language="sql-msSQL", - ) - # a button labelled translate - - with gr.Column(): - # divider subheader - gr.Markdown(""" ## Code intent""") - # output box of the T-SQL translated to Spark SQL - explained = gr.Textbox(label="AI generated intent of your code.") - - def llm_intent_wrapper(system_prompt, input_code, max_tokens, temperature): - # call the LLM to translate the code - intent = intent_llm.llm_intent( - system_prompt, input_code, max_tokens, temperature - ) - return intent - - # reset hidden chat history and prompt - # do translation - explain_button.click( - fn=llm_intent_wrapper, - inputs=[ - intent_system_prompt, - intent_input_code, - intent_max_tokens, - intent_temperature, - ], - outputs=explained, - ) - ################################################################################ - #### TRANSLATION TAB - ################################################################################ - with gr.Tab(label="Translation"): - gr.Markdown( - """ - ## An AI tool to translate your code. - - In this panel you need to iterate on the system prompt to refine the translation the AI generates for your code. - - """ - ) - with gr.Accordion(label="Translation Advanced Settings", open=True): - gr.Markdown( - """ ### Advanced settings for the translating the input code. - - The *Temperature* paramater controls the randomness of the AI's response. Higher values will result in - more creative responses, while lower values will result in more predictable responses. - """ - ) - with gr.Row(): - translation_temperature = gr.Number( - label="Temperature. Float between 0.0 and 1.0", value=0.0 - ) - translation_max_tokens = gr.Number( - label="Max tokens. Check your LLM docs for limit.", value=3500 - ) - with gr.Row(): - translation_system_prompt = gr.Textbox( - label="Instructions for the LLM translation tool.", - value=""" - You are an expert in multiple SQL dialects. You only reply with SQL code and with no other text. - Your purpose is to translate the given SQL query to Databricks Spark SQL. - You must follow these rules: - - You must keep all original catalog, schema, table, and field names. - - Convert all dates to dd-MMM-yyyy format using the date_format() function. - - Subqueries must end with a semicolon. - - Ensure queries do not have # or @ symbols. - - ONLY if the original query uses temporary tables (e.g. "INTO #temptable"), re-write these as either CREATE OR REPLACE TEMPORARY VIEW or CTEs. . - - Square brackets must be replaced with backticks. - - Custom field names should be surrounded by backticks. - - Ensure queries do not have # or @ symbols. 
- - Only if the original query contains DECLARE and SET statements, re-write them according to the following format: - DECLARE VARIABLE variable TYPE DEFAULT value; For example: DECLARE VARIABLE number INT DEFAULT 9; - SET VAR variable = value; For example: SET VAR number = 9; - - Write an initial draft of the translated query. Then double check the output for common mistakes, including: - - Using NOT IN with NULL values - - Using UNION when UNION ALL should have been used - - Using BETWEEN for exclusive ranges - - Data type mismatch in predicates - - Properly quoting identifiers - - Using the correct number of arguments for functions - - Casting to the correct data type - - Using the proper columns for joins - - Return the final translated query only. Include comments. Include only SQL. - """.strip(), - lines=20, - ) - - with gr.Accordion(label="Translation Pane", open=True): - gr.Markdown(""" ### Input your code here for translation to Spark-SQL.""") - # a button labelled translate - translate_button = gr.Button("Translate") - with gr.Row(): - with gr.Column(): - gr.Markdown(""" ## Input code.""") - - # input box for SQL code with nice formatting - translation_input_code = gr.Code( - label="Input SQL", - language="sql-msSQL", - ) - - with gr.Column(): - # divider subheader - gr.Markdown(""" ## Translated Code""") - # output box of the T-SQL translated to Spark SQL - translated = gr.Code( - label="Your code translated to Spark SQL", - language="sql-sparkSQL", - ) - - # helper function to take the output from llm_translate and return outputs for chatbox and textbox - # chatbox input is a list of lists, each list is a message from the user and the response from the LLM - # textbox input is a string - def llm_translate_wrapper( - system_prompt, input_code, max_tokens, temperature - ): - # call the LLM to translate the code - translated_code = translation_llm.llm_translate( - system_prompt, input_code, max_tokens, temperature - ) - return translated_code - - # reset hidden chat history and prompt - # do translation - translate_button.click( - fn=llm_translate_wrapper, - inputs=[ - translation_system_prompt, - translation_input_code, - translation_max_tokens, - translation_temperature, - ], - outputs=translated, - ) - - ################################################################################ - #### SIMILAR CODE TAB - ################################################################################ - with gr.Tab(label="Find Similar Code"): - gr.Markdown( - """ - # ** Work in Progress ** - ## An AI tool to find similar code. - """ - ) - with gr.Accordion(label="Similar Code Pane", open=True): - gr.Markdown( - """ ## Similar code - - This code is thought to be similar to what you are doing, based on comparing the intent of your code with the intent of this code. - """ - ) - # a button - find_similar_code = gr.Button("Find similar code") - # a row with an code and text box to show the similar code - with gr.Row(): - similar_code_input = gr.Code( - label="Input Code.", language="sql-sparkSQL" - ) - similar_code_output = gr.Code( - label="Similar code to yours.", language="sql-sparkSQL" - ) - similar_intent = gr.Textbox(label="The similar codes intent.") - - # a button - submit = gr.Button("Save code and intent") - - # assign actions to buttons when clicked. 
- find_similar_code.click( - fn=similar_code_helper.get_similar_code, - inputs=similar_code_input, - outputs=[similar_code_output, similar_intent], - ) - - def save_intent_wrapper(input_code, explained): - gr.Info("Saving intent") - similar_code_helper.save_intent(input_code, explained) - gr.Info("Intent saved") - - submit.click(save_intent_wrapper, inputs=[translation_input_code, explained]) - - ################################################################################ - #### EXECUTE JOB TAB - ################################################################################ - with gr.Tab(label="Execute Job", visible=False) as batch_output_tab: - gr.Markdown( - """ ## Execute Job - - This tab is for executing the job to covert the code files in the Unity Catalog Volume to Databricks - Notebooks. Once you are happy with your system prompts and and the explanation and translation outputs, - click the execute button below. - - This will kick off a Workflow which will ingest the code files, write them to a Delta Table, apply the AI - agents, and output a Databricks Notebook per input code file. This notebook will have the intent at the top - of the notebook in a markdown cell, and the translated code in the cell below. These notebooks are found in - the workspace at *{WORKSPACE_LOCATION}/outputNotebooks* and in the *Output Code* folder in the UC Volume - - The intent will also be stored in a Unity Catalog table and vector search index for finding similar code. - """ - ) - execute = gr.Button( - value="EXECUTE CODE TRANSFORMATION", - size="lg", - ) - run_status = gr.Markdown(label="Job Status Page", visible=False) - - def exectute_workflow( - intent_prompt, - intent_temperature, - intent_max_tokens, - translation_prompt, - translation_temperature, - translation_max_tokens, - ): - gr.Info("Beginning code transformation workflow") - agent_config_payload = [ - [ - { - "translation_agent": { - "system_prompt": translation_prompt, - "endpoint": FOUNDATION_MODEL_NAME, - "max_tokens": translation_max_tokens, - "temperature": translation_temperature, - } - } - ], - [ - { - "explanation_agent": { - "system_prompt": intent_prompt, - "endpoint": FOUNDATION_MODEL_NAME, - "max_tokens": intent_max_tokens, - "temperature": intent_temperature, - } - } - ], - ] - - app_config_payload = { - "VOLUME_NAME_OUTPUT_PATH": os.environ.get("VOLUME_NAME_OUTPUT_PATH"), - "VOLUME_NAME_INPUT_PATH": os.environ.get("VOLUME_NAME_INPUT_PATH"), - "VOLUME_NAME_CHECKPOINT_PATH": os.environ.get( - "VOLUME_NAME_CHECKPOINT_PATH" - ), - "CATALOG": os.environ.get("CATALOG"), - "SCHEMA": os.environ.get("SCHEMA"), - "DATABRICKS_HOST": DATABRICKS_HOST, - "DATABRICKS_TOKEN_SECRET_SCOPE": os.environ.get( - "DATABRICKS_TOKEN_SECRET_SCOPE" - ), - "DATABRICKS_TOKEN_SECRET_KEY": os.environ.get( - "DATABRICKS_TOKEN_SECRET_KEY" - ), - "CODE_INTENT_TABLE_NAME": os.environ.get("CODE_INTENT_TABLE_NAME"), - "WORKSPACE_LOCATION": WORKSPACE_LOCATION, - } - - app_configs = json.dumps(app_config_payload) - agent_configs = json.dumps(agent_config_payload) - - response = w.jobs.run_now( - job_id=int(TRANSFORMATION_JOB_ID), - job_parameters={ - "agent_configs": agent_configs, - "app_configs": app_configs, - }, - ) - run_id = response.run_id - - job_url = f"{DATABRICKS_HOST}/jobs/{TRANSFORMATION_JOB_ID}" - textbox_message = ( - f"Job run initiated. Click [here]({job_url}) to view the job status. 
" - f"You just executed the run with run_id: {run_id}\n" - f"Output notebooks will be written to the Workspace for immediate use at *{WORKSPACE_LOCATION}/outputNotebooks*" - f", and also in the *Output Code* folder in the UC Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME})" - ) - return textbox_message - - def make_status_box_visible(): - return gr.Markdown(label="Job Run Status Page", visible=True) - - execute.click(fn=make_status_box_visible, outputs=run_status) - execute.click( - exectute_workflow, - inputs=[ - intent_system_prompt, - intent_temperature, - intent_max_tokens, - translation_system_prompt, - translation_temperature, - translation_max_tokens, - ], - outputs=run_status, - ) - - with gr.Tab(label="Write file to Workspace") as interactive_output_tab: - gr.Markdown( - f""" ## Write to Workspace - - Write out your explained and translated file to a notebook in the workspace. - You must provide a filename for the notebook. The notebook will be written to the workspace, saved to the - Output Code location in the Unity Catalog Volume [here]({DATABRICKS_HOST}/explore/data/volumes/{CATALOG}/{SCHEMA}/{VOLUME_NAME}) - , and the intent will be saved to the intent table. - """ - ) - template = """ --- Databricks notebook source --- MAGIC %md --- MAGIC # This notebook was AI generated. AI can make mistakes. This is provided as a tool to accelerate your migration. --- MAGIC --- MAGIC ### AI Generated Intent --- MAGIC --- MAGIC INTENT_GOES_HERE - --- COMMAND ---------- - -TRANSLATED_CODE_GOES_HERE - """ - with gr.Row(): - produce_preview_button = gr.Button("Produce Preview") - with gr.Column(): - file_name = gr.Textbox(label="Filename for the notebook") - write_to_workspace_button = gr.Button("Write to Workspace") - adhoc_write_output = gr.Markdown(label="Notebook output location") - - def produce_preview(explanation, translated_code): - preview_code = template.replace("INTENT_GOES_HERE", explanation).replace( - "TRANSLATED_CODE_GOES_HERE", translated_code - ) - return preview_code - - def write_adhoc_to_workspace(file_name, preview): - - if len(file_name) == 0: - raise gr.Error("Please provide a filename") - - notebook_path_root = f"{WORKSPACE_LOCATION}/outputNotebooks/{str(datetime.datetime.now()).replace(':', '_')}" - notebook_path = f"{notebook_path_root}/{file_name}" - content = preview - w.workspace.mkdirs(notebook_path_root) - w.workspace.import_( - content=base64.b64encode(content.encode("utf-8")).decode("utf-8"), - path=notebook_path, - format=ImportFormat.SOURCE, - language=Language.SQL, - overwrite=True, - ) - _ = w.workspace.get_status(notebook_path) - id = _.object_id - url = f"{w.config.host}/#notebook/{id}" - output_message = f"Notebook {file_name} written to Databricks [here]({url})" - return output_message - - preview = gr.Code(label="Preview", language="python") - produce_preview_button.click( - produce_preview, inputs=[explained, translated], outputs=preview - ) - - # write file to notebook - write_to_workspace_button.click( - fn=write_adhoc_to_workspace, - inputs=[file_name, preview], - outputs=adhoc_write_output, - ) - - # this handles the code loading for batch mode - # read the selected code file and put it into the other panes - for output in [ - selected_file, - translation_input_code, - intent_input_code, - similar_code_input, - ]: - select_code_file.select( - fn=read_code_file, inputs=[volume_path, select_code_file], outputs=output - ) - - # this handles the code loading for interative mode - for output in [ - 
translation_input_code, - intent_input_code, - similar_code_input, - ]: - interactive_code_button.click( - fn=lambda x: gr.update(value=x), inputs=interactive_code, outputs=output - ) - - # change the input tabs based on the operation mode - operation.change( - lambda x: ( - gr.update(visible=False) - if x == "Interactive mode" - else gr.update(visible=True) - ), - operation, - batch_input_code_tab, - ) - operation.change( - lambda x: ( - gr.update(visible=True) - if x == "Interactive mode" - else gr.update(visible=False) - ), - operation, - interactive_input_code_tab, - ) - - # change the output tabs based on the operation mode - operation.change( - lambda x: ( - gr.update(visible=False) - if x == "Interactive mode" - else gr.update(visible=True) - ), - operation, - batch_output_tab, - ) - operation.change( - lambda x: ( - gr.update(visible=True) - if x == "Interactive mode" - else gr.update(visible=False) - ), - operation, - interactive_output_tab, - ) - -# for local dev -try: - if os.environ["LOCALE"] == "local_dev": - demo.queue().launch() -except KeyError: - pass - -# this is necessary to get the app to run on databricks -if __name__ == "__main__": - demo.queue().launch( - server_name=os.getenv("GRADIO_SERVER_NAME"), - server_port=int(os.getenv("GRADIO_SERVER_PORT")), - ) diff --git a/sql_migration_assistant/gradio_app_backup.py b/sql_migration_assistant/gradio_app_backup.py deleted file mode 100644 index 10407adb..00000000 --- a/sql_migration_assistant/gradio_app_backup.py +++ /dev/null @@ -1,298 +0,0 @@ -import os -from databricks.labs.lsql.core import StatementExecutionExt -from databricks.sdk import WorkspaceClient -import gradio as gr - -from app.llm import LLMCalls -from app.similar_code import SimilarCode -import logging # For printing translation attempts in console (debugging) - -# Setting up logger -logging.basicConfig -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - - -# # personal access token necessary for authenticating API requests. Stored using a secret - -FOUNDATION_MODEL_NAME = os.environ.get("SERVED_FOUNDATION_MODEL_NAME") -MAX_TOKENS = os.environ.get("MAX_TOKENS") -SQL_WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID") -VECTOR_SEARCH_ENDPOINT_NAME = os.environ.get("VECTOR_SEARCH_ENDPOINT_NAME") -VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME") -CODE_INTENT_TABLE_NAME = os.environ.get("CODE_INTENT_TABLE_NAME") -CATALOG = os.environ.get("CATALOG") -SCHEMA = os.environ.get("SCHEMA") - -w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") - -see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID) -translation_llm = LLMCalls( - foundation_llm_name=FOUNDATION_MODEL_NAME, max_tokens=MAX_TOKENS -) -intent_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME, max_tokens=MAX_TOKENS) -similar_code_helper = SimilarCode( - workspace_client=w, - see=see, - catalog=CATALOG, - schema=SCHEMA, - code_intent_table_name=CODE_INTENT_TABLE_NAME, - VS_index_name=VS_INDEX_NAME, - VS_endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME, -) - -################################################################################ -################################################################################ - -# this is the app UI. it uses gradio blocks https://www.gradio.app/docs/gradio/blocks -# each gr.{} call adds a new element to UI, top to bottom. 
-with gr.Blocks(theme=gr.themes.Soft()) as demo: - # title with Databricks image - gr.Markdown( - """logo - -## A migration assistant for explaining the intent of SQL code and conversion to Spark SQL - -#### This demo relies on the tables and columns referenced in the SQL query being present in Unity Catalogue and having their table comments and column comments populated. For the purpose of the demo, this was generated using the Databricks AI Generated Comments tool. - -""" - ) - - ################################################################################ - #### TRANSLATION ADVANCED OPTIONS PANE - ################################################################################ - with gr.Accordion(label="Translation Advanced Settings", open=False): - with gr.Row(): - transation_system_prompt = gr.Textbox( - label="Instructions for the LLM translation tool.", - value=""" - You are an expert in multiple SQL dialects. You only reply with SQL code and with no other text. - Your purpose is to translate the given SQL query to Databricks Spark SQL. - You must follow these rules: - - You must keep all original catalog, schema, table, and field names. - - Convert all dates to dd-MMM-yyyy format using the date_format() function. - - Subqueries must end with a semicolon. - - Ensure queries do not have # or @ symbols. - - ONLY if the original query uses temporary tables (e.g. "INTO #temptable"), re-write these as either CREATE OR REPLACE TEMPORARY VIEW or CTEs. . - - Square brackets must be replaced with backticks. - - Custom field names should be surrounded by backticks. - - Ensure queries do not have # or @ symbols. - - Only if the original query contains DECLARE and SET statements, re-write them according to the following format: - DECLARE VARIABLE variable TYPE DEFAULT value; For example: DECLARE VARIABLE number INT DEFAULT 9; - SET VAR variable = value; For example: SET VAR number = 9; - - Write an initial draft of the translated query. Then double check the output for common mistakes, including: - - Using NOT IN with NULL values - - Using UNION when UNION ALL should have been used - - Using BETWEEN for exclusive ranges - - Data type mismatch in predicates - - Properly quoting identifiers - - Using the correct number of arguments for functions - - Casting to the correct data type - - Using the proper columns for joins - - Return the final translated query only. Include comments. Include only SQL. 
- """.strip(), - lines=40, - ) - - ################################################################################ - #### TRANSLATION PANE - ################################################################################ - # subheader - - with gr.Accordion(label="Translation Pane", open=True): - gr.Markdown( - """ ### Input your T-SQL code here for automatic translation to Spark-SQL and use AI to generate a statement of intent for the code's purpose.""" - ) - # hidden chat interface - to enable chatbot functionality - translation_chat = gr.Chatbot(visible=False) - with gr.Row(): - with gr.Column(): - gr.Markdown( - """ ### Input your T-SQL code here for translation to Spark-SQL.""" - ) - - # input box for SQL code with nice formatting - input_code = gr.Code( - label="Input SQL", - language="sql-msSQL", - value="""SELECT - c.[country_name], - AVG([dep_count]) AS average_dependents -FROM - ( - SELECT - e.[employee_id] - ,e.[department_id] - ,COUNT(d.[dependent_id]) AS dep_count - FROM - [robert_whiffin].[code_assistant].[employees] e - LEFT JOIN [robert_whiffin].[code_assistant].[dependents] d ON e.[employee_id] = d.[employee_id] - GROUP BY - e.[employee_id] - ,e.[department_id] - ) AS subquery - JOIN [robert_whiffin].[code_assistant].[departments] dep ON subquery.[department_id] = dep.[department_id] - JOIN [robert_whiffin].[code_assistant].[locations] l ON dep.[location_id] = l.[location_id] - JOIN [robert_whiffin].[code_assistant].[countries] c ON l.[country_id] = c.[country_id] -GROUP BY - c.[country_name] -ORDER BY - c.[country_name]""", - ) - # a button labelled translate - translate_button = gr.Button("Translate") - - with gr.Column(): - # divider subheader - gr.Markdown(""" ### Your Code Translated to Spark-SQL""") - # output box of the T-SQL translated to Spark SQL - translated = gr.Code( - label="Your code translated to Spark SQL", language="sql-sparkSQL" - ) - translation_prompt = gr.Textbox(label="Adjustments for translation") - - def translate_respond(system_prompt, message, chat_history): - bot_message = translation_llm.llm_chat(system_prompt, message, chat_history) - chat_history.append([message, bot_message]) - return chat_history, chat_history[-1][1] - - # helper function to take the output from llm_translate and return outputs for chatbox and textbox - # chatbox input is a list of lists, each list is a message from the user and the response from the LLM - # textbox input is a string - def llm_translate_wrapper(system_prompt, input_code): - # call the LLM to translate the code - translated_code = translation_llm.llm_translate(system_prompt, input_code) - # wrap the translated code in a list of lists for the chatbot - chat_history = [[input_code, translated_code]] - return chat_history, translated_code - - # reset hidden chat history and prompt - translate_button.click( - fn=lambda: ([["", ""]], ""), - inputs=None, - outputs=[translation_chat, translation_prompt], - ) - # do translation - translate_button.click( - fn=llm_translate_wrapper, - inputs=[transation_system_prompt, input_code], - outputs=[translation_chat, translated], - ) - # refine translation - translation_prompt.submit( - fn=translate_respond, - inputs=[transation_system_prompt, translation_prompt, translation_chat], - outputs=[translation_chat, translated], - ) - - ################################################################################ - #### AI GENERATED INTENT PANE - ################################################################################ - # divider subheader - with 
gr.Accordion(label="Advanced Intent Settings", open=False): - gr.Markdown( - """ ### Advanced settings for the generating the intent of the input code.""" - ) - with gr.Row(): - intent_system_prompt = gr.Textbox( - label="System prompt of the LLM to generate the intent. Editing will reset the intent.", - value="""Your job is to explain intent of the provided SQL code. - """.strip(), - ) - with gr.Accordion(label="Intent Pane", open=True): - gr.Markdown( - """ ## AI generated intent of what your code aims to do. - - Intent is determined by an LLM which uses the code and table & column metadata. - - ***If the intent is incorrect, please edit***. Once you are happy that the description is correct, please click the button below to save the intent. - - """ - ) - # a box to give the LLM generated intent of the code. This is editable as well. - explain_button = gr.Button("Explain code intent using AI.") - explained = gr.Textbox(label="AI generated intent of your code.", visible=False) - - chatbot = gr.Chatbot(label="AI Chatbot for Intent Extraction", height="70%") - - msg = gr.Textbox(label="Instruction") - clear = gr.ClearButton([msg, chatbot]) - - def intent_respond(system_prompt, message, chat_history): - bot_message = intent_llm.llm_chat(system_prompt, message, chat_history) - chat_history.append([message, bot_message]) - return chat_history, "", bot_message - - def llm_chat_wrapper(system_prompt, input_code): - # call the LLM to translate the code - intent = intent_llm.llm_intent(system_prompt, input_code) - # wrap the translated code in a list of lists for the chatbot - chat_history = [[input_code, intent]] - return chat_history, "", intent - - explain_button.click( - fn=llm_chat_wrapper, - inputs=[intent_system_prompt, input_code], - outputs=[chatbot, msg, explained], - ) - msg.submit( - fn=intent_respond, - inputs=[intent_system_prompt, msg, chatbot], - outputs=[chatbot, msg, explained], - ) - clear.click(lambda: None, None, chatbot, queue=False) - - ################################################################################ - #### SIMILAR CODE PANE - ################################################################################ - # divider subheader - - with gr.Accordion(label="Similar Code Pane", open=True): - gr.Markdown( - """ ## Similar code - - This code is thought to be similar to what you are doing, based on comparing the intent of your code with the intent of this code. - """ - ) - # a button - find_similar_code = gr.Button("Find similar code") - # a row with an code and text box to show the similar code - with gr.Row(): - similar_code = gr.Code( - label="Similar code to yours.", language="sql-sparkSQL" - ) - similar_intent = gr.Textbox(label="The similar codes intent.") - - # a button - submit = gr.Button("Save code and intent") - - # assign actions to buttons when clicked. 
- find_similar_code.click( - fn=similar_code_helper.get_similar_code, - inputs=chatbot, - outputs=[similar_code, similar_intent], - ) - - def save_intent_wrapper(input_code, explained): - gr.Info("Saving intent") - similar_code_helper.save_intent(input_code, explained) - gr.Info("Intent saved") - - submit.click(save_intent_wrapper, inputs=[input_code, explained]) - - -# for local dev -try: - if os.environ["LOCALE"] == "local_dev": - demo.queue().launch() -except KeyError: - pass - -# this is necessary to get the app to run on databricks -if __name__ == "__main__": - demo.queue().launch( - server_name=os.getenv("GRADIO_SERVER_NAME"), - server_port=int(os.getenv("GRADIO_SERVER_PORT")), - ) diff --git a/sql_migration_assistant/main.py b/sql_migration_assistant/main.py new file mode 100644 index 00000000..9388b734 --- /dev/null +++ b/sql_migration_assistant/main.py @@ -0,0 +1,13 @@ + +from sql_migration_assistant.frontend.GradioFrontend import GradioFrontend +import os + +def main(): + frontend = GradioFrontend() + frontend.app.queue().launch( + server_name=os.getenv("GRADIO_SERVER_NAME", "localhost"), + server_port=int(os.getenv("GRADIO_SERVER_PORT", 3001)), + ) + +if __name__ == "__main__": + main() \ No newline at end of file From cea94f1e1895e2cc0260e401dc3c0f8125bae5df Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Tue, 5 Nov 2024 22:50:42 +0100 Subject: [PATCH 02/19] Unifies all imports to sql_migration_assistant. --- sql_migration_assistant/frontend/callbacks.py | 6 +- sql_migration_assistant/tests/test_llm.py | 2 +- .../tests/test_similar_code.py | 2 +- .../tests/test_sql_interface.py | 58 ------------------- sql_migration_assistant/utils/initialsetup.py | 2 +- .../utils/runindatabricks.py | 6 +- 6 files changed, 9 insertions(+), 67 deletions(-) delete mode 100644 sql_migration_assistant/tests/test_sql_interface.py diff --git a/sql_migration_assistant/frontend/callbacks.py b/sql_migration_assistant/frontend/callbacks.py index 7e1d9568..46404828 100644 --- a/sql_migration_assistant/frontend/callbacks.py +++ b/sql_migration_assistant/frontend/callbacks.py @@ -4,9 +4,9 @@ import os import gradio as gr -from app.llm import LLMCalls -from app.similar_code import SimilarCode -from config import ( +from sql_migration_assistant.app.llm import LLMCalls +from sql_migration_assistant.app.similar_code import SimilarCode +from sql_migration_assistant.config import ( FOUNDATION_MODEL_NAME, SQL_WAREHOUSE_ID, CATALOG, diff --git a/sql_migration_assistant/tests/test_llm.py b/sql_migration_assistant/tests/test_llm.py index 1d6dc497..ebb613ea 100644 --- a/sql_migration_assistant/tests/test_llm.py +++ b/sql_migration_assistant/tests/test_llm.py @@ -1,6 +1,6 @@ import unittest from unittest.mock import patch, MagicMock -from app.llm import LLMCalls +from sql_migration_assistant.app.llm import LLMCalls class TestLLMCalls(unittest.TestCase): diff --git a/sql_migration_assistant/tests/test_similar_code.py b/sql_migration_assistant/tests/test_similar_code.py index b34c266d..7c8d84dd 100644 --- a/sql_migration_assistant/tests/test_similar_code.py +++ b/sql_migration_assistant/tests/test_similar_code.py @@ -1,6 +1,6 @@ import unittest from unittest.mock import MagicMock, patch -from app.similar_code import ( +from sql_migration_assistant.app.similar_code import ( SimilarCode, ) # replace 'your_module' with the actual name of your module diff --git a/sql_migration_assistant/tests/test_sql_interface.py b/sql_migration_assistant/tests/test_sql_interface.py deleted file mode 100644 index 
94f05388..00000000 --- a/sql_migration_assistant/tests/test_sql_interface.py +++ /dev/null @@ -1,58 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch -from app.sql_interface import ( - SQLInterface, -) # replace 'your_module' with the actual name of your module - - -class TestSQLInterface(unittest.TestCase): - """ - Unit test class for testing the SQLInterface class. - """ - - @patch("app.sql_interface.sql.connect") - def setUp(self, mock_sql_connect): - """ - Sets up the test case by initializing an instance of SQLInterface with mock dependencies. - - Mocking the sql.connect method to isolate the functionality of SQLInterface from external dependencies. - """ - # Mock the connection and cursor - self.mock_connection = MagicMock() - self.mock_cursor = MagicMock() - self.mock_connection.cursor.return_value = self.mock_cursor - mock_sql_connect.return_value = self.mock_connection - - # Initialize the SQLInterface instance with mock parameters - self.sql_interface = SQLInterface( - databricks_host="test_host", - databricks_token="test_token", - sql_warehouse_http_path="test_http_path", - ) - - def test_execute_sql(self): - """ - Tests the execute_sql method of SQLInterface class. - - This test ensures that the SQL statement is executed and the fetched results are returned correctly. - """ - # Mock the execute and fetchall methods - self.mock_cursor.execute.return_value = None - self.mock_cursor.fetchall.return_value = [("result1",), ("result2",)] - - # SQL statement to test - sql_statement = "SELECT * FROM test_table" - - # Call the method to test - results = self.sql_interface.execute_sql(self.mock_cursor, sql_statement) - - # Assert that the execute method was called with the correct SQL statement - self.mock_cursor.execute.assert_called_once_with(sql_statement) - - # Assert that fetchall method was called and returned the expected results - self.mock_cursor.fetchall.assert_called_once() - self.assertEqual(results, [("result1",), ("result2",)]) - - -if __name__ == "__main__": - unittest.main() diff --git a/sql_migration_assistant/utils/initialsetup.py b/sql_migration_assistant/utils/initialsetup.py index 629a4202..8383aa0b 100644 --- a/sql_migration_assistant/utils/initialsetup.py +++ b/sql_migration_assistant/utils/initialsetup.py @@ -174,7 +174,7 @@ def upload_files(self, w, path): "jobs/silver_to_gold.py", "app/llm.py", "app/similar_code.py", - "gradio_app.py", + "main.py", "run_app_from_databricks_notebook.py", "config.yml", ] diff --git a/sql_migration_assistant/utils/runindatabricks.py b/sql_migration_assistant/utils/runindatabricks.py index 795a159c..40973e97 100644 --- a/sql_migration_assistant/utils/runindatabricks.py +++ b/sql_migration_assistant/utils/runindatabricks.py @@ -1,6 +1,6 @@ # this is only run from within databricks, hence the import doesn't work in IDE -from utils.configloader import ConfigLoader -from utils.run_review_app import RunReviewApp +from sql_migration_assistant.utils.configloader import ConfigLoader +from sql_migration_assistant.utils.run_review_app import RunReviewApp from dbtunnel import dbtunnel from databricks.sdk import WorkspaceClient from databricks.sdk.runtime import * @@ -12,7 +12,7 @@ def thread_func(): cl = ConfigLoader() cl.read_yaml_to_env("config.yml") dbtunnel.kill_port(8080) - app = "gradio_app.py" + app = "main.py" dbtunnel.gradio(path=app).run() From f4820edb0c2eee3f5ec03e3e4933ad710baceb2a Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Tue, 5 Nov 2024 22:54:43 +0100 Subject: [PATCH 03/19] Optimizes Imports 
and reformats files --- sql_migration_assistant/README.md | 27 ++++---- sql_migration_assistant/__init__.py | 10 +-- sql_migration_assistant/app/similar_code.py | 18 +++--- sql_migration_assistant/config.py | 3 +- sql_migration_assistant/docs/conf.py | 1 - .../frontend/GradioFrontend.py | 4 +- sql_migration_assistant/frontend/callbacks.py | 8 ++- .../infra/app_serving_cluster_infra.py | 6 +- sql_migration_assistant/infra/chat_infra.py | 4 +- sql_migration_assistant/infra/jobs_infra.py | 12 ++-- sql_migration_assistant/infra/model_def.py | 61 +++++++++---------- .../infra/secrets_infra.py | 2 +- .../infra/sql_warehouse_infra.py | 7 +-- .../infra/unity_catalog_infra.py | 23 ++++--- .../infra/vector_search_infra.py | 14 ++--- .../jobs/bronze_to_silver.py | 8 --- sql_migration_assistant/jobs/call_agents.py | 13 ++-- .../jobs/silver_to_gold.py | 5 +- sql_migration_assistant/main.py | 6 +- .../run_app_from_databricks_notebook.py | 28 +++++++-- sql_migration_assistant/tests/test_llm.py | 1 + .../tests/test_similar_code.py | 1 + sql_migration_assistant/utils/configloader.py | 5 +- sql_migration_assistant/utils/initialsetup.py | 21 ++++--- .../utils/run_review_app.py | 8 +-- .../utils/runindatabricks.py | 12 ++-- .../utils/uc_model_version.py | 14 ++--- .../utils/upload_files_to_workspace.py | 10 +-- 28 files changed, 171 insertions(+), 161 deletions(-) diff --git a/sql_migration_assistant/README.md b/sql_migration_assistant/README.md index 6e3ac747..14db0732 100644 --- a/sql_migration_assistant/README.md +++ b/sql_migration_assistant/README.md @@ -15,34 +15,33 @@ tags: # Project Legion - SQL Migration Assistant Legion is a Databricks field project to accelerate migrations on to Databricks leveraging the platform’s generative AI -capabilities. It uses an LLM for code conversion and intent summarisation, presented to users in a front end web +capabilities. It uses an LLM for code conversion and intent summarisation, presented to users in a front end web application. -Legion provides a chatbot interface to users for translating input code (for example T-SQL to Databricks SQL) and +Legion provides a chatbot interface to users for translating input code (for example T-SQL to Databricks SQL) and summarising the intent and business purpose of the code. This intent is then embedded for serving in a Vector Search index for finding similar pieces of code. This presents an opportunity for increased collaboration (find out who is -working on similar projects), rationalisation (identify duplicates based on intent) and discoverability (semantic search). +working on similar projects), rationalisation (identify duplicates based on intent) and discoverability (semantic +search). -Legion is a solution accelerator - it is *not* a fully baked solution. This is something for you the customer to take -on and own. This allows you to present a project to upskill your employees, leverage GenAI for a real use case, +Legion is a solution accelerator - it is *not* a fully baked solution. This is something for you the customer to take +on and own. This allows you to present a project to upskill your employees, leverage GenAI for a real use case, customise the application to their needs and entirely own the IP. ## Installation Videos - https://github.com/user-attachments/assets/e665bcf4-265f-4a47-81eb-60845a72c798 https://github.com/user-attachments/assets/fa622f96-a78c-40b8-9eb9-f6671c4d7b47 https://github.com/user-attachments/assets/1a58a1b5-2dcf-4624-b93f-214735162584 - - Setting Legion up is a simple and automated process. 
Ensure you have the [Databricks CLI] (https://docs.databricks.com/en/dev-tools/cli/index.html) installed and configured with the correct workspace. -Once the Databricks CLI has been installed and configured, run the following command to install the Databricks Labs +Once the Databricks CLI has been installed and configured, run the following command to install the Databricks Labs Sandbox and the SQL Migration Assistant. + ```bash databricks labs install sandbox && databricks labs sandbox sql-migration-assistant ``` @@ -50,10 +49,10 @@ databricks labs install sandbox && databricks labs sandbox sql-migration-assista ### What Legion needs - during setup above you will create or choose existing resources for the following: - A no-isolation shared cluster to host the front end application. -- A catalog and schema in Unity Catalog. +- A catalog and schema in Unity Catalog. - A table to store the code intent statements and their embeddings. -- A vector search endpoint and an embedding model: see docs -https://docs.databricks.com/en/generative-ai/vector-search.html#how-to-set-up-vector-search -- A chat LLM. Pay Per Token is recomended where available, but the set up will also allow for creation of -a provisioned throughput endpoint. +- A vector search endpoint and an embedding model: see docs + https://docs.databricks.com/en/generative-ai/vector-search.html#how-to-set-up-vector-search +- A chat LLM. Pay Per Token is recomended where available, but the set up will also allow for creation of + a provisioned throughput endpoint. - A PAT stored in a secret scope chosen by you, under the key `sql-migration-pat`. diff --git a/sql_migration_assistant/__init__.py b/sql_migration_assistant/__init__.py index c0d899ca..9f84043e 100644 --- a/sql_migration_assistant/__init__.py +++ b/sql_migration_assistant/__init__.py @@ -1,9 +1,11 @@ -from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant -from databricks.sdk import WorkspaceClient -from databricks.labs.blueprint.tui import Prompts -import yaml from pathlib import Path +import yaml +from databricks.labs.blueprint.tui import Prompts +from databricks.sdk import WorkspaceClient + +from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant + def hello(): w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") diff --git a/sql_migration_assistant/app/similar_code.py b/sql_migration_assistant/app/similar_code.py index 45c33887..9ea6effa 100644 --- a/sql_migration_assistant/app/similar_code.py +++ b/sql_migration_assistant/app/similar_code.py @@ -1,18 +1,18 @@ -from databricks.sdk import WorkspaceClient from databricks.labs.lsql.core import StatementExecutionExt +from databricks.sdk import WorkspaceClient class SimilarCode: def __init__( - self, - workspace_client: WorkspaceClient, - see: StatementExecutionExt, - catalog, - schema, - code_intent_table_name, - VS_index_name, - VS_endpoint_name, + self, + workspace_client: WorkspaceClient, + see: StatementExecutionExt, + catalog, + schema, + code_intent_table_name, + VS_index_name, + VS_endpoint_name, ): self.w = workspace_client self.see = see diff --git a/sql_migration_assistant/config.py b/sql_migration_assistant/config.py index fac4b2ea..adca8219 100644 --- a/sql_migration_assistant/config.py +++ b/sql_migration_assistant/config.py @@ -1,6 +1,5 @@ import os - FOUNDATION_MODEL_NAME = os.environ.get("SERVED_FOUNDATION_MODEL_NAME") SQL_WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID") VECTOR_SEARCH_ENDPOINT_NAME = 
os.environ.get("VECTOR_SEARCH_ENDPOINT_NAME") @@ -12,4 +11,4 @@ DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST") TRANSFORMATION_JOB_ID = os.environ.get("TRANSFORMATION_JOB_ID") WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION") -VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH") \ No newline at end of file +VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH") diff --git a/sql_migration_assistant/docs/conf.py b/sql_migration_assistant/docs/conf.py index e338c42a..de230214 100644 --- a/sql_migration_assistant/docs/conf.py +++ b/sql_migration_assistant/docs/conf.py @@ -13,7 +13,6 @@ import os import sys - sys.path.insert(0, os.path.abspath("../../python")) sys.path.append(os.path.abspath("./_theme")) # -- Project information ----------------------------------------------------- diff --git a/sql_migration_assistant/frontend/GradioFrontend.py b/sql_migration_assistant/frontend/GradioFrontend.py index 4bb91999..26fc1bdc 100644 --- a/sql_migration_assistant/frontend/GradioFrontend.py +++ b/sql_migration_assistant/frontend/GradioFrontend.py @@ -1,12 +1,12 @@ import gradio as gr from sql_migration_assistant.frontend.Tabs.BatchInputCodeTab import BatchInputCodeTab -from sql_migration_assistant.frontend.Tabs.CodeExplanationTab import CodeExplanationTab from sql_migration_assistant.frontend.Tabs.BatchOutputTab import BatchOutputTab +from sql_migration_assistant.frontend.Tabs.CodeExplanationTab import CodeExplanationTab from sql_migration_assistant.frontend.Tabs.InteractiveInputCodeTab import InteractiveInputCodeTab +from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import InteractiveOutputTab from sql_migration_assistant.frontend.Tabs.SimilarCodeTab import SimilarCodeTab from sql_migration_assistant.frontend.Tabs.TranslationTab import TranslationTab -from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import InteractiveOutputTab from sql_migration_assistant.frontend.callbacks import ( read_code_file, produce_preview, diff --git a/sql_migration_assistant/frontend/callbacks.py b/sql_migration_assistant/frontend/callbacks.py index 46404828..c0321b02 100644 --- a/sql_migration_assistant/frontend/callbacks.py +++ b/sql_migration_assistant/frontend/callbacks.py @@ -4,6 +4,10 @@ import os import gradio as gr +from databricks.labs.lsql.core import StatementExecutionExt +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.workspace import ImportFormat, Language + from sql_migration_assistant.app.llm import LLMCalls from sql_migration_assistant.app.similar_code import SimilarCode from sql_migration_assistant.config import ( @@ -18,9 +22,6 @@ TRANSFORMATION_JOB_ID, WORKSPACE_LOCATION, VOLUME_NAME, ) -from databricks.labs.lsql.core import StatementExecutionExt -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.workspace import ImportFormat, Language w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID) @@ -46,6 +47,7 @@ def list_files(path_to_volume): def make_status_box_visible(): return gr.Markdown(label="Job Run Status Page", visible=True) + def read_code_file(volume_path, file_name): file_name = os.path.join(volume_path, file_name) file = w.files.download(file_name) diff --git a/sql_migration_assistant/infra/app_serving_cluster_infra.py b/sql_migration_assistant/infra/app_serving_cluster_infra.py index 60d18ea5..1d9c978e 100644 --- a/sql_migration_assistant/infra/app_serving_cluster_infra.py +++ 
b/sql_migration_assistant/infra/app_serving_cluster_infra.py @@ -1,8 +1,8 @@ +import logging + +from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient from databricks.sdk.service.compute import ListClustersFilterBy, State, DataSecurityMode -from databricks.labs.blueprint.tui import Prompts - -import logging class AppServingClusterInfra: diff --git a/sql_migration_assistant/infra/chat_infra.py b/sql_migration_assistant/infra/chat_infra.py index 0b8f29f1..b30da579 100644 --- a/sql_migration_assistant/infra/chat_infra.py +++ b/sql_migration_assistant/infra/chat_infra.py @@ -1,8 +1,8 @@ -from databricks.sdk import WorkspaceClient +import logging from databricks.labs.blueprint.tui import Prompts +from databricks.sdk import WorkspaceClient -import logging from sql_migration_assistant.utils.uc_model_version import get_latest_model_version diff --git a/sql_migration_assistant/infra/jobs_infra.py b/sql_migration_assistant/infra/jobs_infra.py index 6ec43968..6713362e 100644 --- a/sql_migration_assistant/infra/jobs_infra.py +++ b/sql_migration_assistant/infra/jobs_infra.py @@ -1,7 +1,5 @@ from databricks.sdk import WorkspaceClient -from databricks.sdk.errors.platform import BadRequest -from databricks.labs.blueprint.tui import Prompts -from databricks.labs.lsql.core import StatementExecutionExt +from databricks.sdk.service import compute from databricks.sdk.service.compute import DataSecurityMode from databricks.sdk.service.jobs import ( Task, @@ -11,8 +9,6 @@ JobCluster, JobParameterDefinition, ) -from databricks.sdk.service import jobs, compute -import os """ Approach @@ -28,9 +24,9 @@ class JobsInfra: def __init__( - self, - config, - workspace_client: WorkspaceClient, + self, + config, + workspace_client: WorkspaceClient, ): self.w = workspace_client self.config = config diff --git a/sql_migration_assistant/infra/model_def.py b/sql_migration_assistant/infra/model_def.py index c263fa87..bfe5eb1e 100644 --- a/sql_migration_assistant/infra/model_def.py +++ b/sql_migration_assistant/infra/model_def.py @@ -1,18 +1,17 @@ +import os +from operator import itemgetter + +import mlflow from langchain_community.chat_models import ChatDatabricks -from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough +from langchain_core.messages import HumanMessage, AIMessage from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ( ChatPromptTemplate, MessagesPlaceholder, ) -from langchain_core.messages import HumanMessage, AIMessage - -import mlflow +from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough from mlflow.tracking import MlflowClient -import os -from operator import itemgetter - def create_langchain_chat_model(): # ## Enable MLflow Tracing @@ -123,15 +122,15 @@ def format_chat_history_for_prompt(chat_messages_array): # RAG Chain ############ is_question_about_sql_chain = ( - { - "question": itemgetter("messages") - | RunnableLambda(extract_user_query_string), - "formatted_chat_history": itemgetter("messages") - | RunnableLambda(extract_chat_history), - } - | is_question_relevant_prompt - | model - | StrOutputParser() + { + "question": itemgetter("messages") + | RunnableLambda(extract_user_query_string), + "formatted_chat_history": itemgetter("messages") + | RunnableLambda(extract_chat_history), + } + | is_question_relevant_prompt + | model + | StrOutputParser() ) irrelevant_question_chain = RunnableLambda( @@ -155,15 +154,15 @@ def 
format_chat_history_for_prompt(chat_messages_array): ) chain = ( - RunnablePassthrough() - | { - "system": itemgetter("system"), - "question": itemgetter("question"), - "formatted_chat_history": itemgetter("chat_history"), - } - | prompt - | model - | StrOutputParser() + RunnablePassthrough() + | { + "system": itemgetter("system"), + "question": itemgetter("question"), + "formatted_chat_history": itemgetter("chat_history"), + } + | prompt + | model + | StrOutputParser() ) branch_node = RunnableBranch( @@ -179,12 +178,12 @@ def format_chat_history_for_prompt(chat_messages_array): ) full_chain = { - "question_is_relevant": is_question_about_sql_chain, - "question": itemgetter("messages") | RunnableLambda(extract_user_query_string), - "system": itemgetter("messages") | RunnableLambda(extract_system_prompt_string), - "chat_history": itemgetter("messages") - | RunnableLambda(format_chat_history_for_prompt), - } | branch_node + "question_is_relevant": is_question_about_sql_chain, + "question": itemgetter("messages") | RunnableLambda(extract_user_query_string), + "system": itemgetter("messages") | RunnableLambda(extract_system_prompt_string), + "chat_history": itemgetter("messages") + | RunnableLambda(format_chat_history_for_prompt), + } | branch_node mlflow.models.set_model(model=full_chain) diff --git a/sql_migration_assistant/infra/secrets_infra.py b/sql_migration_assistant/infra/secrets_infra.py index bb49b0af..30f86434 100644 --- a/sql_migration_assistant/infra/secrets_infra.py +++ b/sql_migration_assistant/infra/secrets_infra.py @@ -1,7 +1,7 @@ import logging -from databricks.sdk import WorkspaceClient from databricks.labs.blueprint.tui import Prompts +from databricks.sdk import WorkspaceClient class SecretsInfra: diff --git a/sql_migration_assistant/infra/sql_warehouse_infra.py b/sql_migration_assistant/infra/sql_warehouse_infra.py index 0c090f2c..9727c3e6 100644 --- a/sql_migration_assistant/infra/sql_warehouse_infra.py +++ b/sql_migration_assistant/infra/sql_warehouse_infra.py @@ -1,9 +1,8 @@ -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType +import logging from databricks.labs.blueprint.tui import Prompts - -import logging +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType class SqlWarehouseInfra: diff --git a/sql_migration_assistant/infra/unity_catalog_infra.py b/sql_migration_assistant/infra/unity_catalog_infra.py index 520e4f69..526801b6 100644 --- a/sql_migration_assistant/infra/unity_catalog_infra.py +++ b/sql_migration_assistant/infra/unity_catalog_infra.py @@ -1,12 +1,11 @@ import logging -from databricks.sdk import WorkspaceClient -from databricks.sdk.errors.platform import BadRequest from databricks.labs.blueprint.tui import Prompts from databricks.labs.lsql.core import StatementExecutionExt -from databricks.sdk.service.catalog import VolumeType +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import PermissionDenied -import os +from databricks.sdk.errors.platform import BadRequest +from databricks.sdk.service.catalog import VolumeType """ Approach @@ -22,11 +21,11 @@ class UnityCatalogInfra: def __init__( - self, - config, - workspace_client: WorkspaceClient, - p: Prompts, - see: StatementExecutionExt, + self, + config, + workspace_client: WorkspaceClient, + p: Prompts, + see: StatementExecutionExt, ): self.w = workspace_client self.config = config @@ -129,9 +128,9 @@ def create_tables(self): _ = self.see.execute( 
statement=f"CREATE TABLE IF NOT EXISTS " - f"`{table_name}`" - f" (id BIGINT, code STRING, intent STRING) " - f"TBLPROPERTIES (delta.enableChangeDataFeed = true)", + f"`{table_name}`" + f" (id BIGINT, code STRING, intent STRING) " + f"TBLPROPERTIES (delta.enableChangeDataFeed = true)", catalog=self.migration_assistant_UC_catalog, schema=self.migration_assistant_UC_schema, ) diff --git a/sql_migration_assistant/infra/vector_search_infra.py b/sql_migration_assistant/infra/vector_search_infra.py index 4ef8e7a0..b5f5705c 100644 --- a/sql_migration_assistant/infra/vector_search_infra.py +++ b/sql_migration_assistant/infra/vector_search_infra.py @@ -1,4 +1,9 @@ +import logging +import time + +from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient +from databricks.sdk.errors.platform import ResourceAlreadyExists, NotFound from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput from databricks.sdk.service.vectorsearch import ( EndpointType, @@ -7,13 +12,8 @@ EmbeddingSourceColumn, VectorIndexType, ) -from databricks.sdk.errors.platform import ResourceAlreadyExists, NotFound - -from databricks.labs.blueprint.tui import Prompts -import logging from sql_migration_assistant.utils.uc_model_version import get_latest_model_version -import time class VectorSearchInfra: @@ -177,8 +177,8 @@ def create_VS_index(self): ) except NotFound as e: if ( - f"Vector search endpoint {self.migration_assistant_VS_endpoint} not found" - in str(e) + f"Vector search endpoint {self.migration_assistant_VS_endpoint} not found" + in str(e) ): logging.info( f"Waiting for Vector Search endpoint to provision. Retrying in 30 seconds." diff --git a/sql_migration_assistant/jobs/bronze_to_silver.py b/sql_migration_assistant/jobs/bronze_to_silver.py index d8d97191..1b573a1c 100644 --- a/sql_migration_assistant/jobs/bronze_to_silver.py +++ b/sql_migration_assistant/jobs/bronze_to_silver.py @@ -8,21 +8,16 @@ StringType, MapType, IntegerType, - TimestampType, ) -import pyspark.sql.functions as f -from pyspark.sql.functions import udf, pandas_udf agent_configs = json.loads(dbutils.widgets.get("agent_configs")) app_configs = json.loads(dbutils.widgets.get("app_configs")) - # COMMAND ---------- checkpoint_dir = app_configs["VOLUME_NAME_CHECKPOINT_PATH"] volume_path = app_configs["VOLUME_NAME_INPUT_PATH"] - # COMMAND ---------- bronze_raw_code = f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.bronze_raw_code' @@ -70,7 +65,6 @@ """ ) - silver_llm_responses = ( f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.silver_llm_responses' ) @@ -87,7 +81,6 @@ """ ) - gold_table = ( f'{app_configs["CATALOG"]}.{app_configs["SCHEMA"]}.gold_transformed_notebooks' ) @@ -104,7 +97,6 @@ """ ) - # COMMAND ---------- # DBTITLE 1,convert agent_configs input string to a dataframe diff --git a/sql_migration_assistant/jobs/call_agents.py b/sql_migration_assistant/jobs/call_agents.py index 598b16b7..aba81979 100644 --- a/sql_migration_assistant/jobs/call_agents.py +++ b/sql_migration_assistant/jobs/call_agents.py @@ -1,19 +1,14 @@ # Databricks notebook source +import json + +import pyspark.sql.functions as f from databricks.sdk import WorkspaceClient from databricks.sdk.service.serving import ChatMessage, ChatMessageRole -import json -import os +from pyspark.sql.functions import pandas_udf from pyspark.sql.types import ( - ArrayType, - StructType, - StructField, StringType, MapType, - IntegerType, - TimestampType, ) -import pyspark.sql.functions as f -from pyspark.sql.functions import udf, 
pandas_udf # COMMAND ---------- diff --git a/sql_migration_assistant/jobs/silver_to_gold.py b/sql_migration_assistant/jobs/silver_to_gold.py index 7228aa06..b57129b6 100644 --- a/sql_migration_assistant/jobs/silver_to_gold.py +++ b/sql_migration_assistant/jobs/silver_to_gold.py @@ -1,10 +1,11 @@ # Databricks notebook source import base64 +import json + from databricks.sdk import WorkspaceClient from databricks.sdk.service.workspace import ImportFormat, Language from pyspark.sql import functions as f from pyspark.sql.types import * -import json # COMMAND ---------- @@ -34,6 +35,7 @@ prompt_id = dbutils.jobs.taskValues.get(taskKey="ingest_to_holding", key="promptID") output_volume_path = app_configs["VOLUME_NAME_OUTPUT_PATH"] + # COMMAND ---------- @@ -110,7 +112,6 @@ def write_notebook_code(llm_responses): gold_df.display() - # COMMAND ---------- temp_table_name = "gold_temp" diff --git a/sql_migration_assistant/main.py b/sql_migration_assistant/main.py index 9388b734..6527e989 100644 --- a/sql_migration_assistant/main.py +++ b/sql_migration_assistant/main.py @@ -1,6 +1,7 @@ +import os from sql_migration_assistant.frontend.GradioFrontend import GradioFrontend -import os + def main(): frontend = GradioFrontend() @@ -9,5 +10,6 @@ def main(): server_port=int(os.getenv("GRADIO_SERVER_PORT", 3001)), ) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/sql_migration_assistant/run_app_from_databricks_notebook.py b/sql_migration_assistant/run_app_from_databricks_notebook.py index 5552726b..ec5728c0 100644 --- a/sql_migration_assistant/run_app_from_databricks_notebook.py +++ b/sql_migration_assistant/run_app_from_databricks_notebook.py @@ -7,19 +7,39 @@ # MAGIC If you want to share the app with users outside of Databricks, for example so non technical SMEs can contribute to LLM prompt development, the notebook needs to run on a no isolation shared cluster. # COMMAND ---------- -pip install databricks-sdk -U -q +pip +install +databricks - sdk - U - q # COMMAND ---------- -pip install gradio==4.27.0 pyyaml aiohttp==3.10.5 databricks-labs-blueprint==0.8.2 databricks-labs-lsql==0.9.0 -q +pip +install +gradio == 4.27 +.0 +pyyaml +aiohttp == 3.10 +.5 +databricks - labs - blueprint == 0.8 +.2 +databricks - labs - lsql == 0.9 +.0 - q # COMMAND ---------- -pip install fastapi==0.112.2 pydantic==2.8.2 dbtunnel==0.14.6 -q +pip +install +fastapi == 0.112 +.2 +pydantic == 2.8 +.2 +dbtunnel == 0.14 +.6 - q # COMMAND ---------- dbutils.library.restartPython() # COMMAND ---------- from utils.runindatabricks import run_app + # set debug=True to print the app logs in this cell. 
# run_app(debug=True) -run_app() \ No newline at end of file +run_app() diff --git a/sql_migration_assistant/tests/test_llm.py b/sql_migration_assistant/tests/test_llm.py index ebb613ea..86278b14 100644 --- a/sql_migration_assistant/tests/test_llm.py +++ b/sql_migration_assistant/tests/test_llm.py @@ -1,5 +1,6 @@ import unittest from unittest.mock import patch, MagicMock + from sql_migration_assistant.app.llm import LLMCalls diff --git a/sql_migration_assistant/tests/test_similar_code.py b/sql_migration_assistant/tests/test_similar_code.py index 7c8d84dd..b8bdcc1b 100644 --- a/sql_migration_assistant/tests/test_similar_code.py +++ b/sql_migration_assistant/tests/test_similar_code.py @@ -1,5 +1,6 @@ import unittest from unittest.mock import MagicMock, patch + from sql_migration_assistant.app.similar_code import ( SimilarCode, ) # replace 'your_module' with the actual name of your module diff --git a/sql_migration_assistant/utils/configloader.py b/sql_migration_assistant/utils/configloader.py index 82f6177e..a3310b45 100644 --- a/sql_migration_assistant/utils/configloader.py +++ b/sql_migration_assistant/utils/configloader.py @@ -1,7 +1,8 @@ -from databricks.sdk import WorkspaceClient -import yaml import os +import yaml +from databricks.sdk import WorkspaceClient + class ConfigLoader: """ diff --git a/sql_migration_assistant/utils/initialsetup.py b/sql_migration_assistant/utils/initialsetup.py index 8383aa0b..62d69f06 100644 --- a/sql_migration_assistant/utils/initialsetup.py +++ b/sql_migration_assistant/utils/initialsetup.py @@ -1,20 +1,21 @@ +import logging +import os + from databricks.labs.lsql.core import StatementExecutionExt from databricks.sdk.errors import ResourceAlreadyExists, BadRequest from databricks.sdk.errors.platform import PermissionDenied -from sql_migration_assistant.infra.sql_warehouse_infra import SqlWarehouseInfra -from sql_migration_assistant.infra.unity_catalog_infra import UnityCatalogInfra -from sql_migration_assistant.infra.vector_search_infra import VectorSearchInfra -from sql_migration_assistant.infra.chat_infra import ChatInfra -from sql_migration_assistant.infra.secrets_infra import SecretsInfra -from sql_migration_assistant.infra.jobs_infra import JobsInfra + from sql_migration_assistant.infra.app_serving_cluster_infra import ( AppServingClusterInfra, ) - -import logging -import os -from sql_migration_assistant.utils.upload_files_to_workspace import FileUploader +from sql_migration_assistant.infra.chat_infra import ChatInfra +from sql_migration_assistant.infra.jobs_infra import JobsInfra +from sql_migration_assistant.infra.secrets_infra import SecretsInfra +from sql_migration_assistant.infra.sql_warehouse_infra import SqlWarehouseInfra +from sql_migration_assistant.infra.unity_catalog_infra import UnityCatalogInfra +from sql_migration_assistant.infra.vector_search_infra import VectorSearchInfra from sql_migration_assistant.utils.run_review_app import RunReviewApp +from sql_migration_assistant.utils.upload_files_to_workspace import FileUploader class SetUpMigrationAssistant: diff --git a/sql_migration_assistant/utils/run_review_app.py b/sql_migration_assistant/utils/run_review_app.py index ae6b8c97..b3b94f19 100644 --- a/sql_migration_assistant/utils/run_review_app.py +++ b/sql_migration_assistant/utils/run_review_app.py @@ -1,10 +1,10 @@ import logging +from urllib.parse import urlparse +from databricks.labs.blueprint.commands import CommandExecutor from databricks.sdk import WorkspaceClient -from databricks.sdk.service import compute from 
databricks.sdk.mixins.compute import ClustersExt -from databricks.labs.blueprint.commands import CommandExecutor -from urllib.parse import urlparse +from databricks.sdk.service import compute class RunReviewApp: @@ -66,7 +66,7 @@ def _launch_app(self): def _get_proxy_url(self, organisation_id): def get_cloud_proxy_settings( - cloud: str, host: str, org_id: str, cluster_id: str, port: int + cloud: str, host: str, org_id: str, cluster_id: str, port: int ): cloud_norm = cloud.lower() if cloud_norm not in ["aws", "azure"]: diff --git a/sql_migration_assistant/utils/runindatabricks.py b/sql_migration_assistant/utils/runindatabricks.py index 40973e97..37edf76a 100644 --- a/sql_migration_assistant/utils/runindatabricks.py +++ b/sql_migration_assistant/utils/runindatabricks.py @@ -1,11 +1,13 @@ # this is only run from within databricks, hence the import doesn't work in IDE -from sql_migration_assistant.utils.configloader import ConfigLoader -from sql_migration_assistant.utils.run_review_app import RunReviewApp -from dbtunnel import dbtunnel -from databricks.sdk import WorkspaceClient -from databricks.sdk.runtime import * import threading + import yaml +from databricks.sdk import WorkspaceClient +from databricks.sdk.runtime import * +from dbtunnel import dbtunnel + +from sql_migration_assistant.utils.configloader import ConfigLoader +from sql_migration_assistant.utils.run_review_app import RunReviewApp def thread_func(): diff --git a/sql_migration_assistant/utils/uc_model_version.py b/sql_migration_assistant/utils/uc_model_version.py index d9a7ca77..5b0611e6 100644 --- a/sql_migration_assistant/utils/uc_model_version.py +++ b/sql_migration_assistant/utils/uc_model_version.py @@ -8,11 +8,11 @@ def get_latest_model_version(model_name): client = MlflowClient() model_version_infos = client.search_model_versions("name = '%s'" % model_name) return ( - max( - [ - int(model_version_info.version) - for model_version_info in model_version_infos - ] - ) - or 1 + max( + [ + int(model_version_info.version) + for model_version_info in model_version_infos + ] + ) + or 1 ) diff --git a/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql_migration_assistant/utils/upload_files_to_workspace.py index 94e6c392..63633b6f 100644 --- a/sql_migration_assistant/utils/upload_files_to_workspace.py +++ b/sql_migration_assistant/utils/upload_files_to_workspace.py @@ -3,10 +3,10 @@ This uploads the config, runindatabricks.py, and gradio_app_backup.py files to the Databricks workspace. """ +from dataclasses import make_dataclass + from databricks.labs.blueprint.installation import Installation from databricks.sdk import WorkspaceClient -from dataclasses import make_dataclass -import os class FileUploader: @@ -15,9 +15,9 @@ def __init__(self, workspace_client: WorkspaceClient): self.installer = Installation(ws=self.w, product="sql_migration_assistant") def upload( - self, - file_path, - file_name, + self, + file_path, + file_name, ): with open(file_path, "rb") as file: contents = file.read() From 39813af2d759b0812269b092b6afbe9413a6ca65 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Wed, 6 Nov 2024 18:55:54 +0100 Subject: [PATCH 04/19] Makes Module installable. 
Tested in notebook --- sql_migration_assistant/__init__.py | 22 ------------- .../run_app_from_databricks_notebook.py | 33 +++++-------------- sql_migration_assistant/setup.py | 14 ++++++++ .../{app => src}/__init__.py | 0 .../sql_migration_assistant}/__init__.py | 0 .../sql_migration_assistant}/app/llm.py | 0 .../app/similar_code.py | 0 .../sql_migration_assistant}/config.py | 0 .../frontend/GradioFrontend.py | 0 .../frontend/Tabs/BatchInputCodeTab.py | 0 .../frontend/Tabs/BatchOutputTab.py | 0 .../frontend/Tabs/CodeExplanationTab.py | 0 .../frontend/Tabs/InteractiveInputCodeTab.py | 0 .../frontend/Tabs/InteractiveOutputTab.py | 0 .../frontend/Tabs/SimilarCodeTab.py | 0 .../frontend/Tabs/Tab.py | 0 .../frontend/Tabs/TranslationTab.py | 0 .../frontend/callbacks.py | 0 .../infra/app_serving_cluster_infra.py | 10 +++--- .../infra/chat_infra.py | 0 .../infra/jobs_infra.py | 2 +- .../infra/model_def.py | 0 .../infra/secrets_infra.py | 0 .../infra/sql_warehouse_infra.py | 0 .../infra/unity_catalog_infra.py | 0 .../infra/vector_search_infra.py | 0 .../utils/configloader.py | 0 .../utils/initialsetup.py | 20 ++++++----- .../utils/run_review_app.py | 23 ++++--------- .../utils/runindatabricks.py | 17 ++++++---- .../utils/uc_model_version.py | 0 .../utils/upload_files_to_workspace.py | 0 .../tests/test_similar_code.py | 2 +- 33 files changed, 60 insertions(+), 83 deletions(-) delete mode 100644 sql_migration_assistant/__init__.py create mode 100644 sql_migration_assistant/setup.py rename sql_migration_assistant/{app => src}/__init__.py (100%) rename sql_migration_assistant/{frontend/Tabs => src/sql_migration_assistant}/__init__.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/app/llm.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/app/similar_code.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/config.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/GradioFrontend.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/BatchInputCodeTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/BatchOutputTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/CodeExplanationTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/InteractiveInputCodeTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/InteractiveOutputTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/SimilarCodeTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/Tab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/Tabs/TranslationTab.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/frontend/callbacks.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/app_serving_cluster_infra.py (88%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/chat_infra.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/jobs_infra.py (98%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/model_def.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/secrets_infra.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/sql_warehouse_infra.py (100%) rename 
sql_migration_assistant/{ => src/sql_migration_assistant}/infra/unity_catalog_infra.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/infra/vector_search_infra.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/utils/configloader.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/utils/initialsetup.py (94%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/utils/run_review_app.py (88%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/utils/runindatabricks.py (78%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/utils/uc_model_version.py (100%) rename sql_migration_assistant/{ => src/sql_migration_assistant}/utils/upload_files_to_workspace.py (100%) diff --git a/sql_migration_assistant/__init__.py b/sql_migration_assistant/__init__.py deleted file mode 100644 index 9f84043e..00000000 --- a/sql_migration_assistant/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from pathlib import Path - -import yaml -from databricks.labs.blueprint.tui import Prompts -from databricks.sdk import WorkspaceClient - -from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant - - -def hello(): - w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") - p = Prompts() - setter_upper = SetUpMigrationAssistant() - setter_upper.check_cloud(w) - final_config = setter_upper.setup_migration_assistant(w, p) - current_path = Path(__file__).parent.resolve() - - local_config = str(current_path) + "/config.yml" - with open(local_config, "w") as f: - yaml.dump(final_config, f) - setter_upper.upload_files(w, current_path) - setter_upper.launch_review_app(w, final_config) diff --git a/sql_migration_assistant/run_app_from_databricks_notebook.py b/sql_migration_assistant/run_app_from_databricks_notebook.py index ec5728c0..03b2ed24 100644 --- a/sql_migration_assistant/run_app_from_databricks_notebook.py +++ b/sql_migration_assistant/run_app_from_databricks_notebook.py @@ -7,39 +7,24 @@ # MAGIC If you want to share the app with users outside of Databricks, for example so non technical SMEs can contribute to LLM prompt development, the notebook needs to run on a no isolation shared cluster. # COMMAND ---------- -pip -install -databricks - sdk - U - q +pip install databricks-sdk -U -q # COMMAND ---------- -pip -install -gradio == 4.27 -.0 -pyyaml -aiohttp == 3.10 -.5 -databricks - labs - blueprint == 0.8 -.2 -databricks - labs - lsql == 0.9 -.0 - q +pip install gradio==4.27.0 pyyaml aiohttp==3.10.5 databricks-labs-blueprint==0.8.2 databricks-labs-lsql==0.9.0 -q # COMMAND ---------- -pip -install -fastapi == 0.112 -.2 -pydantic == 2.8 -.2 -dbtunnel == 0.14 -.6 - q +pip install fastapi==0.112.2 pydantic==2.8.2 dbtunnel==0.14.6 -q + +# COMMAND ---------- +pip install . # COMMAND ---------- dbutils.library.restartPython() # COMMAND ---------- -from utils.runindatabricks import run_app + +from sql_migration_assistant.utils.runindatabricks import run_app # set debug=True to print the app logs in this cell. 
# run_app(debug=True) -run_app() +run_app("/Workspace/Users/sebastian.grunwald@databricks.com/sql_migration_assistant/config.yml") diff --git a/sql_migration_assistant/setup.py b/sql_migration_assistant/setup.py new file mode 100644 index 00000000..dd6ff61e --- /dev/null +++ b/sql_migration_assistant/setup.py @@ -0,0 +1,14 @@ +from setuptools import setup, find_packages + +setup( + name="sql_migration_assistant", + version="0.1", + packages=find_packages(where="src"), # Specify src as the package directory + package_dir={"": "src"}, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.10', +) \ No newline at end of file diff --git a/sql_migration_assistant/app/__init__.py b/sql_migration_assistant/src/__init__.py similarity index 100% rename from sql_migration_assistant/app/__init__.py rename to sql_migration_assistant/src/__init__.py diff --git a/sql_migration_assistant/frontend/Tabs/__init__.py b/sql_migration_assistant/src/sql_migration_assistant/__init__.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/__init__.py rename to sql_migration_assistant/src/sql_migration_assistant/__init__.py diff --git a/sql_migration_assistant/app/llm.py b/sql_migration_assistant/src/sql_migration_assistant/app/llm.py similarity index 100% rename from sql_migration_assistant/app/llm.py rename to sql_migration_assistant/src/sql_migration_assistant/app/llm.py diff --git a/sql_migration_assistant/app/similar_code.py b/sql_migration_assistant/src/sql_migration_assistant/app/similar_code.py similarity index 100% rename from sql_migration_assistant/app/similar_code.py rename to sql_migration_assistant/src/sql_migration_assistant/app/similar_code.py diff --git a/sql_migration_assistant/config.py b/sql_migration_assistant/src/sql_migration_assistant/config.py similarity index 100% rename from sql_migration_assistant/config.py rename to sql_migration_assistant/src/sql_migration_assistant/config.py diff --git a/sql_migration_assistant/frontend/GradioFrontend.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/GradioFrontend.py similarity index 100% rename from sql_migration_assistant/frontend/GradioFrontend.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/GradioFrontend.py diff --git a/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py diff --git a/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/BatchOutputTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py diff --git a/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py diff --git a/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py 
b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py diff --git a/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py diff --git a/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py diff --git a/sql_migration_assistant/frontend/Tabs/Tab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/Tab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py diff --git a/sql_migration_assistant/frontend/Tabs/TranslationTab.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py similarity index 100% rename from sql_migration_assistant/frontend/Tabs/TranslationTab.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py diff --git a/sql_migration_assistant/frontend/callbacks.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/callbacks.py similarity index 100% rename from sql_migration_assistant/frontend/callbacks.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/callbacks.py diff --git a/sql_migration_assistant/infra/app_serving_cluster_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py similarity index 88% rename from sql_migration_assistant/infra/app_serving_cluster_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py index 1d9c978e..e0e6386c 100644 --- a/sql_migration_assistant/infra/app_serving_cluster_infra.py +++ b/sql_migration_assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py @@ -2,7 +2,7 @@ from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient -from databricks.sdk.service.compute import ListClustersFilterBy, State, DataSecurityMode +from databricks.sdk.service.compute import State, DataSecurityMode class AppServingClusterInfra: @@ -32,9 +32,7 @@ def choose_serving_cluster(self): cluster_name = self.cluster_name cluster_id = response.response.cluster_id else: - clusters = self.w.clusters.list( - filter_by=ListClustersFilterBy(cluster_states=[State.RUNNING]) - ) + clusters = self.w.clusters.list() # get cluster name and id clusters = { @@ -49,6 +47,10 @@ def choose_serving_cluster(self): question = "Choose a cluster:" cluster_name = self.prompts.choice(question, clusters.keys()) cluster_id = clusters[cluster_name] + cluster = self.w.clusters.get(cluster_id) + if cluster.state not in (State.RUNNING, State.PENDING): + logging.info("Cluster is not running. 
Trying to start it") + self.w.clusters.start(cluster_id) self.config["SERVING_CLUSTER_NAME"] = cluster_name self.config["SERVING_CLUSTER_ID"] = cluster_id diff --git a/sql_migration_assistant/infra/chat_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/chat_infra.py similarity index 100% rename from sql_migration_assistant/infra/chat_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/chat_infra.py diff --git a/sql_migration_assistant/infra/jobs_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/jobs_infra.py similarity index 98% rename from sql_migration_assistant/infra/jobs_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/jobs_infra.py index 6713362e..a1a8d566 100644 --- a/sql_migration_assistant/infra/jobs_infra.py +++ b/sql_migration_assistant/src/sql_migration_assistant/infra/jobs_infra.py @@ -54,7 +54,7 @@ def __init__( ] self.job_name = "sql_migration_code_transformation" - self.notebook_root_path = f"/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant/jobs/" + self.notebook_root_path = f"/Workspace/Users/{self.w.current_user.me().user_name}/sql_migration_assistant/jobs/" self.job_parameters = [ JobParameterDefinition("agent_configs", ""), JobParameterDefinition("app_configs", ""), diff --git a/sql_migration_assistant/infra/model_def.py b/sql_migration_assistant/src/sql_migration_assistant/infra/model_def.py similarity index 100% rename from sql_migration_assistant/infra/model_def.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/model_def.py diff --git a/sql_migration_assistant/infra/secrets_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/secrets_infra.py similarity index 100% rename from sql_migration_assistant/infra/secrets_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/secrets_infra.py diff --git a/sql_migration_assistant/infra/sql_warehouse_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py similarity index 100% rename from sql_migration_assistant/infra/sql_warehouse_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py diff --git a/sql_migration_assistant/infra/unity_catalog_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py similarity index 100% rename from sql_migration_assistant/infra/unity_catalog_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py diff --git a/sql_migration_assistant/infra/vector_search_infra.py b/sql_migration_assistant/src/sql_migration_assistant/infra/vector_search_infra.py similarity index 100% rename from sql_migration_assistant/infra/vector_search_infra.py rename to sql_migration_assistant/src/sql_migration_assistant/infra/vector_search_infra.py diff --git a/sql_migration_assistant/utils/configloader.py b/sql_migration_assistant/src/sql_migration_assistant/utils/configloader.py similarity index 100% rename from sql_migration_assistant/utils/configloader.py rename to sql_migration_assistant/src/sql_migration_assistant/utils/configloader.py diff --git a/sql_migration_assistant/utils/initialsetup.py b/sql_migration_assistant/src/sql_migration_assistant/utils/initialsetup.py similarity index 94% rename from sql_migration_assistant/utils/initialsetup.py rename to sql_migration_assistant/src/sql_migration_assistant/utils/initialsetup.py index 62d69f06..e20aabf4 100644 --- 
a/sql_migration_assistant/utils/initialsetup.py +++ b/sql_migration_assistant/src/sql_migration_assistant/utils/initialsetup.py @@ -1,5 +1,6 @@ import logging import os +from pathlib import Path from databricks.labs.lsql.core import StatementExecutionExt from databricks.sdk.errors import ResourceAlreadyExists, BadRequest @@ -18,6 +19,14 @@ from sql_migration_assistant.utils.upload_files_to_workspace import FileUploader +def list_files_recursive(parent_path: str, sub_path: str) -> list[str]: + # Get absolute paths of both directories + dir_to_list = Path(parent_path, sub_path).resolve() + base_dir = Path(parent_path).resolve() + # List all files in dir_to_list and make paths relative to base_dir + return [str(file.relative_to(base_dir)) for file in dir_to_list.rglob('*') if file.is_file()] + + class SetUpMigrationAssistant: # this is a decorator to handle errors and do a retry where user is asked to choose an existing resource @@ -167,17 +176,12 @@ def upload_files(self, w, path): print("\nUploading files to workspace") uploader = FileUploader(w) files_to_upload = [ - "utils/runindatabricks.py", - "utils/configloader.py", - "utils/run_review_app.py", - "jobs/bronze_to_silver.py", - "jobs/call_agents.py", - "jobs/silver_to_gold.py", - "app/llm.py", - "app/similar_code.py", "main.py", + "config.py", "run_app_from_databricks_notebook.py", "config.yml", + "setup.py", + *list_files_recursive(path, "src") ] def inner(f): diff --git a/sql_migration_assistant/utils/run_review_app.py b/sql_migration_assistant/src/sql_migration_assistant/utils/run_review_app.py similarity index 88% rename from sql_migration_assistant/utils/run_review_app.py rename to sql_migration_assistant/src/sql_migration_assistant/utils/run_review_app.py index b3b94f19..45dcfa87 100644 --- a/sql_migration_assistant/utils/run_review_app.py +++ b/sql_migration_assistant/src/sql_migration_assistant/utils/run_review_app.py @@ -38,28 +38,20 @@ def _library_install(self): for l in self.libraries: self.executor.install_notebook_library(l) + self.executor.run("pip install .") self.executor.run("dbutils.library.restartPython()") - def _path_updates(self): - self.executor.run( - code=f""" -import sys -sys.path.insert(0, '/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant/utils') -sys.path.insert(0, '/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant/app') -import os -path = '/Workspace/Users/{self.w.current_user.me().user_name}/.sql_migration_assistant' -os.chdir(path) -""" - ) - def _get_org_id(self): return self.w.get_workspace_id() def _launch_app(self): self.executor.run( - code=""" - from utils.runindatabricks import run_app - run_app() + code=f""" + from sql_migration_assistant.utils.runindatabricks import run_app + + # set debug=True to print the app logs in this cell. 
+ # run_app(debug=True) + run_app("/Workspace/Users/{self.w.current_user.me().user_name}/sql_migration_assistant/config.yml") """ ) @@ -120,7 +112,6 @@ def remove_lowest_subdomain_from_host(url): def launch_review_app(self): self._library_install() - self._path_updates() org_id = self._get_org_id() proxy_url = self._get_proxy_url(org_id) logging.info( diff --git a/sql_migration_assistant/utils/runindatabricks.py b/sql_migration_assistant/src/sql_migration_assistant/utils/runindatabricks.py similarity index 78% rename from sql_migration_assistant/utils/runindatabricks.py rename to sql_migration_assistant/src/sql_migration_assistant/utils/runindatabricks.py index 37edf76a..2d8443b5 100644 --- a/sql_migration_assistant/utils/runindatabricks.py +++ b/sql_migration_assistant/src/sql_migration_assistant/utils/runindatabricks.py @@ -1,5 +1,6 @@ # this is only run from within databricks, hence the import doesn't work in IDE import threading +from pathlib import Path import yaml from databricks.sdk import WorkspaceClient @@ -9,24 +10,26 @@ from sql_migration_assistant.utils.configloader import ConfigLoader from sql_migration_assistant.utils.run_review_app import RunReviewApp +current_folder = Path(__file__).parent.resolve() -def thread_func(): + +def thread_func(config_path: str): cl = ConfigLoader() - cl.read_yaml_to_env("config.yml") + cl.read_yaml_to_env(config_path) dbtunnel.kill_port(8080) app = "main.py" dbtunnel.gradio(path=app).run() -def run_app(debug=False): +def run_app(config_path: str, debug=False): # load config file into environment variables. This is necesarry to create the workspace client if debug: # this will get the app logs to print in the notebook cell output - thread_func() + thread_func(config_path) else: cl = ConfigLoader() - cl.read_yaml_to_env("config.yml") - with open("config.yml", "r") as f: + cl.read_yaml_to_env(config_path) + with open(config_path, "r") as f: config = yaml.safe_load(f) w = WorkspaceClient() @@ -38,7 +41,7 @@ def run_app(debug=False): proxy_url_split[-3] = cluster_id proxy_url = "/".join(proxy_url_split) - x = threading.Thread(target=thread_func) + x = threading.Thread(target=lambda: thread_func(config_path)) x.start() print( f"Launching review app, it may take a few minutes to come up. 
Visit below link to access the app.\n{proxy_url}" diff --git a/sql_migration_assistant/utils/uc_model_version.py b/sql_migration_assistant/src/sql_migration_assistant/utils/uc_model_version.py similarity index 100% rename from sql_migration_assistant/utils/uc_model_version.py rename to sql_migration_assistant/src/sql_migration_assistant/utils/uc_model_version.py diff --git a/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql_migration_assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py similarity index 100% rename from sql_migration_assistant/utils/upload_files_to_workspace.py rename to sql_migration_assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py diff --git a/sql_migration_assistant/tests/test_similar_code.py b/sql_migration_assistant/tests/test_similar_code.py index b8bdcc1b..cfdb8094 100644 --- a/sql_migration_assistant/tests/test_similar_code.py +++ b/sql_migration_assistant/tests/test_similar_code.py @@ -1,7 +1,7 @@ import unittest from unittest.mock import MagicMock, patch -from sql_migration_assistant.app.similar_code import ( +from sql_migration_assistant.app import ( SimilarCode, ) # replace 'your_module' with the actual name of your module From 7f31ad1ea1573dd80da1d333163fe9baaf17e05b Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Wed, 6 Nov 2024 18:56:12 +0100 Subject: [PATCH 05/19] Makes Module installable. Tested in notebook --- .../{frontend => src/sql_migration_assistant/app}/__init__.py | 0 .../sql_migration_assistant/frontend/Tabs}/__init__.py | 0 .../{utils => src/sql_migration_assistant/frontend}/__init__.py | 0 .../src/sql_migration_assistant/infra/__init__.py | 0 .../src/sql_migration_assistant/utils/__init__.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename sql_migration_assistant/{frontend => src/sql_migration_assistant/app}/__init__.py (100%) rename sql_migration_assistant/{infra => src/sql_migration_assistant/frontend/Tabs}/__init__.py (100%) rename sql_migration_assistant/{utils => src/sql_migration_assistant/frontend}/__init__.py (100%) create mode 100644 sql_migration_assistant/src/sql_migration_assistant/infra/__init__.py create mode 100644 sql_migration_assistant/src/sql_migration_assistant/utils/__init__.py diff --git a/sql_migration_assistant/frontend/__init__.py b/sql_migration_assistant/src/sql_migration_assistant/app/__init__.py similarity index 100% rename from sql_migration_assistant/frontend/__init__.py rename to sql_migration_assistant/src/sql_migration_assistant/app/__init__.py diff --git a/sql_migration_assistant/infra/__init__.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py similarity index 100% rename from sql_migration_assistant/infra/__init__.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py diff --git a/sql_migration_assistant/utils/__init__.py b/sql_migration_assistant/src/sql_migration_assistant/frontend/__init__.py similarity index 100% rename from sql_migration_assistant/utils/__init__.py rename to sql_migration_assistant/src/sql_migration_assistant/frontend/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/__init__.py b/sql_migration_assistant/src/sql_migration_assistant/infra/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/__init__.py b/sql_migration_assistant/src/sql_migration_assistant/utils/__init__.py new file mode 100644 index 00000000..e69de29b From 
d6a0cc86ee767639ed694806058a843679902ba1 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Tue, 19 Nov 2024 16:55:34 +0100 Subject: [PATCH 06/19] Refactoring --- sql-migration-assistant/.gitignore | 2 + .../README.md | 0 .../docs/Makefile | 0 .../docs/_static/css/custom.css | 0 .../docs/conf.py | 0 .../docs/images/intent_generation.png | Bin .../docs/images/legion_logo.png | Bin .../docs/images/similar_code.png | Bin .../docs/images/translation_prompt.png | Bin .../docs/images/translation_screen.png | Bin .../docs/index.rst | 0 .../docs/reload.py | 0 .../docs/requirements.txt | 0 .../docs/usage/installation.rst | 0 .../docs/usage/usage.rst | 0 .../jobs/__init__.py | 0 .../jobs/bronze_to_silver.py | 0 .../jobs/call_agents.py | 0 .../jobs/silver_to_gold.py | 0 .../main.py | 0 .../requirements.txt | 3 +- .../run_app_from_databricks_notebook.py | 2 +- .../setup.py | 0 .../src/__init__.py | 0 .../src/sql_migration_assistant/__init__.py | 0 .../sql_migration_assistant/app/__init__.py | 0 .../src/sql_migration_assistant/app/llm.py | 0 .../app/similar_code.py | 0 .../src/sql_migration_assistant/config.py | 0 .../frontend/GradioFrontend.py | 0 .../frontend/Tabs/BatchInputCodeTab.py | 0 .../frontend/Tabs/BatchOutputTab.py | 0 .../frontend/Tabs/CodeExplanationTab.py | 0 .../frontend/Tabs/InteractiveInputCodeTab.py | 0 .../frontend/Tabs/InteractiveOutputTab.py | 0 .../frontend/Tabs/SimilarCodeTab.py | 0 .../frontend/Tabs/Tab.py | 0 .../frontend/Tabs/TranslationTab.py | 0 .../frontend/Tabs/__init__.py | 0 .../frontend/__init__.py | 0 .../frontend/callbacks.py | 0 .../sql_migration_assistant/infra/__init__.py | 0 .../infra/app_serving_cluster_infra.py | 0 .../infra/chat_infra.py | 0 .../infra/jobs_infra.py | 0 .../infra/model_def.py | 0 .../infra/secrets_infra.py | 0 .../infra/sql_warehouse_infra.py | 0 .../infra/unity_catalog_infra.py | 0 .../infra/vector_search_infra.py | 0 .../sql_migration_assistant/utils/__init__.py | 0 .../utils/configloader.py | 0 .../utils/initialsetup.py | 0 .../utils/run_review_app.py | 0 .../utils/runindatabricks.py | 0 .../utils/uc_model_version.py | 0 .../utils/upload_files_to_workspace.py | 0 .../tests/__init__.py | 0 sql_migration_assistant/.gitignore | 1 - sql_migration_assistant/tests/test_llm.py | 94 ------------------ .../tests/test_similar_code.py | 85 ---------------- 61 files changed, 5 insertions(+), 182 deletions(-) create mode 100644 sql-migration-assistant/.gitignore rename {sql_migration_assistant => sql-migration-assistant}/README.md (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/Makefile (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/_static/css/custom.css (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/conf.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/images/intent_generation.png (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/images/legion_logo.png (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/images/similar_code.png (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/images/translation_prompt.png (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/images/translation_screen.png (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/index.rst (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/reload.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/requirements.txt (100%) rename 
{sql_migration_assistant => sql-migration-assistant}/docs/usage/installation.rst (100%) rename {sql_migration_assistant => sql-migration-assistant}/docs/usage/usage.rst (100%) rename {sql_migration_assistant => sql-migration-assistant}/jobs/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/jobs/bronze_to_silver.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/jobs/call_agents.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/jobs/silver_to_gold.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/main.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/requirements.txt (60%) rename {sql_migration_assistant => sql-migration-assistant}/run_app_from_databricks_notebook.py (91%) rename {sql_migration_assistant => sql-migration-assistant}/setup.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/app/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/app/llm.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/app/similar_code.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/config.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/GradioFrontend.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/Tab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/Tabs/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/frontend/callbacks.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/app_serving_cluster_infra.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/chat_infra.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/jobs_infra.py (100%) rename {sql_migration_assistant => 
sql-migration-assistant}/src/sql_migration_assistant/infra/model_def.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/secrets_infra.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/sql_warehouse_infra.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/unity_catalog_infra.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/infra/vector_search_infra.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/__init__.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/configloader.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/initialsetup.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/run_review_app.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/runindatabricks.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/uc_model_version.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/src/sql_migration_assistant/utils/upload_files_to_workspace.py (100%) rename {sql_migration_assistant => sql-migration-assistant}/tests/__init__.py (100%) delete mode 100644 sql_migration_assistant/.gitignore delete mode 100644 sql_migration_assistant/tests/test_llm.py delete mode 100644 sql_migration_assistant/tests/test_similar_code.py diff --git a/sql-migration-assistant/.gitignore b/sql-migration-assistant/.gitignore new file mode 100644 index 00000000..b1ecc211 --- /dev/null +++ b/sql-migration-assistant/.gitignore @@ -0,0 +1,2 @@ +.databrickscfg +.databricks diff --git a/sql_migration_assistant/README.md b/sql-migration-assistant/README.md similarity index 100% rename from sql_migration_assistant/README.md rename to sql-migration-assistant/README.md diff --git a/sql_migration_assistant/docs/Makefile b/sql-migration-assistant/docs/Makefile similarity index 100% rename from sql_migration_assistant/docs/Makefile rename to sql-migration-assistant/docs/Makefile diff --git a/sql_migration_assistant/docs/_static/css/custom.css b/sql-migration-assistant/docs/_static/css/custom.css similarity index 100% rename from sql_migration_assistant/docs/_static/css/custom.css rename to sql-migration-assistant/docs/_static/css/custom.css diff --git a/sql_migration_assistant/docs/conf.py b/sql-migration-assistant/docs/conf.py similarity index 100% rename from sql_migration_assistant/docs/conf.py rename to sql-migration-assistant/docs/conf.py diff --git a/sql_migration_assistant/docs/images/intent_generation.png b/sql-migration-assistant/docs/images/intent_generation.png similarity index 100% rename from sql_migration_assistant/docs/images/intent_generation.png rename to sql-migration-assistant/docs/images/intent_generation.png diff --git a/sql_migration_assistant/docs/images/legion_logo.png b/sql-migration-assistant/docs/images/legion_logo.png similarity index 100% rename from sql_migration_assistant/docs/images/legion_logo.png rename to sql-migration-assistant/docs/images/legion_logo.png diff --git a/sql_migration_assistant/docs/images/similar_code.png b/sql-migration-assistant/docs/images/similar_code.png similarity index 100% rename from sql_migration_assistant/docs/images/similar_code.png 
rename to sql-migration-assistant/docs/images/similar_code.png diff --git a/sql_migration_assistant/docs/images/translation_prompt.png b/sql-migration-assistant/docs/images/translation_prompt.png similarity index 100% rename from sql_migration_assistant/docs/images/translation_prompt.png rename to sql-migration-assistant/docs/images/translation_prompt.png diff --git a/sql_migration_assistant/docs/images/translation_screen.png b/sql-migration-assistant/docs/images/translation_screen.png similarity index 100% rename from sql_migration_assistant/docs/images/translation_screen.png rename to sql-migration-assistant/docs/images/translation_screen.png diff --git a/sql_migration_assistant/docs/index.rst b/sql-migration-assistant/docs/index.rst similarity index 100% rename from sql_migration_assistant/docs/index.rst rename to sql-migration-assistant/docs/index.rst diff --git a/sql_migration_assistant/docs/reload.py b/sql-migration-assistant/docs/reload.py similarity index 100% rename from sql_migration_assistant/docs/reload.py rename to sql-migration-assistant/docs/reload.py diff --git a/sql_migration_assistant/docs/requirements.txt b/sql-migration-assistant/docs/requirements.txt similarity index 100% rename from sql_migration_assistant/docs/requirements.txt rename to sql-migration-assistant/docs/requirements.txt diff --git a/sql_migration_assistant/docs/usage/installation.rst b/sql-migration-assistant/docs/usage/installation.rst similarity index 100% rename from sql_migration_assistant/docs/usage/installation.rst rename to sql-migration-assistant/docs/usage/installation.rst diff --git a/sql_migration_assistant/docs/usage/usage.rst b/sql-migration-assistant/docs/usage/usage.rst similarity index 100% rename from sql_migration_assistant/docs/usage/usage.rst rename to sql-migration-assistant/docs/usage/usage.rst diff --git a/sql_migration_assistant/jobs/__init__.py b/sql-migration-assistant/jobs/__init__.py similarity index 100% rename from sql_migration_assistant/jobs/__init__.py rename to sql-migration-assistant/jobs/__init__.py diff --git a/sql_migration_assistant/jobs/bronze_to_silver.py b/sql-migration-assistant/jobs/bronze_to_silver.py similarity index 100% rename from sql_migration_assistant/jobs/bronze_to_silver.py rename to sql-migration-assistant/jobs/bronze_to_silver.py diff --git a/sql_migration_assistant/jobs/call_agents.py b/sql-migration-assistant/jobs/call_agents.py similarity index 100% rename from sql_migration_assistant/jobs/call_agents.py rename to sql-migration-assistant/jobs/call_agents.py diff --git a/sql_migration_assistant/jobs/silver_to_gold.py b/sql-migration-assistant/jobs/silver_to_gold.py similarity index 100% rename from sql_migration_assistant/jobs/silver_to_gold.py rename to sql-migration-assistant/jobs/silver_to_gold.py diff --git a/sql_migration_assistant/main.py b/sql-migration-assistant/main.py similarity index 100% rename from sql_migration_assistant/main.py rename to sql-migration-assistant/main.py diff --git a/sql_migration_assistant/requirements.txt b/sql-migration-assistant/requirements.txt similarity index 60% rename from sql_migration_assistant/requirements.txt rename to sql-migration-assistant/requirements.txt index c4dcd90e..23edfc35 100644 --- a/sql_migration_assistant/requirements.txt +++ b/sql-migration-assistant/requirements.txt @@ -1,4 +1,5 @@ databricks-sdk==0.30.0 pyyaml databricks-labs-blueprint==0.8.2 -databricks-labs-lsql==0.9.0 \ No newline at end of file +databricks-labs-lsql==0.9.0 +gradio==5.5.0 \ No newline at end of file diff --git 
a/sql_migration_assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py similarity index 91% rename from sql_migration_assistant/run_app_from_databricks_notebook.py rename to sql-migration-assistant/run_app_from_databricks_notebook.py index 03b2ed24..4d1e56b7 100644 --- a/sql_migration_assistant/run_app_from_databricks_notebook.py +++ b/sql-migration-assistant/run_app_from_databricks_notebook.py @@ -27,4 +27,4 @@ # set debug=True to print the app logs in this cell. # run_app(debug=True) -run_app("/Workspace/Users/sebastian.grunwald@databricks.com/sql_migration_assistant/config.yml") +run_app("/Workspace/Users/sebastian.grunwald@databricks.com/sql-migration-assistant/config.yml") diff --git a/sql_migration_assistant/setup.py b/sql-migration-assistant/setup.py similarity index 100% rename from sql_migration_assistant/setup.py rename to sql-migration-assistant/setup.py diff --git a/sql_migration_assistant/src/__init__.py b/sql-migration-assistant/src/__init__.py similarity index 100% rename from sql_migration_assistant/src/__init__.py rename to sql-migration-assistant/src/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/__init__.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/__init__.py rename to sql-migration-assistant/src/sql_migration_assistant/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/app/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/app/__init__.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/app/__init__.py rename to sql-migration-assistant/src/sql_migration_assistant/app/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/app/llm.py b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/app/llm.py rename to sql-migration-assistant/src/sql_migration_assistant/app/llm.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/app/similar_code.py b/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/app/similar_code.py rename to sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/config.py b/sql-migration-assistant/src/sql_migration_assistant/config.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/config.py rename to sql-migration-assistant/src/sql_migration_assistant/config.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/GradioFrontend.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/GradioFrontend.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py diff --git 
a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchOutputTab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveInputCodeTab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/SimilarCodeTab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/frontend/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/__init__.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/__init__.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/__init__.py diff --git 
a/sql_migration_assistant/src/sql_migration_assistant/frontend/callbacks.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/frontend/callbacks.py rename to sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/infra/__init__.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/__init__.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/app_serving_cluster_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/chat_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/chat_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/chat_infra.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/chat_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/jobs_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/jobs_infra.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/model_def.py b/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/model_def.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/secrets_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/secrets_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/secrets_infra.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/secrets_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/sql_warehouse_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py rename to sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/infra/vector_search_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/infra/vector_search_infra.py rename to 
sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/utils/__init__.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/__init__.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/__init__.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/configloader.py b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/configloader.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/initialsetup.py b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/initialsetup.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/run_review_app.py b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/run_review_app.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/runindatabricks.py b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/runindatabricks.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/uc_model_version.py b/sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/uc_model_version.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py diff --git a/sql_migration_assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py similarity index 100% rename from sql_migration_assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py rename to sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py diff --git a/sql_migration_assistant/tests/__init__.py b/sql-migration-assistant/tests/__init__.py similarity index 100% rename from sql_migration_assistant/tests/__init__.py rename to sql-migration-assistant/tests/__init__.py diff --git a/sql_migration_assistant/.gitignore b/sql_migration_assistant/.gitignore deleted file mode 100644 index 3a1de4ad..00000000 --- a/sql_migration_assistant/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.databrickscfg \ No newline at end of file diff --git a/sql_migration_assistant/tests/test_llm.py b/sql_migration_assistant/tests/test_llm.py deleted file mode 100644 index 86278b14..00000000 --- a/sql_migration_assistant/tests/test_llm.py +++ /dev/null @@ -1,94 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock - -from sql_migration_assistant.app.llm import LLMCalls - - -class TestLLMCalls(unittest.TestCase): - - @patch("app.llm.OpenAI") - def setUp(self, 
MockOpenAI): - """ - Set up the test environment before each test method. - Mocks the OpenAI client and initializes the LLMCalls object with mock dependencies. - """ - # Create a mock client instance - self.mock_client = MagicMock() - # Ensure the OpenAI client constructor returns the mock client - MockOpenAI.return_value = self.mock_client - # Initialize the LLMCalls instance with dummy parameters - self.llm = LLMCalls( - databricks_host="dummy_host", - databricks_token="dummy_token", - model_name="dummy_model", - max_tokens=100, - ) - - def test_call_llm(self): - """ - Test the call_llm method of the LLMCalls class. - Verifies that the method correctly calls the OpenAI client and returns the expected response. - """ - # Setup mock response - mock_response = MagicMock() - mock_response.choices[0].message.content = "Test response" - self.mock_client.chat.completions.create.return_value = mock_response - - # Test the call_llm method - messages = [{"role": "user", "content": "Hello"}] - response = self.llm.call_llm(messages) - - # Verify that the OpenAI client was called with the correct parameters - self.mock_client.chat.completions.create.assert_called_once_with( - messages=messages, model="dummy_model", max_tokens=100 - ) - # Check that the response matches the expected value - self.assertEqual(response, "Test response") - - def test_convert_chat_to_llm_input(self): - """ - Test the convert_chat_to_llm_input method to ensure it correctly formats the chat history. - """ - system_prompt = "You are a helpful assistant." - chat = [("Hello", "Hi there!"), ("How are you?", "I'm good, thank you!")] - expected_output = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi there!"}, - {"role": "user", "content": "How are you?"}, - {"role": "assistant", "content": "I'm good, thank you!"}, - ] - - result = self.llm.convert_chat_to_llm_input(system_prompt, chat) - # Assert that the formatted messages are as expected - self.assertEqual(result, expected_output) - - # Test the LLM functions for translating code, chatting, and determining intent - @patch.object(LLMCalls, "call_llm", return_value="Final answer:\nTranslated code") - def test_llm_translate(self, mock_call_llm): - system_prompt = "Translate this code" - input_code = "SELECT * FROM table" - - response = self.llm.llm_translate(system_prompt, input_code) - self.assertEqual(response, "Translated code") - - @patch.object(LLMCalls, "call_llm", return_value="Chat response") - def test_llm_chat(self, mock_call_llm): - system_prompt = "You are a helpful assistant." - query = "What is the weather today?" 
- chat_history = [("Hello", "Hi there!")] - - response = self.llm.llm_chat(system_prompt, query, chat_history) - self.assertEqual(response, "Chat response") - - @patch.object(LLMCalls, "call_llm", return_value="Intent response") - def test_llm_intent(self, mock_call_llm): - system_prompt = "Determine the intent of this code" - input_code = "SELECT * FROM table" - - response = self.llm.llm_intent(system_prompt, input_code) - self.assertEqual(response, "Intent response") - - -if __name__ == "__main__": - unittest.main() diff --git a/sql_migration_assistant/tests/test_similar_code.py b/sql_migration_assistant/tests/test_similar_code.py deleted file mode 100644 index cfdb8094..00000000 --- a/sql_migration_assistant/tests/test_similar_code.py +++ /dev/null @@ -1,85 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch - -from sql_migration_assistant.app import ( - SimilarCode, -) # replace 'your_module' with the actual name of your module - - -class TestSimilarCode(unittest.TestCase): - """ - Unit test class for testing the SimilarCode class. - """ - - @patch("app.similar_code.VectorSearchClient") - def setUp(self, MockVectorSearchClient): - """ - Sets up the test case by initializing an instance of SimilarCode with mock dependencies. - - Mocking the VectorSearchClient to isolate the functionality of SimilarCode from external dependencies. - """ - self.mock_vsc_instance = MockVectorSearchClient.return_value - self.similar_code = SimilarCode( - databricks_token="test_token", - databricks_host="test_host", - vector_search_endpoint_name="test_endpoint", - vs_index_fullname="test_index", - intent_table="test_table", - ) - - def test_save_intent(self): - """ - Tests the save_intent method of SimilarCode class. - - This test ensures that the SQL insert statement is correctly formed and executed with the provided parameters. - """ - # Mock the database cursor - mock_cursor = MagicMock() - code = "sample code" - intent = "sample intent" - code_hash = hash(code) - - # Call the method to test - self.similar_code.save_intent(code, intent, mock_cursor) - - # Assert that the execute method was called with the correct SQL statement - mock_cursor.execute.assert_called_once_with( - f'INSERT INTO test_table VALUES ({code_hash}, "{code}", "{intent}")' - ) - - def test_get_similar_code(self): - """ - Tests the get_similar_code method of SimilarCode class. - - This test verifies that the method calls the VectorSearchClient with the correct parameters and - returns the expected results. 
- """ - # Sample chat history and mock result - chat_history = [(1, "first intent"), (2, "second intent")] - mock_result = {"result": {"data_array": [["sample code", "sample intent"]]}} - - # Mock the similarity_search method's return value - self.mock_vsc_instance.get_index.return_value.similarity_search.return_value = ( - mock_result - ) - - # Call the method to test - code, intent = self.similar_code.get_similar_code(chat_history) - - # Assert that get_index was called with the correct parameters - self.mock_vsc_instance.get_index.assert_called_once_with( - "test_endpoint", "test_index" - ) - - # Assert that similarity_search was called with the correct parameters - self.mock_vsc_instance.get_index.return_value.similarity_search.assert_called_once_with( - query_text="second intent", columns=["code", "intent"], num_results=1 - ) - - # Assert that the returned values are as expected - self.assertEqual(code, "sample code") - self.assertEqual(intent, "sample intent") - - -if __name__ == "__main__": - unittest.main() From 1748700e54aaa4ca773ba6ae9f8f7b18ba798e33 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 10:56:01 +0100 Subject: [PATCH 07/19] changes config path from absolute to relative --- sql-migration-assistant/run_app_from_databricks_notebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-migration-assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py index 4d1e56b7..5e1562e6 100644 --- a/sql-migration-assistant/run_app_from_databricks_notebook.py +++ b/sql-migration-assistant/run_app_from_databricks_notebook.py @@ -27,4 +27,4 @@ # set debug=True to print the app logs in this cell. # run_app(debug=True) -run_app("/Workspace/Users/sebastian.grunwald@databricks.com/sql-migration-assistant/config.yml") +run_app("config.yml") From daefae2ffab7e0925f093d7faaac041992174746 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 15:12:39 +0100 Subject: [PATCH 08/19] adds option to configure profile in commandline --- cli.py | 4 ++-- sql-migration-assistant/src/__init__.py | 0 .../src/sql_migration_assistant/__init__.py | 20 +++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) delete mode 100644 sql-migration-assistant/src/__init__.py diff --git a/cli.py b/cli.py index 04387197..5f1be95e 100644 --- a/cli.py +++ b/cli.py @@ -10,9 +10,9 @@ def ip_access_list_analyzer(**args): import ip_access_list_analyzer.ip_acl_analyzer as analyzer analyzer.main(args) -def sql_migration_assistant(**args): +def sql_migration_assistant(**kwargs): from sql_migration_assistant import hello - hello() + hello(**kwargs) MAPPING = { "ip-access-list-analyzer": ip_access_list_analyzer, diff --git a/sql-migration-assistant/src/__init__.py b/sql-migration-assistant/src/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/sql-migration-assistant/src/sql_migration_assistant/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/__init__.py index e69de29b..f5ca64f0 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/__init__.py +++ b/sql-migration-assistant/src/sql_migration_assistant/__init__.py @@ -0,0 +1,20 @@ +from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant +from databricks.sdk import WorkspaceClient +from databricks.labs.blueprint.tui import Prompts +import yaml +from pathlib import Path + + +def hello(**kwargs): + w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1", 
profile=kwargs.get("profile")) + p = Prompts() + setter_upper = SetUpMigrationAssistant() + setter_upper.check_cloud(w) + final_config = setter_upper.setup_migration_assistant(w, p) + current_path = Path(__file__).parent.resolve() + + local_config = str(current_path) + "/config.yml" + with open(local_config, "w") as f: + yaml.dump(final_config, f) + setter_upper.upload_files(w, current_path) + setter_upper.launch_review_app(w, final_config) \ No newline at end of file From 7b97c2482d463cd8060d862fd152a53664cbf7ea Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 15:13:44 +0100 Subject: [PATCH 09/19] adds configs to installable files to remove dependency to Workspace path and adds requirements to installation --- sql-migration-assistant/requirements.txt | 7 ++++++- sql-migration-assistant/setup.py | 10 ++++++++++ .../utils/configloader.py | 18 +++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/sql-migration-assistant/requirements.txt b/sql-migration-assistant/requirements.txt index 23edfc35..eed4d188 100644 --- a/sql-migration-assistant/requirements.txt +++ b/sql-migration-assistant/requirements.txt @@ -2,4 +2,9 @@ databricks-sdk==0.30.0 pyyaml databricks-labs-blueprint==0.8.2 databricks-labs-lsql==0.9.0 -gradio==5.5.0 \ No newline at end of file +gradio==5.5.0 +aiohttp==3.10.5 +fastapi +pydantic==2.8.2 +dbtunnel==0.14.6 +mlflow \ No newline at end of file diff --git a/sql-migration-assistant/setup.py b/sql-migration-assistant/setup.py index dd6ff61e..a98b1808 100644 --- a/sql-migration-assistant/setup.py +++ b/sql-migration-assistant/setup.py @@ -1,14 +1,24 @@ from setuptools import setup, find_packages +# Read the requirements.txt file +def load_requirements(filename="requirements.txt"): + with open(filename, "r") as file: + return file.read().splitlines() + setup( name="sql_migration_assistant", version="0.1", packages=find_packages(where="src"), # Specify src as the package directory package_dir={"": "src"}, + include_package_data=True, # Include files specified in MANIFEST.in + package_data={ + 'sql_migration_assistant': ['config.yml'], # Include YAML file + }, classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], + install_requires=load_requirements(), python_requires='>=3.10', ) \ No newline at end of file diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py index a3310b45..70507de8 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py @@ -2,7 +2,15 @@ import yaml from databricks.sdk import WorkspaceClient +import yaml +from importlib.resources import files +def load_config(): + # Access the YAML file as a resource + config_path = files('sql_migration_assistant').joinpath('config.yml') + with config_path.open('r') as f: + config = yaml.safe_load(f) + return config class ConfigLoader: """ @@ -10,17 +18,16 @@ class ConfigLoader: environment variables. """ - def read_yaml_to_env(self, file_path): + def read_yaml_to_env(self): """Reads a YAML file and sets environment variables based on its contents. Args: file_path (str): The path to the YAML file. 
""" - with open(file_path, "r") as file: - data = yaml.safe_load(file) - for key, value in data.items(): - os.environ[key] = str(value) + data = load_config() + for key, value in data.items(): + os.environ[key] = str(value) w = WorkspaceClient() dbutils = w.dbutils @@ -37,3 +44,4 @@ def read_yaml_to_env(self, file_path): if DATABRICKS_HOST[-1] == "/": DATABRICKS_HOST = DATABRICKS_HOST[:-1] os.environ["DATABRICKS_HOST"] = DATABRICKS_HOST + return data \ No newline at end of file From 5f91084f80ded613f1934ae8fa7eca1c6387f514 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 15:15:12 +0100 Subject: [PATCH 10/19] adds configs to installable files to remove dependency to Workspace path and adds requirements to installation --- .../run_app_from_databricks_notebook.py | 13 ++----------- .../sql_migration_assistant/utils/run_review_app.py | 2 +- .../utils/upload_files_to_workspace.py | 4 ++-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/sql-migration-assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py index 5e1562e6..68086bdd 100644 --- a/sql-migration-assistant/run_app_from_databricks_notebook.py +++ b/sql-migration-assistant/run_app_from_databricks_notebook.py @@ -7,16 +7,7 @@ # MAGIC If you want to share the app with users outside of Databricks, for example so non technical SMEs can contribute to LLM prompt development, the notebook needs to run on a no isolation shared cluster. # COMMAND ---------- -pip install databricks-sdk -U -q - -# COMMAND ---------- -pip install gradio==4.27.0 pyyaml aiohttp==3.10.5 databricks-labs-blueprint==0.8.2 databricks-labs-lsql==0.9.0 -q - -# COMMAND ---------- -pip install fastapi==0.112.2 pydantic==2.8.2 dbtunnel==0.14.6 -q - -# COMMAND ---------- -pip install . +%pip install . # COMMAND ---------- dbutils.library.restartPython() @@ -27,4 +18,4 @@ # set debug=True to print the app logs in this cell. # run_app(debug=True) -run_app("config.yml") +run_app() diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py index 45dcfa87..9d9a0c73 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py @@ -51,7 +51,7 @@ def _launch_app(self): # set debug=True to print the app logs in this cell. 
# run_app(debug=True) - run_app("/Workspace/Users/{self.w.current_user.me().user_name}/sql_migration_assistant/config.yml") + run_app() """ ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py index 63633b6f..9d7602c4 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py @@ -12,7 +12,7 @@ class FileUploader: def __init__(self, workspace_client: WorkspaceClient): self.w = workspace_client - self.installer = Installation(ws=self.w, product="sql_migration_assistant") + self.installer = Installation(ws=self.w, product="sql-migration-assistant") def upload( self, @@ -34,4 +34,4 @@ def save_config(self, config): config_class = X(**config) - self.installer.save(config_class, filename="config.yml") + self.installer.save(config_class, filename="src/sql_migration_assistant/config.yml") From 027c15f087d58102b69a589f12857fde9710b483 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 15:15:35 +0100 Subject: [PATCH 11/19] adds configs to installable files to remove dependency to Workspace path and adds requirements to installation --- .../utils/runindatabricks.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py index 2d8443b5..087805cb 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py @@ -13,24 +13,22 @@ current_folder = Path(__file__).parent.resolve() -def thread_func(config_path: str): +def thread_func(): cl = ConfigLoader() - cl.read_yaml_to_env(config_path) + cl.read_yaml_to_env() dbtunnel.kill_port(8080) - app = "main.py" + app = str(Path(current_folder, "..", "main.py").absolute()) dbtunnel.gradio(path=app).run() -def run_app(config_path: str, debug=False): +def run_app(debug=False): # load config file into environment variables. This is necesarry to create the workspace client if debug: # this will get the app logs to print in the notebook cell output - thread_func(config_path) + thread_func() else: cl = ConfigLoader() - cl.read_yaml_to_env(config_path) - with open(config_path, "r") as f: - config = yaml.safe_load(f) + config = cl.read_yaml_to_env() w = WorkspaceClient() app_runner = RunReviewApp(w, config) @@ -41,7 +39,7 @@ def run_app(config_path: str, debug=False): proxy_url_split[-3] = cluster_id proxy_url = "/".join(proxy_url_split) - x = threading.Thread(target=lambda: thread_func(config_path)) + x = threading.Thread(target=lambda: thread_func()) x.start() print( f"Launching review app, it may take a few minutes to come up. 
Visit below link to access the app.\n{proxy_url}" From 2e25ae581460af68bb932e38a63761e9792fb730 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 15:15:56 +0100 Subject: [PATCH 12/19] bug fixes --- .../infra/jobs_infra.py | 2 +- .../{ => src/sql_migration_assistant}/main.py | 0 .../utils/initialsetup.py | 30 +++++++++++-------- 3 files changed, 19 insertions(+), 13 deletions(-) rename sql-migration-assistant/{ => src/sql_migration_assistant}/main.py (100%) diff --git a/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py index a1a8d566..d6ebcefc 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py +++ b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py @@ -54,7 +54,7 @@ def __init__( ] self.job_name = "sql_migration_code_transformation" - self.notebook_root_path = f"/Workspace/Users/{self.w.current_user.me().user_name}/sql_migration_assistant/jobs/" + self.notebook_root_path = f"/Workspace/Users/{self.w.current_user.me().user_name}/.sql-migration-assistant/jobs/" self.job_parameters = [ JobParameterDefinition("agent_configs", ""), JobParameterDefinition("app_configs", ""), diff --git a/sql-migration-assistant/main.py b/sql-migration-assistant/src/sql_migration_assistant/main.py similarity index 100% rename from sql-migration-assistant/main.py rename to sql-migration-assistant/src/sql_migration_assistant/main.py diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py index e20aabf4..7fa72374 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py @@ -1,8 +1,10 @@ import logging import os from pathlib import Path +from typing import Iterator from databricks.labs.lsql.core import StatementExecutionExt +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import ResourceAlreadyExists, BadRequest from databricks.sdk.errors.platform import PermissionDenied @@ -19,12 +21,21 @@ from sql_migration_assistant.utils.upload_files_to_workspace import FileUploader -def list_files_recursive(parent_path: str, sub_path: str) -> list[str]: +def list_files_recursive(parent_path: str | Path, sub_path: str) -> Iterator[str]: # Get absolute paths of both directories dir_to_list = Path(parent_path, sub_path).resolve() base_dir = Path(parent_path).resolve() # List all files in dir_to_list and make paths relative to base_dir - return [str(file.relative_to(base_dir)) for file in dir_to_list.rglob('*') if file.is_file()] + for path in dir_to_list.rglob("*"): # Match all files and directories + # Exclude hidden files/folders, 'venv', and '.egg-info' folders + if ( + any(part.startswith(".") for part in path.parts) or # Hidden files/folders + "venv" in path.parts or # Exclude 'venv' + any(part.endswith(".egg-info") for part in path.parts) # Exclude '.egg-info' + ): + continue + if path.is_file(): # Only yield files + yield str(path.relative_to(base_dir)) class SetUpMigrationAssistant: @@ -168,24 +179,19 @@ def setup_migration_assistant(self, w, p): config = self.update_config(w, config) return config - def upload_files(self, w, path): + def upload_files(self, w: WorkspaceClient, path): # all this nastiness becomes unnecessary with lakehouse apps, or if we upload a whl it simplifies things. 
# But for now, this is the way. # TODO - MAKE THIS NICE!! + project_path = Path(path).parent.parent + files_to_upload = list_files_recursive(project_path, ".") + logging.info("Uploading files to workspace") print("\nUploading files to workspace") uploader = FileUploader(w) - files_to_upload = [ - "main.py", - "config.py", - "run_app_from_databricks_notebook.py", - "config.yml", - "setup.py", - *list_files_recursive(path, "src") - ] def inner(f): - full_file_path = os.path.join(path, f) + full_file_path = os.path.join(project_path, f) logging.info( f"Uploading {full_file_path} to {uploader.installer.install_folder()}/{f}" ) From 3e45275475401367289628dd368ba3d554210439 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 15:17:46 +0100 Subject: [PATCH 13/19] Formatting --- sql-migration-assistant/jobs/bronze_to_silver.py | 3 +-- .../run_app_from_databricks_notebook.py | 3 ++- sql-migration-assistant/setup.py | 4 +++- .../src/sql_migration_assistant/__init__.py | 12 +++++++----- .../sql_migration_assistant/utils/configloader.py | 7 ++++--- .../sql_migration_assistant/utils/initialsetup.py | 6 +++--- .../sql_migration_assistant/utils/runindatabricks.py | 1 - 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/sql-migration-assistant/jobs/bronze_to_silver.py b/sql-migration-assistant/jobs/bronze_to_silver.py index 1b573a1c..3413dc11 100644 --- a/sql-migration-assistant/jobs/bronze_to_silver.py +++ b/sql-migration-assistant/jobs/bronze_to_silver.py @@ -1,13 +1,12 @@ # Databricks notebook source # DBTITLE 1,get params import json + from pyspark.sql.types import ( - ArrayType, StructType, StructField, StringType, MapType, - IntegerType, ) agent_configs = json.loads(dbutils.widgets.get("agent_configs")) diff --git a/sql-migration-assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py index 68086bdd..78eb15ef 100644 --- a/sql-migration-assistant/run_app_from_databricks_notebook.py +++ b/sql-migration-assistant/run_app_from_databricks_notebook.py @@ -7,7 +7,8 @@ # MAGIC If you want to share the app with users outside of Databricks, for example so non technical SMEs can contribute to LLM prompt development, the notebook needs to run on a no isolation shared cluster. # COMMAND ---------- -%pip install . +%pip +install. 
# COMMAND ---------- dbutils.library.restartPython() diff --git a/sql-migration-assistant/setup.py b/sql-migration-assistant/setup.py index a98b1808..99e51c3d 100644 --- a/sql-migration-assistant/setup.py +++ b/sql-migration-assistant/setup.py @@ -1,10 +1,12 @@ from setuptools import setup, find_packages + # Read the requirements.txt file def load_requirements(filename="requirements.txt"): with open(filename, "r") as file: return file.read().splitlines() + setup( name="sql_migration_assistant", version="0.1", @@ -21,4 +23,4 @@ def load_requirements(filename="requirements.txt"): ], install_requires=load_requirements(), python_requires='>=3.10', -) \ No newline at end of file +) diff --git a/sql-migration-assistant/src/sql_migration_assistant/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/__init__.py index f5ca64f0..5a49aea6 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/__init__.py +++ b/sql-migration-assistant/src/sql_migration_assistant/__init__.py @@ -1,9 +1,11 @@ -from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant -from databricks.sdk import WorkspaceClient -from databricks.labs.blueprint.tui import Prompts -import yaml from pathlib import Path +import yaml +from databricks.labs.blueprint.tui import Prompts +from databricks.sdk import WorkspaceClient + +from sql_migration_assistant.utils.initialsetup import SetUpMigrationAssistant + def hello(**kwargs): w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1", profile=kwargs.get("profile")) @@ -17,4 +19,4 @@ def hello(**kwargs): with open(local_config, "w") as f: yaml.dump(final_config, f) setter_upper.upload_files(w, current_path) - setter_upper.launch_review_app(w, final_config) \ No newline at end of file + setter_upper.launch_review_app(w, final_config) diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py index 70507de8..94d40656 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py @@ -1,9 +1,9 @@ import os +from importlib.resources import files import yaml from databricks.sdk import WorkspaceClient -import yaml -from importlib.resources import files + def load_config(): # Access the YAML file as a resource @@ -12,6 +12,7 @@ def load_config(): config = yaml.safe_load(f) return config + class ConfigLoader: """ This is used to make it easy to transfer variables between a notebook and a workspace file using @@ -44,4 +45,4 @@ def read_yaml_to_env(self): if DATABRICKS_HOST[-1] == "/": DATABRICKS_HOST = DATABRICKS_HOST[:-1] os.environ["DATABRICKS_HOST"] = DATABRICKS_HOST - return data \ No newline at end of file + return data diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py index 7fa72374..dbda32a5 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py @@ -29,9 +29,9 @@ def list_files_recursive(parent_path: str | Path, sub_path: str) -> Iterator[str for path in dir_to_list.rglob("*"): # Match all files and directories # Exclude hidden files/folders, 'venv', and '.egg-info' folders if ( - any(part.startswith(".") for part in path.parts) or # Hidden files/folders - "venv" in path.parts or # Exclude 'venv' - 
any(part.endswith(".egg-info") for part in path.parts) # Exclude '.egg-info' + any(part.startswith(".") for part in path.parts) or # Hidden files/folders + "venv" in path.parts or # Exclude 'venv' + any(part.endswith(".egg-info") for part in path.parts) # Exclude '.egg-info' ): continue if path.is_file(): # Only yield files diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py index 087805cb..5cb7af1b 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py @@ -2,7 +2,6 @@ import threading from pathlib import Path -import yaml from databricks.sdk import WorkspaceClient from databricks.sdk.runtime import * from dbtunnel import dbtunnel From 0d72473bfe765aa9159607d44a144c67db0d292c Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Thu, 21 Nov 2024 16:15:15 +0100 Subject: [PATCH 14/19] Merges main --- sql-migration-assistant/requirements.txt | 3 +- .../run_app_from_databricks_notebook.py | 3 +- .../src/sql_migration_assistant/app/llm.py | 3 - .../app/prompt_helper.py | 27 +++++++ .../src/sql_migration_assistant/config.py | 2 + .../frontend/GradioFrontend.py | 38 ++-------- .../frontend/Tabs/CodeExplanationTab.py | 48 +++++++++++- .../frontend/Tabs/FeedbackTab.py | 18 +++++ .../frontend/Tabs/InstructionsTab.py | 40 ++++++++++ .../frontend/Tabs/TranslationTab.py | 76 +++++++++++-------- .../frontend/callbacks.py | 21 ++++- .../infra/unity_catalog_infra.py | 4 +- .../utils/runindatabricks.py | 6 -- 13 files changed, 206 insertions(+), 83 deletions(-) create mode 100644 sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py create mode 100644 sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py diff --git a/sql-migration-assistant/requirements.txt b/sql-migration-assistant/requirements.txt index eed4d188..49230bde 100644 --- a/sql-migration-assistant/requirements.txt +++ b/sql-migration-assistant/requirements.txt @@ -7,4 +7,5 @@ aiohttp==3.10.5 fastapi pydantic==2.8.2 dbtunnel==0.14.6 -mlflow \ No newline at end of file +mlflow +openai \ No newline at end of file diff --git a/sql-migration-assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py index 78eb15ef..68086bdd 100644 --- a/sql-migration-assistant/run_app_from_databricks_notebook.py +++ b/sql-migration-assistant/run_app_from_databricks_notebook.py @@ -7,8 +7,7 @@ # MAGIC If you want to share the app with users outside of Databricks, for example so non technical SMEs can contribute to LLM prompt development, the notebook needs to run on a no isolation shared cluster. # COMMAND ---------- -%pip -install. +%pip install . 
# COMMAND ---------- dbutils.library.restartPython() diff --git a/sql-migration-assistant/src/sql_migration_assistant/app/llm.py b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py index 71e31caf..ce5f82fa 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/app/llm.py +++ b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py @@ -1,8 +1,5 @@ import gradio as gr -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.serving import ChatMessage, ChatMessageRole - class LLMCalls: def __init__(self, openai_client, foundation_llm_name): diff --git a/sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py b/sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py index e69de29b..d2b040e4 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py +++ b/sql-migration-assistant/src/sql_migration_assistant/app/prompt_helper.py @@ -0,0 +1,27 @@ +import gradio as gr + + +class PromptHelper: + def __init__(self, see, catalog, schema, prompt_table): + self.see = see + self.CATALOG = catalog + self.SCHEMA = schema + self.PROMPT_TABLE = prompt_table + + def get_prompts(self, agent): + gr.Info("Retrieving Prompts...") + response = self.see.execute( + f"SELECT id, prompt, temperature, token_limit, save_time FROM {self.CATALOG}.{self.SCHEMA}.{self.PROMPT_TABLE} " + f"WHERE agent = '{agent}' " + f"ORDER BY save_time DESC " + ) + return response.result.data_array + + def save_prompt(self, agent, prompt, temperature, token_limit): + gr.Info("Saving prompt...") + self.see.execute( + f"INSERT INTO {self.CATALOG}.{self.SCHEMA}.{self.PROMPT_TABLE} " + f"(agent, prompt, temperature, token_limit, save_time) " + f"VALUES ('{agent}', '{prompt}',{temperature}, {token_limit}, CURRENT_TIMESTAMP())" + ) + gr.Info("Prompt saved") diff --git a/sql-migration-assistant/src/sql_migration_assistant/config.py b/sql-migration-assistant/src/sql_migration_assistant/config.py index adca8219..16deb817 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/config.py +++ b/sql-migration-assistant/src/sql_migration_assistant/config.py @@ -12,3 +12,5 @@ TRANSFORMATION_JOB_ID = os.environ.get("TRANSFORMATION_JOB_ID") WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION") VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH") +PROMPT_HISTORY_TABLE_NAME = os.environ.get("PROMPT_HISTORY_TABLE_NAME") +DATABRICKS_TOKEN = os.environ.get('DATABRICKS_TOKEN') diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py index 26fc1bdc..bb4e17dd 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py @@ -3,6 +3,7 @@ from sql_migration_assistant.frontend.Tabs.BatchInputCodeTab import BatchInputCodeTab from sql_migration_assistant.frontend.Tabs.BatchOutputTab import BatchOutputTab from sql_migration_assistant.frontend.Tabs.CodeExplanationTab import CodeExplanationTab +from sql_migration_assistant.frontend.Tabs.InstructionsTab import InstructionsTab from sql_migration_assistant.frontend.Tabs.InteractiveInputCodeTab import InteractiveInputCodeTab from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import InteractiveOutputTab from sql_migration_assistant.frontend.Tabs.SimilarCodeTab import SimilarCodeTab @@ -19,39 +20,12 @@ class GradioFrontend: intro = """logo # Databricks Legion 
Migration Accelerator - -Legion is an AI powered tool that aims to accelerate the migration of code to Databricks for low cost and effort. It -does this by using AI to translate, explain, and make discoverable your code. - -This interface is the Legion Control Panel. Here you are able to configure the AI agents for translation and explanation -to fit your needs, incorporating your expertise and knowledge of the codebase by adjusting the AI agents' instructions. - -Legion can work in a batch or interactive fashion. - -*Interactive operation* -Fine tune the AI agents on a single file and output the result as a Databricks notebook. -Use this UI to adjust the system prompts and instructions for the AI agents to generate the best translation and intent. - -*Batch operation* -Process a Volume of files to generate Databricks notebooks. Use this UI to fine tune your agent prompts against selected - files before executing a Workflow to transform all files in the Volume, outputting Databricks notebooks with the AI - generated intent and translation. - - -Please select your mode of operation to get started. - """ def __init__(self): with gr.Blocks(theme=gr.themes.Soft()) as self.app: self.intro_markdown = gr.Markdown(self.intro) - self.operation = gr.Radio( - label="Select operation mode", - choices=["Interactive mode", "Batch mode"], - value="Interactive mode", - type="value", - interactive=True, - ) + self.instructions_tab = InstructionsTab() self.interactive_input_code_tab = InteractiveInputCodeTab() self.batch_input_code_tab = BatchInputCodeTab() @@ -108,18 +82,18 @@ def add_logic_loading_interactive_mode(self): def change_tabs_based_on_operation_mode(self): for tab in [self.batch_input_code_tab, self.batch_output_tab]: - self.operation.change( + self.instructions_tab.operation.change( lambda x: ( gr.update(visible=(x != "Interactive mode")) ), - self.operation, + self.instructions_tab.operation, tab.tab, ) for tab in [self.interactive_input_code_tab, self.interactive_output_tab]: - self.operation.change( + self.instructions_tab.operation.change( lambda x: ( gr.update(visible=(x == "Interactive mode")) ), - self.operation, + self.instructions_tab.operation, tab.tab, ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py index 00a1f218..b601c9e6 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py @@ -1,6 +1,6 @@ import gradio as gr -from sql_migration_assistant.frontend.callbacks import llm_intent_wrapper +from sql_migration_assistant.frontend.callbacks import llm_intent_wrapper, get_prompt_details, prompt_helper class CodeExplanationTab: @@ -38,8 +38,50 @@ def __init__(self): with gr.Row(): self.intent_system_prompt = gr.Textbox( label="System prompt of the LLM to generate the intent.", - value="""Your job is to explain intent of the provided SQL code. 
- """.strip(), + placeholder="Add your system prompt here, for example:\n" + "Explain the intent of this code with an example use case.", + lines=3 + ) + # these bits relate to saving and loading of prompts + with gr.Row(): + self.save_intent_prompt = gr.Button("Save intent prompt") + self.load_intent_prompt = gr.Button("Load intent prompt") + # hidden button and display box for saved prompts, made visible when the load button is clicked + self.intent_prompt_id_to_load = gr.Textbox( + label="Prompt ID to load", + visible=False, + placeholder="Enter the ID of the prompt to load from the table below." + ) + self.loaded_intent_prompts = gr.Dataframe( + label='Saved prompts.', + visible=False, + headers=["id", "Prompt", "Temperature", "Max Tokens", "Save Datetime"], + interactive=False, + wrap=True + ) + # get the prompts and populate the table and make it visible + self.load_intent_prompt.click( + fn=lambda: gr.update(visible=True, value=prompt_helper.get_prompts("intent_agent")), + inputs=None, + outputs=[self.loaded_intent_prompts], + ) + # make the input box for the prompt id visible + self.load_intent_prompt.click( + fn=lambda: gr.update(visible=True), + inputs=None, + outputs=[self.intent_prompt_id_to_load], + ) + + self.intent_prompt_id_to_load.change( + fn=get_prompt_details, + inputs=[self.intent_prompt_id_to_load, self.loaded_intent_prompts], + outputs=[self.intent_system_prompt, self.intent_temperature, self.intent_max_tokens] + ) + # save the prompt + self.save_intent_prompt.click( + fn=lambda prompt, temp, tokens: prompt_helper.save_prompt("intent_agent", prompt, temp, tokens), + inputs=[self.intent_system_prompt, self.intent_temperature, self.intent_max_tokens], + outputs=None ) with gr.Accordion(label="Intent Pane", open=True): diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py new file mode 100644 index 00000000..545098b8 --- /dev/null +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py @@ -0,0 +1,18 @@ +import gradio as gr + + +class FeedbackTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Instructions") as self.tab: + self.header = gr.Markdown( + """ + ## Comments? Feature Suggestions? Bugs? + + Below is the link to the Legion Github repo for you to raise an issue. + + On the right hand side of the Issue page, please assign it to **robertwhiffin**, and select the project **Legion**. + Raise the issue on the Github repo for Legion [here](https://github.com/databrickslabs/sandbox/issues/new). + """) diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py new file mode 100644 index 00000000..e1de6c08 --- /dev/null +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InstructionsTab.py @@ -0,0 +1,40 @@ +import gradio as gr + + +class InstructionsTab: + header: gr.Markdown + tab: gr.Tab + + def __init__(self): + with gr.Tab(label="Instructions") as self.tab: + self.header = gr.Markdown( + """ + Legion is an AI powered tool that aims to accelerate the migration of code to Databricks for low cost and effort. It + does this by using AI to translate, explain, and make discoverable your code. + + This interface is the Legion Control Panel. 
Here you are able to configure the AI agents for translation and explanation + to fit your needs, incorporating your expertise and knowledge of the codebase by adjusting the AI agents' instructions. + + Legion can work in a batch or interactive fashion. + + *Interactive operation* + Fine tune the AI agents on a single file and output the result as a Databricks notebook. + Use this UI to adjust the system prompts and instructions for the AI agents to generate the best translation and intent. + + *Batch operation* + Process a Volume of files to generate Databricks notebooks. Use this UI to fine tune your agent prompts against selected + files before executing a Workflow to transform all files in the Volume, outputting Databricks notebooks with the AI + generated intent and translation. + + + Please select your mode of operation to get started. + + """ + ) + self.operation = gr.Radio( + label="Select operation mode", + choices=["Interactive mode", "Batch mode"], + value="Interactive mode", + type="value", + interactive=True, + ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py index 75128cf0..435435f8 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py @@ -1,6 +1,6 @@ import gradio as gr -from sql_migration_assistant.frontend.callbacks import llm_translate_wrapper +from sql_migration_assistant.frontend.callbacks import llm_translate_wrapper, prompt_helper, get_prompt_details class TranslationTab: @@ -36,39 +36,53 @@ def __init__(self): with gr.Row(): self.translation_system_prompt = gr.Textbox( label="Instructions for the LLM translation tool.", - value=""" - You are an expert in multiple SQL dialects. You only reply with SQL code and with no other text. - Your purpose is to translate the given SQL query to Databricks Spark SQL. - You must follow these rules: - - You must keep all original catalog, schema, table, and field names. - - Convert all dates to dd-MMM-yyyy format using the date_format() function. - - Subqueries must end with a semicolon. - - Ensure queries do not have # or @ symbols. - - ONLY if the original query uses temporary tables (e.g. "INTO #temptable"), re-write these as either CREATE OR REPLACE TEMPORARY VIEW or CTEs. . - - Square brackets must be replaced with backticks. - - Custom field names should be surrounded by backticks. - - Ensure queries do not have # or @ symbols. - - Only if the original query contains DECLARE and SET statements, re-write them according to the following format: - DECLARE VARIABLE variable TYPE DEFAULT value; For example: DECLARE VARIABLE number INT DEFAULT 9; - SET VAR variable = value; For example: SET VAR number = 9; - - Write an initial draft of the translated query. Then double check the output for common mistakes, including: - - Using NOT IN with NULL values - - Using UNION when UNION ALL should have been used - - Using BETWEEN for exclusive ranges - - Data type mismatch in predicates - - Properly quoting identifiers - - Using the correct number of arguments for functions - - Casting to the correct data type - - Using the proper columns for joins - - Return the final translated query only. Include comments. Include only SQL. 
- """.strip(), - lines=20, + placeholder="Add your system prompt here, for example:\n" + "Translate this code to Spark SQL.", + lines=3 ) + with gr.Row(): + self.save_translation_prompt = gr.Button("Save translation prompt") + self.load_translation_prompt = gr.Button("Load translation prompt") + # hidden button and display box for saved prompts, made visible when the load button is clicked + self.translation_prompt_id_to_load = gr.Textbox( + label="Prompt ID to load", + visible=False, + placeholder="Enter the ID of the prompt to load from the table below." + ) + self.loaded_translation_prompts = gr.Dataframe( + label='Saved prompts.', + visible=False, + headers=["id", "Prompt", "Temperature", "Max Tokens", "Save Datetime"], + interactive=False, + wrap=True + ) + # get the prompts and populate the table and make it visible + self.load_translation_prompt.click( + fn=lambda: gr.update(visible=True, value=prompt_helper.get_prompts("translation_agent")), + inputs=None, + outputs=[self.loaded_translation_prompts], + ) + # make the input box for the prompt id visible + self.load_translation_prompt.click( + fn=lambda: gr.update(visible=True), + inputs=None, + outputs=[self.translation_prompt_id_to_load], + ) + # retrive the row from the table and populate the system prompt, temperature, and max tokens + self.translation_prompt_id_to_load.change( + fn=get_prompt_details, + inputs=[self.translation_prompt_id_to_load, self.loaded_translation_prompts], + outputs=[self.translation_system_prompt, self.translation_temperature, self.translation_max_tokens] + ) + self.save_translation_prompt.click( + fn=lambda prompt, temp, tokens: prompt_helper.save_prompt("translation_agent", prompt, temp, + tokens), + inputs=[self.translation_system_prompt, self.translation_temperature, self.translation_max_tokens], + outputs=None + ) with gr.Accordion(label="Translation Pane", open=True): - gr.Markdown(""" ### Input your code here for translation to Spark-SQL.""") + gr.Markdown(""" ### Source code for translation to Spark-SQL.""") # a button labelled translate self.translate_button = gr.Button("Translate") with gr.Row(): diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py index c0321b02..773fbb6d 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py @@ -7,8 +7,10 @@ from databricks.labs.lsql.core import StatementExecutionExt from databricks.sdk import WorkspaceClient from databricks.sdk.service.workspace import ImportFormat, Language +from openai import OpenAI from sql_migration_assistant.app.llm import LLMCalls +from sql_migration_assistant.app.prompt_helper import PromptHelper from sql_migration_assistant.app.similar_code import SimilarCode from sql_migration_assistant.config import ( FOUNDATION_MODEL_NAME, @@ -20,13 +22,20 @@ VS_INDEX_NAME, DATABRICKS_HOST, TRANSFORMATION_JOB_ID, - WORKSPACE_LOCATION, VOLUME_NAME, + WORKSPACE_LOCATION, VOLUME_NAME, DATABRICKS_TOKEN, PROMPT_HISTORY_TABLE_NAME, +) + +openai_client = OpenAI( + api_key=DATABRICKS_TOKEN, + base_url=f"{DATABRICKS_HOST}/serving-endpoints" ) w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") see = StatementExecutionExt(w, warehouse_id=SQL_WAREHOUSE_ID) -translation_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME) -intent_llm = LLMCalls(foundation_llm_name=FOUNDATION_MODEL_NAME) +translation_llm = 
LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME) +intent_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME) + +prompt_helper = PromptHelper(see=see, catalog=CATALOG, schema=SCHEMA, prompt_table=PROMPT_HISTORY_TABLE_NAME) similar_code_helper = SimilarCode( workspace_client=w, see=see, @@ -172,3 +181,9 @@ def save_intent_wrapper(input_code, explained): gr.Info("Saving intent") similar_code_helper.save_intent(input_code, explained) gr.Info("Intent saved") + +# retreive the row from the table and populate the system prompt, temperature, and max tokens +def get_prompt_details(prompt_id, prompts): + prompt = prompts[prompts["id"] == prompt_id] + return [prompt["Prompt"].values[0], prompt["Temperature"].values[0], + prompt["Max Tokens"].values[0]] \ No newline at end of file diff --git a/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py index c76e8d9a..45c6441d 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py +++ b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py @@ -39,8 +39,8 @@ def __init__( # user cannot change these values self.tables = { - "code_intent" : f"(id BIGINT, code STRING, intent STRING) TBLPROPERTIES (delta.enableChangeDataFeed = true)", - "prompt_history" : f"(id BIGINT GENERATED ALWAYS AS IDENTITY, agent STRING, prompt STRING, temperature DOUBLE, token_limit INT, save_time TIMESTAMP)", + "code_intent": f"(id BIGINT, code STRING, intent STRING) TBLPROPERTIES (delta.enableChangeDataFeed = true)", + "prompt_history": f"(id BIGINT GENERATED ALWAYS AS IDENTITY, agent STRING, prompt STRING, temperature DOUBLE, token_limit INT, save_time TIMESTAMP)", } self.volume_name = "sql_migration_assistant_volume" self.volume_dirs = { diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py index d9202aa7..c0723118 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/runindatabricks.py @@ -1,13 +1,9 @@ # this is only run from within databricks, hence the import doesn't work in IDE -import threading from pathlib import Path -from databricks.sdk import WorkspaceClient -from databricks.sdk.runtime import * from dbtunnel import dbtunnel from sql_migration_assistant.utils.configloader import ConfigLoader -from sql_migration_assistant.utils.run_review_app import RunReviewApp current_folder = Path(__file__).parent.resolve() @@ -18,5 +14,3 @@ def run_app(): dbtunnel.kill_port(8080) app = str(Path(current_folder, "..", "main.py").absolute()) dbtunnel.gradio(path=app).run() - - From 88aa9c5eccde12bb49364da4a7632bd5f4ca3f25 Mon Sep 17 00:00:00 2001 From: "sebastian.grunwald" Date: Tue, 26 Nov 2024 11:40:40 +0100 Subject: [PATCH 15/19] Changed formatting to black --- sql-migration-assistant/setup.py | 4 +- .../src/sql_migration_assistant/__init__.py | 6 +- .../src/sql_migration_assistant/app/llm.py | 4 +- .../app/similar_code.py | 16 +++--- .../src/sql_migration_assistant/config.py | 2 +- .../frontend/GradioFrontend.py | 44 ++++++++++----- .../frontend/Tabs/BatchInputCodeTab.py | 12 +++- .../frontend/Tabs/CodeExplanationTab.py | 56 ++++++++++++++----- .../frontend/Tabs/FeedbackTab.py | 3 +- .../frontend/Tabs/InteractiveOutputTab.py | 4 +- 
.../frontend/Tabs/TranslationTab.py | 53 +++++++++++++----- .../frontend/callbacks.py | 38 +++++++++---- .../infra/jobs_infra.py | 6 +- .../infra/model_def.py | 48 ++++++++-------- .../infra/unity_catalog_infra.py | 10 ++-- .../infra/vector_search_infra.py | 4 +- .../utils/configloader.py | 4 +- .../utils/initialsetup.py | 8 ++- .../utils/run_review_app.py | 2 +- .../utils/uc_model_version.py | 14 ++--- .../utils/upload_files_to_workspace.py | 10 ++-- sql_migration_assistant/gradio_app.py | 0 22 files changed, 226 insertions(+), 122 deletions(-) delete mode 100644 sql_migration_assistant/gradio_app.py diff --git a/sql-migration-assistant/setup.py b/sql-migration-assistant/setup.py index 99e51c3d..4cb68723 100644 --- a/sql-migration-assistant/setup.py +++ b/sql-migration-assistant/setup.py @@ -14,7 +14,7 @@ def load_requirements(filename="requirements.txt"): package_dir={"": "src"}, include_package_data=True, # Include files specified in MANIFEST.in package_data={ - 'sql_migration_assistant': ['config.yml'], # Include YAML file + "sql_migration_assistant": ["config.yml"], # Include YAML file }, classifiers=[ "Programming Language :: Python :: 3", @@ -22,5 +22,5 @@ def load_requirements(filename="requirements.txt"): "Operating System :: OS Independent", ], install_requires=load_requirements(), - python_requires='>=3.10', + python_requires=">=3.10", ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/__init__.py b/sql-migration-assistant/src/sql_migration_assistant/__init__.py index 5a49aea6..02138bad 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/__init__.py +++ b/sql-migration-assistant/src/sql_migration_assistant/__init__.py @@ -8,7 +8,11 @@ def hello(**kwargs): - w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1", profile=kwargs.get("profile")) + w = WorkspaceClient( + product="sql_migration_assistant", + product_version="0.0.1", + profile=kwargs.get("profile"), + ) p = Prompts() setter_upper = SetUpMigrationAssistant() setter_upper.check_cloud(w) diff --git a/sql-migration-assistant/src/sql_migration_assistant/app/llm.py b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py index ce5f82fa..6769d89d 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/app/llm.py +++ b/sql-migration-assistant/src/sql_migration_assistant/app/llm.py @@ -41,7 +41,7 @@ def call_llm(self, messages, max_tokens, temperature): def llm_translate(self, system_prompt, input_code, max_tokens, temperature): messages = [ {"role": "system", "content": system_prompt}, - {"role": "user", "content": input_code} + {"role": "user", "content": input_code}, ] # call the LLM end point. @@ -55,7 +55,7 @@ def llm_translate(self, system_prompt, input_code, max_tokens, temperature): def llm_intent(self, system_prompt, input_code, max_tokens, temperature): messages = [ {"role": "system", "content": system_prompt}, - {"role": "user", "content": input_code} + {"role": "user", "content": input_code}, ] # call the LLM end point. 
diff --git a/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py b/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py index 9ea6effa..4bef3911 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py +++ b/sql-migration-assistant/src/sql_migration_assistant/app/similar_code.py @@ -5,14 +5,14 @@ class SimilarCode: def __init__( - self, - workspace_client: WorkspaceClient, - see: StatementExecutionExt, - catalog, - schema, - code_intent_table_name, - VS_index_name, - VS_endpoint_name, + self, + workspace_client: WorkspaceClient, + see: StatementExecutionExt, + catalog, + schema, + code_intent_table_name, + VS_index_name, + VS_endpoint_name, ): self.w = workspace_client self.see = see diff --git a/sql-migration-assistant/src/sql_migration_assistant/config.py b/sql-migration-assistant/src/sql_migration_assistant/config.py index 16deb817..eb44faf0 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/config.py +++ b/sql-migration-assistant/src/sql_migration_assistant/config.py @@ -13,4 +13,4 @@ WORKSPACE_LOCATION = os.environ.get("WORKSPACE_LOCATION") VOLUME_NAME_INPUT_PATH = os.environ.get("VOLUME_NAME_INPUT_PATH") PROMPT_HISTORY_TABLE_NAME = os.environ.get("PROMPT_HISTORY_TABLE_NAME") -DATABRICKS_TOKEN = os.environ.get('DATABRICKS_TOKEN') +DATABRICKS_TOKEN = os.environ.get("DATABRICKS_TOKEN") diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py index bb4e17dd..f8318e3f 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/GradioFrontend.py @@ -4,8 +4,12 @@ from sql_migration_assistant.frontend.Tabs.BatchOutputTab import BatchOutputTab from sql_migration_assistant.frontend.Tabs.CodeExplanationTab import CodeExplanationTab from sql_migration_assistant.frontend.Tabs.InstructionsTab import InstructionsTab -from sql_migration_assistant.frontend.Tabs.InteractiveInputCodeTab import InteractiveInputCodeTab -from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import InteractiveOutputTab +from sql_migration_assistant.frontend.Tabs.InteractiveInputCodeTab import ( + InteractiveInputCodeTab, +) +from sql_migration_assistant.frontend.Tabs.InteractiveOutputTab import ( + InteractiveOutputTab, +) from sql_migration_assistant.frontend.Tabs.SimilarCodeTab import SimilarCodeTab from sql_migration_assistant.frontend.Tabs.TranslationTab import TranslationTab from sql_migration_assistant.frontend.callbacks import ( @@ -35,8 +39,13 @@ def __init__(self): self.batch_output_tab = BatchOutputTab() self.interactive_output_tab = InteractiveOutputTab() - self.similar_code_tab.submit.click(save_intent_wrapper, inputs=[self.translation_tab.translation_input_code, - self.code_explanation_tab.explained]) + self.similar_code_tab.submit.click( + save_intent_wrapper, + inputs=[ + self.translation_tab.translation_input_code, + self.code_explanation_tab.explained, + ], + ) self.batch_output_tab.execute.click( exectute_workflow, inputs=[ @@ -50,8 +59,12 @@ def __init__(self): outputs=self.batch_output_tab.run_status, ) self.interactive_output_tab.produce_preview_button.click( - produce_preview, inputs=[self.code_explanation_tab.explained, self.translation_tab.translated], - outputs=self.interactive_output_tab.preview + produce_preview, + inputs=[ + self.code_explanation_tab.explained, + self.translation_tab.translated, 
+ ], + outputs=self.interactive_output_tab.preview, ) self.add_logic_loading_batch_mode() self.add_logic_loading_interactive_mode() @@ -66,8 +79,11 @@ def add_logic_loading_batch_mode(self): ]: self.batch_input_code_tab.select_code_file.select( fn=read_code_file, - inputs=[self.batch_input_code_tab.volume_path, self.batch_input_code_tab.select_code_file], - outputs=output + inputs=[ + self.batch_input_code_tab.volume_path, + self.batch_input_code_tab.select_code_file, + ], + outputs=output, ) def add_logic_loading_interactive_mode(self): @@ -77,23 +93,21 @@ def add_logic_loading_interactive_mode(self): self.similar_code_tab.similar_code_input, ]: self.interactive_input_code_tab.interactive_code_button.click( - fn=lambda x: gr.update(value=x), inputs=self.interactive_input_code_tab.interactive_code, outputs=output + fn=lambda x: gr.update(value=x), + inputs=self.interactive_input_code_tab.interactive_code, + outputs=output, ) def change_tabs_based_on_operation_mode(self): for tab in [self.batch_input_code_tab, self.batch_output_tab]: self.instructions_tab.operation.change( - lambda x: ( - gr.update(visible=(x != "Interactive mode")) - ), + lambda x: (gr.update(visible=(x != "Interactive mode"))), self.instructions_tab.operation, tab.tab, ) for tab in [self.interactive_input_code_tab, self.interactive_output_tab]: self.instructions_tab.operation.change( - lambda x: ( - gr.update(visible=(x == "Interactive mode")) - ), + lambda x: (gr.update(visible=(x == "Interactive mode"))), self.instructions_tab.operation, tab.tab, ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py index 4a4d4f26..06739b49 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/BatchInputCodeTab.py @@ -1,6 +1,12 @@ import gradio as gr -from sql_migration_assistant.config import DATABRICKS_HOST, CATALOG, SCHEMA, VOLUME_NAME, VOLUME_NAME_INPUT_PATH +from sql_migration_assistant.config import ( + DATABRICKS_HOST, + CATALOG, + SCHEMA, + VOLUME_NAME, + VOLUME_NAME_INPUT_PATH, +) from sql_migration_assistant.frontend.callbacks import list_files @@ -24,6 +30,8 @@ def __init__(self): self.load_files = gr.Button("Load Files from Volume") self.select_code_file = gr.Radio(label="Select Code File") - self.selected_file = gr.Code(label="Selected Code File", language="sql-msSQL") + self.selected_file = gr.Code( + label="Selected Code File", language="sql-msSQL" + ) self.load_files.click(list_files, self.volume_path, self.select_code_file) diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py index b601c9e6..7d5497b0 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py @@ -1,6 +1,10 @@ import gradio as gr -from sql_migration_assistant.frontend.callbacks import llm_intent_wrapper, get_prompt_details, prompt_helper +from sql_migration_assistant.frontend.callbacks import ( + llm_intent_wrapper, + get_prompt_details, + prompt_helper, +) class CodeExplanationTab: @@ -39,8 +43,8 @@ def __init__(self): self.intent_system_prompt = gr.Textbox( label="System prompt of the LLM to generate the intent.", placeholder="Add 
your system prompt here, for example:\n" - "Explain the intent of this code with an example use case.", - lines=3 + "Explain the intent of this code with an example use case.", + lines=3, ) # these bits relate to saving and loading of prompts with gr.Row(): @@ -50,18 +54,27 @@ def __init__(self): self.intent_prompt_id_to_load = gr.Textbox( label="Prompt ID to load", visible=False, - placeholder="Enter the ID of the prompt to load from the table below." + placeholder="Enter the ID of the prompt to load from the table below.", ) self.loaded_intent_prompts = gr.Dataframe( - label='Saved prompts.', + label="Saved prompts.", visible=False, - headers=["id", "Prompt", "Temperature", "Max Tokens", "Save Datetime"], + headers=[ + "id", + "Prompt", + "Temperature", + "Max Tokens", + "Save Datetime", + ], interactive=False, - wrap=True + wrap=True, ) # get the prompts and populate the table and make it visible self.load_intent_prompt.click( - fn=lambda: gr.update(visible=True, value=prompt_helper.get_prompts("intent_agent")), + fn=lambda: gr.update( + visible=True, + value=prompt_helper.get_prompts("intent_agent"), + ), inputs=None, outputs=[self.loaded_intent_prompts], ) @@ -74,14 +87,27 @@ def __init__(self): self.intent_prompt_id_to_load.change( fn=get_prompt_details, - inputs=[self.intent_prompt_id_to_load, self.loaded_intent_prompts], - outputs=[self.intent_system_prompt, self.intent_temperature, self.intent_max_tokens] + inputs=[ + self.intent_prompt_id_to_load, + self.loaded_intent_prompts, + ], + outputs=[ + self.intent_system_prompt, + self.intent_temperature, + self.intent_max_tokens, + ], ) # save the prompt self.save_intent_prompt.click( - fn=lambda prompt, temp, tokens: prompt_helper.save_prompt("intent_agent", prompt, temp, tokens), - inputs=[self.intent_system_prompt, self.intent_temperature, self.intent_max_tokens], - outputs=None + fn=lambda prompt, temp, tokens: prompt_helper.save_prompt( + "intent_agent", prompt, temp, tokens + ), + inputs=[ + self.intent_system_prompt, + self.intent_temperature, + self.intent_max_tokens, + ], + outputs=None, ) with gr.Accordion(label="Intent Pane", open=True): @@ -105,7 +131,9 @@ def __init__(self): # divider subheader gr.Markdown(""" ## Code intent""") # output box of the T-SQL translated to Spark SQL - self.explained = gr.Textbox(label="AI generated intent of your code.") + self.explained = gr.Textbox( + label="AI generated intent of your code." + ) # reset hidden chat history and prompt # do translation diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py index 545098b8..82bbf7cb 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/FeedbackTab.py @@ -15,4 +15,5 @@ def __init__(self): On the right hand side of the Issue page, please assign it to **robertwhiffin**, and select the project **Legion**. Raise the issue on the Github repo for Legion [here](https://github.com/databrickslabs/sandbox/issues/new). 
- """) + """ + ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py index 8555c470..cf23ad73 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/InteractiveOutputTab.py @@ -38,7 +38,9 @@ def __init__(self): with gr.Column(): self.file_name = gr.Textbox(label="Filename for the notebook") self.write_to_workspace_button = gr.Button("Write to Workspace") - self.adhoc_write_output = gr.Markdown(label="Notebook output location") + self.adhoc_write_output = gr.Markdown( + label="Notebook output location" + ) self.preview = gr.Code(label="Preview", language="python") diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py index 435435f8..48552680 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/TranslationTab.py @@ -1,6 +1,10 @@ import gradio as gr -from sql_migration_assistant.frontend.callbacks import llm_translate_wrapper, prompt_helper, get_prompt_details +from sql_migration_assistant.frontend.callbacks import ( + llm_translate_wrapper, + prompt_helper, + get_prompt_details, +) class TranslationTab: @@ -37,8 +41,8 @@ def __init__(self): self.translation_system_prompt = gr.Textbox( label="Instructions for the LLM translation tool.", placeholder="Add your system prompt here, for example:\n" - "Translate this code to Spark SQL.", - lines=3 + "Translate this code to Spark SQL.", + lines=3, ) with gr.Row(): self.save_translation_prompt = gr.Button("Save translation prompt") @@ -47,18 +51,27 @@ def __init__(self): self.translation_prompt_id_to_load = gr.Textbox( label="Prompt ID to load", visible=False, - placeholder="Enter the ID of the prompt to load from the table below." 
+ placeholder="Enter the ID of the prompt to load from the table below.", ) self.loaded_translation_prompts = gr.Dataframe( - label='Saved prompts.', + label="Saved prompts.", visible=False, - headers=["id", "Prompt", "Temperature", "Max Tokens", "Save Datetime"], + headers=[ + "id", + "Prompt", + "Temperature", + "Max Tokens", + "Save Datetime", + ], interactive=False, - wrap=True + wrap=True, ) # get the prompts and populate the table and make it visible self.load_translation_prompt.click( - fn=lambda: gr.update(visible=True, value=prompt_helper.get_prompts("translation_agent")), + fn=lambda: gr.update( + visible=True, + value=prompt_helper.get_prompts("translation_agent"), + ), inputs=None, outputs=[self.loaded_translation_prompts], ) @@ -71,14 +84,26 @@ def __init__(self): # retrive the row from the table and populate the system prompt, temperature, and max tokens self.translation_prompt_id_to_load.change( fn=get_prompt_details, - inputs=[self.translation_prompt_id_to_load, self.loaded_translation_prompts], - outputs=[self.translation_system_prompt, self.translation_temperature, self.translation_max_tokens] + inputs=[ + self.translation_prompt_id_to_load, + self.loaded_translation_prompts, + ], + outputs=[ + self.translation_system_prompt, + self.translation_temperature, + self.translation_max_tokens, + ], ) self.save_translation_prompt.click( - fn=lambda prompt, temp, tokens: prompt_helper.save_prompt("translation_agent", prompt, temp, - tokens), - inputs=[self.translation_system_prompt, self.translation_temperature, self.translation_max_tokens], - outputs=None + fn=lambda prompt, temp, tokens: prompt_helper.save_prompt( + "translation_agent", prompt, temp, tokens + ), + inputs=[ + self.translation_system_prompt, + self.translation_temperature, + self.translation_max_tokens, + ], + outputs=None, ) with gr.Accordion(label="Translation Pane", open=True): diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py index 773fbb6d..040cb5e9 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py +++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/callbacks.py @@ -22,12 +22,14 @@ VS_INDEX_NAME, DATABRICKS_HOST, TRANSFORMATION_JOB_ID, - WORKSPACE_LOCATION, VOLUME_NAME, DATABRICKS_TOKEN, PROMPT_HISTORY_TABLE_NAME, + WORKSPACE_LOCATION, + VOLUME_NAME, + DATABRICKS_TOKEN, + PROMPT_HISTORY_TABLE_NAME, ) openai_client = OpenAI( - api_key=DATABRICKS_TOKEN, - base_url=f"{DATABRICKS_HOST}/serving-endpoints" + api_key=DATABRICKS_TOKEN, base_url=f"{DATABRICKS_HOST}/serving-endpoints" ) w = WorkspaceClient(product="sql_migration_assistant", product_version="0.0.1") @@ -35,7 +37,9 @@ translation_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME) intent_llm = LLMCalls(openai_client, foundation_llm_name=FOUNDATION_MODEL_NAME) -prompt_helper = PromptHelper(see=see, catalog=CATALOG, schema=SCHEMA, prompt_table=PROMPT_HISTORY_TABLE_NAME) +prompt_helper = PromptHelper( + see=see, catalog=CATALOG, schema=SCHEMA, prompt_table=PROMPT_HISTORY_TABLE_NAME +) similar_code_helper = SimilarCode( workspace_client=w, see=see, @@ -70,7 +74,9 @@ def llm_intent_wrapper(system_prompt, input_code, max_tokens, temperature): def llm_translate_wrapper(system_prompt, input_code, max_tokens, temperature): - translated_code = translation_llm.llm_translate(system_prompt, input_code, max_tokens, temperature) + translated_code = translation_llm.llm_translate( + 
system_prompt, input_code, max_tokens, temperature + ) return translated_code @@ -116,8 +122,14 @@ def write_adhoc_to_workspace(file_name, preview): return output_message -def exectute_workflow(intent_prompt, intent_temperature, intent_max_tokens, translation_prompt, translation_temperature, - translation_max_tokens): +def exectute_workflow( + intent_prompt, + intent_temperature, + intent_max_tokens, + translation_prompt, + translation_temperature, + translation_max_tokens, +): gr.Info("Beginning code transformation workflow") agent_config_payload = [ [ @@ -149,7 +161,9 @@ def exectute_workflow(intent_prompt, intent_temperature, intent_max_tokens, tran "CATALOG": os.environ.get("CATALOG"), "SCHEMA": os.environ.get("SCHEMA"), "DATABRICKS_HOST": DATABRICKS_HOST, - "DATABRICKS_TOKEN_SECRET_SCOPE": os.environ.get("DATABRICKS_TOKEN_SECRET_SCOPE"), + "DATABRICKS_TOKEN_SECRET_SCOPE": os.environ.get( + "DATABRICKS_TOKEN_SECRET_SCOPE" + ), "DATABRICKS_TOKEN_SECRET_KEY": os.environ.get("DATABRICKS_TOKEN_SECRET_KEY"), "CODE_INTENT_TABLE_NAME": os.environ.get("CODE_INTENT_TABLE_NAME"), "WORKSPACE_LOCATION": WORKSPACE_LOCATION, @@ -182,8 +196,12 @@ def save_intent_wrapper(input_code, explained): similar_code_helper.save_intent(input_code, explained) gr.Info("Intent saved") + # retreive the row from the table and populate the system prompt, temperature, and max tokens def get_prompt_details(prompt_id, prompts): prompt = prompts[prompts["id"] == prompt_id] - return [prompt["Prompt"].values[0], prompt["Temperature"].values[0], - prompt["Max Tokens"].values[0]] \ No newline at end of file + return [ + prompt["Prompt"].values[0], + prompt["Temperature"].values[0], + prompt["Max Tokens"].values[0], + ] diff --git a/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py index d6ebcefc..e9d26e94 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py +++ b/sql-migration-assistant/src/sql_migration_assistant/infra/jobs_infra.py @@ -24,9 +24,9 @@ class JobsInfra: def __init__( - self, - config, - workspace_client: WorkspaceClient, + self, + config, + workspace_client: WorkspaceClient, ): self.w = workspace_client self.config = config diff --git a/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py b/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py index bfe5eb1e..a8c1ff18 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py +++ b/sql-migration-assistant/src/sql_migration_assistant/infra/model_def.py @@ -122,15 +122,15 @@ def format_chat_history_for_prompt(chat_messages_array): # RAG Chain ############ is_question_about_sql_chain = ( - { - "question": itemgetter("messages") - | RunnableLambda(extract_user_query_string), - "formatted_chat_history": itemgetter("messages") - | RunnableLambda(extract_chat_history), - } - | is_question_relevant_prompt - | model - | StrOutputParser() + { + "question": itemgetter("messages") + | RunnableLambda(extract_user_query_string), + "formatted_chat_history": itemgetter("messages") + | RunnableLambda(extract_chat_history), + } + | is_question_relevant_prompt + | model + | StrOutputParser() ) irrelevant_question_chain = RunnableLambda( @@ -154,15 +154,15 @@ def format_chat_history_for_prompt(chat_messages_array): ) chain = ( - RunnablePassthrough() - | { - "system": itemgetter("system"), - "question": itemgetter("question"), - "formatted_chat_history": itemgetter("chat_history"), - } - | 
prompt - | model - | StrOutputParser() + RunnablePassthrough() + | { + "system": itemgetter("system"), + "question": itemgetter("question"), + "formatted_chat_history": itemgetter("chat_history"), + } + | prompt + | model + | StrOutputParser() ) branch_node = RunnableBranch( @@ -178,12 +178,12 @@ def format_chat_history_for_prompt(chat_messages_array): ) full_chain = { - "question_is_relevant": is_question_about_sql_chain, - "question": itemgetter("messages") | RunnableLambda(extract_user_query_string), - "system": itemgetter("messages") | RunnableLambda(extract_system_prompt_string), - "chat_history": itemgetter("messages") - | RunnableLambda(format_chat_history_for_prompt), - } | branch_node + "question_is_relevant": is_question_about_sql_chain, + "question": itemgetter("messages") | RunnableLambda(extract_user_query_string), + "system": itemgetter("messages") | RunnableLambda(extract_system_prompt_string), + "chat_history": itemgetter("messages") + | RunnableLambda(format_chat_history_for_prompt), + } | branch_node mlflow.models.set_model(model=full_chain) diff --git a/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py index 45c6441d..26a36a12 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py +++ b/sql-migration-assistant/src/sql_migration_assistant/infra/unity_catalog_infra.py @@ -21,11 +21,11 @@ class UnityCatalogInfra: def __init__( - self, - config, - workspace_client: WorkspaceClient, - p: Prompts, - see: StatementExecutionExt, + self, + config, + workspace_client: WorkspaceClient, + p: Prompts, + see: StatementExecutionExt, ): self.w = workspace_client self.config = config diff --git a/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py b/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py index b5f5705c..36e98c2b 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py +++ b/sql-migration-assistant/src/sql_migration_assistant/infra/vector_search_infra.py @@ -177,8 +177,8 @@ def create_VS_index(self): ) except NotFound as e: if ( - f"Vector search endpoint {self.migration_assistant_VS_endpoint} not found" - in str(e) + f"Vector search endpoint {self.migration_assistant_VS_endpoint} not found" + in str(e) ): logging.info( f"Waiting for Vector Search endpoint to provision. Retrying in 30 seconds." 
diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py index 94d40656..abee7613 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/configloader.py @@ -7,8 +7,8 @@ def load_config(): # Access the YAML file as a resource - config_path = files('sql_migration_assistant').joinpath('config.yml') - with config_path.open('r') as f: + config_path = files("sql_migration_assistant").joinpath("config.yml") + with config_path.open("r") as f: config = yaml.safe_load(f) return config diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py index dbda32a5..aafb353c 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/initialsetup.py @@ -29,9 +29,11 @@ def list_files_recursive(parent_path: str | Path, sub_path: str) -> Iterator[str for path in dir_to_list.rglob("*"): # Match all files and directories # Exclude hidden files/folders, 'venv', and '.egg-info' folders if ( - any(part.startswith(".") for part in path.parts) or # Hidden files/folders - "venv" in path.parts or # Exclude 'venv' - any(part.endswith(".egg-info") for part in path.parts) # Exclude '.egg-info' + any(part.startswith(".") for part in path.parts) + or "venv" in path.parts # Hidden files/folders + or any( # Exclude 'venv' + part.endswith(".egg-info") for part in path.parts + ) # Exclude '.egg-info' ): continue if path.is_file(): # Only yield files diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py index 54b59cf7..f0ef7d87 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py @@ -59,7 +59,7 @@ def _launch_app(self): def _get_proxy_url(self, organisation_id): def get_cloud_proxy_settings( - cloud: str, host: str, org_id: str, cluster_id: str, port: int + cloud: str, host: str, org_id: str, cluster_id: str, port: int ): cloud_norm = cloud.lower() if cloud_norm not in ["aws", "azure"]: diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py b/sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py index 5b0611e6..d9a7ca77 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py +++ b/sql-migration-assistant/src/sql_migration_assistant/utils/uc_model_version.py @@ -8,11 +8,11 @@ def get_latest_model_version(model_name): client = MlflowClient() model_version_infos = client.search_model_versions("name = '%s'" % model_name) return ( - max( - [ - int(model_version_info.version) - for model_version_info in model_version_infos - ] - ) - or 1 + max( + [ + int(model_version_info.version) + for model_version_info in model_version_infos + ] + ) + or 1 ) diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py index 9d7602c4..d908cb6d 100644 --- a/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py +++ 
diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py
index 9d7602c4..d908cb6d 100644
--- a/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/upload_files_to_workspace.py
@@ -15,9 +15,9 @@ def __init__(self, workspace_client: WorkspaceClient):
         self.installer = Installation(ws=self.w, product="sql-migration-assistant")

     def upload(
-            self,
-            file_path,
-            file_name,
+        self,
+        file_path,
+        file_name,
     ):
         with open(file_path, "rb") as file:
             contents = file.read()
@@ -34,4 +34,6 @@ def save_config(self, config):

         config_class = X(**config)
-        self.installer.save(config_class, filename="src/sql_migration_assistant/config.yml")
+        self.installer.save(
+            config_class, filename="src/sql_migration_assistant/config.yml"
+        )
diff --git a/sql_migration_assistant/gradio_app.py b/sql_migration_assistant/gradio_app.py
deleted file mode 100644
index e69de29b..00000000
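The upload_files_to_workspace.py hunk above wraps a `databricks-labs-blueprint` `Installation.save` call. A rough sketch of that save pattern, under the assumption that the config is a simple dataclass, is shown below; the `AssistantConfig` fields and `save_assistant_config` helper are illustrative, not the repository's actual class.

```python
from dataclasses import dataclass

from databricks.labs.blueprint.installation import Installation
from databricks.sdk import WorkspaceClient


@dataclass
class AssistantConfig:
    # illustrative fields; the real config carries the values collected by the installer
    catalog: str
    schema: str
    volume_name: str


def save_assistant_config(w: WorkspaceClient, config: dict) -> None:
    # Same constructor shape as in the hunk above; the product name scopes the install files.
    installer = Installation(ws=w, product="sql-migration-assistant")
    # save() serialises the dataclass to the given filename inside the install folder.
    installer.save(AssistantConfig(**config), filename="config.yml")


if __name__ == "__main__":
    # requires valid Databricks credentials in the environment
    save_assistant_config(
        WorkspaceClient(),
        {"catalog": "legion", "schema": "sql_migration", "volume_name": "src_volume"},
    )
```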
From 107f951032e656a414e5bdf89224d3a604e454ce Mon Sep 17 00:00:00 2001
From: "sebastian.grunwald"
Date: Tue, 26 Nov 2024 11:44:05 +0100
Subject: [PATCH 16/19] removed unnecessary comments

---
 .../src/sql_migration_assistant/utils/run_review_app.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py
index f0ef7d87..27087d5a 100644
--- a/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/utils/run_review_app.py
@@ -50,8 +50,6 @@ def _launch_app(self):
             code=f"""
 from sql_migration_assistant.utils.runindatabricks import run_app

-# set debug=True to print the app logs in this cell.
-# run_app(debug=True)
 run_app()
 """
         )

From 9be7b9d881011e42aad14d0daff2f2d3dfd80f64 Mon Sep 17 00:00:00 2001
From: "sebastian.grunwald"
Date: Tue, 26 Nov 2024 11:49:26 +0100
Subject: [PATCH 17/19] removed unnecessary class

---
 .../frontend/Tabs/Tab.py | 20 -------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py

diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py
deleted file mode 100644
index c7a94f00..00000000
--- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/Tab.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from abc import ABC, abstractmethod
-
-import gradio as gr
-
-
-class Tab(ABC):
-    header: gr.Markdown
-    label: str
-    tab: gr.Tab
-
-    def __init__(self, header: str, label: str, **kwargs):
-        with gr.Tab(label=label, *+kwargs) as tab:
-            self.header = gr.Markdown(header)
-            self.tab = tab
-            self.build()
-
-    @abstractmethod
-    def build(self):
-        """Build your Tab components here. Use self. to store components you need again"""
-        pass

From a448adbeaf6251f2ea672049010eb368e9a43c43 Mon Sep 17 00:00:00 2001
From: "sebastian.grunwald"
Date: Tue, 26 Nov 2024 11:53:05 +0100
Subject: [PATCH 18/19] Fix for save prompt

---
 .../frontend/Tabs/CodeExplanationTab.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py
index 7d5497b0..cb5ceb31 100644
--- a/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py
+++ b/sql-migration-assistant/src/sql_migration_assistant/frontend/Tabs/CodeExplanationTab.py
@@ -99,10 +99,9 @@ def __init__(self):
         )
         # save the prompt
         self.save_intent_prompt.click(
-            fn=lambda prompt, temp, tokens: prompt_helper.save_prompt(
-                "intent_agent", prompt, temp, tokens
-            ),
+            fn=prompt_helper.save_prompt,
             inputs=[
+                gr.Textbox("intent_agent"),
                 self.intent_system_prompt,
                 self.intent_temperature,
                 self.intent_max_tokens,

From d7a959e571fa4adc4a4497558f60ff4bb4d138b7 Mon Sep 17 00:00:00 2001
From: "sebastian.grunwald"
Date: Tue, 26 Nov 2024 11:54:57 +0100
Subject: [PATCH 19/19] Removed unnecessary code

---
 sql-migration-assistant/run_app_from_databricks_notebook.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sql-migration-assistant/run_app_from_databricks_notebook.py b/sql-migration-assistant/run_app_from_databricks_notebook.py
index 68086bdd..6cc9ad3f 100644
--- a/sql-migration-assistant/run_app_from_databricks_notebook.py
+++ b/sql-migration-assistant/run_app_from_databricks_notebook.py
@@ -16,6 +16,4 @@

 from sql_migration_assistant.utils.runindatabricks import run_app

-# set debug=True to print the app logs in this cell.
-# run_app(debug=True)
 run_app()
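The save-prompt fix in PATCH 18 replaces a lambda with a direct callback reference, so every argument is supplied by a Gradio component in `inputs`. A minimal, self-contained sketch of that wiring pattern follows; the `save_prompt` function and component names are illustrative, not the repository's PromptHelper.

```python
import gradio as gr


def save_prompt(agent_name: str, system_prompt: str, temperature: float, max_tokens: int) -> str:
    # Placeholder persistence; the real app writes prompts to its own config store.
    return f"Saved '{agent_name}' prompt ({temperature=}, {max_tokens=}, {len(system_prompt)} chars)."


with gr.Blocks() as demo:
    agent_name = gr.Textbox(value="intent_agent", visible=False)  # constant input to the callback
    system_prompt = gr.Textbox(label="System prompt", lines=4)
    temperature = gr.Slider(0.0, 1.0, value=0.2, label="Temperature")
    max_tokens = gr.Number(value=1024, label="Max tokens", precision=0)
    status = gr.Markdown()

    save_button = gr.Button("Save prompt")
    # Components listed in `inputs` are passed positionally to `fn`, mirroring the PATCH 18 change.
    save_button.click(
        fn=save_prompt,
        inputs=[agent_name, system_prompt, temperature, max_tokens],
        outputs=[status],
    )

if __name__ == "__main__":
    demo.launch()
```

Passing the agent name through a hidden component keeps the callback a plain function of its inputs, which is easier to reuse across tabs than a per-tab lambda.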