diff --git a/tutorials/llm_application_tracing_evaluating_and_analysis.ipynb b/tutorials/llm_application_tracing_evaluating_and_analysis.ipynb
index 50b59168e8..f540a7805e 100644
--- a/tutorials/llm_application_tracing_evaluating_and_analysis.ipynb
+++ b/tutorials/llm_application_tracing_evaluating_and_analysis.ipynb
@@ -46,26 +46,32 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install -qq \"arize-phoenix[experimental,llama-index]\" \"openai>=1\" gcsfs nest_asyncio\n",
-    "\n",
-    "# Import Statements\n",
+    "!pip install -qq \"arize-phoenix[evals,llama-index]\" \"openai>=1\" gcsfs nest_asyncio llama-index-llms-openai"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "import os\n",
     "from getpass import getpass\n",
     "from typing import List, cast\n",
+    "from urllib.request import urlopen\n",
     "\n",
     "import pandas as pd\n",
     "import phoenix as px\n",
-    "import requests\n",
     "from gcsfs import GCSFileSystem\n",
-    "from llama_index import (\n",
+    "from llama_index.core import (\n",
     "    ServiceContext,\n",
     "    StorageContext,\n",
     "    load_index_from_storage,\n",
     "    set_global_handler,\n",
     ")\n",
-    "from llama_index.embeddings import OpenAIEmbedding\n",
-    "from llama_index.graph_stores.simple import SimpleGraphStore\n",
-    "from llama_index.llms import OpenAI\n",
+    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
+    "from llama_index.core.graph_stores import SimpleGraphStore\n",
+    "from llama_index.llms.openai import OpenAI\n",
     "from phoenix import TraceDataset\n",
     "from phoenix.trace import DocumentEvaluations, SpanEvaluations\n",
     "from phoenix.trace.utils import json_lines_to_df\n",
@@ -85,18 +91,10 @@
     "qa_correctness_eval_url = \"https://storage.googleapis.com/arize-phoenix-assets/datasets/unstructured/llm/context-retrieval/qa_correctness_eval.parquet\"\n",
     "retrieved_documents_eval_url = \"https://storage.googleapis.com/arize-phoenix-assets/datasets/unstructured/llm/context-retrieval/retrieved_documents_eval.parquet\"\n",
     "\n",
-    "response = requests.get(trace_jsonl_url)\n",
-    "\n",
-    "if response.status_code == 200:\n",
-    "    with open(\"trace.jsonl\", \"wb\") as f:\n",
-    "        f.write(response.content)\n",
-    "    json_lines = []\n",
-    "    with open(\"trace.jsonl\", \"r\") as f:\n",
-    "        json_lines = cast(List[str], f.readlines())\n",
-    "    trace_ds = TraceDataset(json_lines_to_df(json_lines))\n",
-    "    px.launch_app(trace=trace_ds)\n",
-    "else:\n",
-    "    print(f\"Failed to download the file. Status code: {response.status_code}\")\n",
+    "with urlopen(trace_jsonl_url) as response:\n",
+    "    lines = [line.decode(\"utf-8\") for line in response.readlines()]\n",
+    "trace_ds = TraceDataset(json_lines_to_df(lines))\n",
+    "px.launch_app(trace=trace_ds)\n",
     "\n",
     "hallucination_eval_df = pd.read_parquet(hallucination_eval_url)\n",
     "qa_correctness_eval_df = pd.read_parquet(qa_correctness_eval_url)\n",
@@ -244,7 +242,7 @@
     "# Generating the Hallucination & Q&A Eval\n",
     "\n",
     "import nest_asyncio\n",
-    "from phoenix.experimental.evals import (\n",
+    "from phoenix.evals import (\n",
     "    HALLUCINATION_PROMPT_RAILS_MAP,\n",
     "    HALLUCINATION_PROMPT_TEMPLATE,\n",
     "    QA_PROMPT_RAILS_MAP,\n",
@@ -258,7 +256,7 @@
     "# Creating Hallucination Eval which checks if the application hallucinated\n",
     "hallucination_eval = llm_classify(\n",
     "    dataframe=queries_df,\n",
-    "    model=OpenAIModel(\"gpt-4\", temperature=0.0),\n",
+    "    model=OpenAIModel(model=\"gpt-4\", temperature=0.0),\n",
     "    template=HALLUCINATION_PROMPT_TEMPLATE,\n",
     "    rails=list(HALLUCINATION_PROMPT_RAILS_MAP.values()),\n",
     "    provide_explanation=True,  # Makes the LLM explain its reasoning\n",
@@ -271,7 +269,7 @@
     "# Creating Q&A Eval which checks if the application answered the question correctly\n",
     "qa_correctness_eval = llm_classify(\n",
     "    dataframe=queries_df,\n",
-    "    model=OpenAIModel(\"gpt-4\", temperature=0.0),\n",
+    "    model=OpenAIModel(model=\"gpt-4\", temperature=0.0),\n",
     "    template=QA_PROMPT_TEMPLATE,\n",
     "    rails=list(QA_PROMPT_RAILS_MAP.values()),\n",
     "    provide_explanation=True,  # Makes the LLM explain its reasoning\n",
@@ -326,7 +324,7 @@
    "source": [
     "# Generating Retrieval Relevance Eval\n",
     "\n",
-    "from phoenix.experimental.evals import (\n",
+    "from phoenix.evals import (\n",
     "    RAG_RELEVANCY_PROMPT_RAILS_MAP,\n",
     "    RAG_RELEVANCY_PROMPT_TEMPLATE,\n",
     "    OpenAIModel,\n",
@@ -335,7 +333,7 @@
     "\n",
     "retrieved_documents_eval = llm_classify(\n",
     "    dataframe=retrieved_documents_df,\n",
-    "    model=OpenAIModel(\"gpt-4\", temperature=0.0),\n",
+    "    model=OpenAIModel(model=\"gpt-4\", temperature=0.0),\n",
     "    template=RAG_RELEVANCY_PROMPT_TEMPLATE,\n",
     "    rails=list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values()),\n",
     "    provide_explanation=True,\n",