Skip to content

Commit

Permalink
reorganized speculative reading datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
ZoeLeBlanc committed May 9, 2024
1 parent 699977e commit 35b1405
Show file tree
Hide file tree
Showing 23 changed files with 272,472 additions and 4,080 deletions.
236 changes: 128 additions & 108 deletions speculative_reading/CollaborativeFilteringRecommendations.ipynb

Large diffs are not rendered by default.

212 changes: 33 additions & 179 deletions speculative_reading/CombineRecommendations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down Expand Up @@ -95,53 +95,33 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {
"id": "ytG8sYRwwkH3"
},
"outputs": [],
"source": [
"lenskit_results = pd.read_csv('./data/aggregated_sampled_scores_lenskit_model100_without_periodicals.csv')\n",
"lenskit_results = pd.read_csv('./data/lenskit_results/aggregated_sampled_predictions_model100_without_periodicals.csv')\n",
"lenskit_results['result_type'] = 'lenskit'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"id": "xou1IQOZbe0t"
},
"outputs": [],
"source": [
"memory_cf_results = pd.read_csv('./data/aggregated_full_scores_collaborative_filtering_without_periodicals_circulation_limited.csv')\n",
"memory_cf_results['result_type'] = 'memory_cf'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "VVoCw3Rpffz_"
},
"outputs": [],
"source": [
"memory_cf_results = memory_cf_results.rename(columns={'item_uri':'item_id', })"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "_k72BMGQf79D"
},
"outputs": [],
"source": [
"memory_cf_results = pd.read_csv('./data/collaborative_filtering_results/aggregated_full_predictions_without_periodicals_circulation_limited.csv')\n",
"memory_cf_results['result_type'] = 'memory_cf'\n",
"memory_cf_results = memory_cf_results.rename(columns={'item_uri':'item_id', })\n",
"memory_cf_results['member_period'] = memory_cf_results.member_id + ': ' + memory_cf_results.period"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"metadata": {
"id": "cFqq-LlczVxe"
},
Expand All @@ -156,7 +136,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"metadata": {
"id": "N1TO0mcBOWYE"
},
Expand All @@ -167,29 +147,19 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 8,
"metadata": {
"id": "YPg3uMM8S4AP"
},
"outputs": [],
"source": [
"popular_current = pd.read_csv('./data/popular_books_by_subscription_circulation_period.csv')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "mnEc0iNoS_Lo"
},
"outputs": [],
"source": [
"popular_current = pd.read_csv('./data/popular_books_by_subscription_circulation_period.csv')\n",
"popular_all = pd.read_csv('./data/popular_books_by_library_duration.csv')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 9,
"metadata": {
"id": "YsEHz5MXAY1U"
},
Expand Down Expand Up @@ -260,7 +230,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 10,
"metadata": {
"id": "uesCW1DgCVgD"
},
Expand Down Expand Up @@ -288,7 +258,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -302,13 +272,13 @@
"data": {
"text/html": [
"\n",
"<div id=\"altair-viz-83f63da314f2477ab9c833e5e7151653\"></div>\n",
"<div id=\"altair-viz-06121d9483764597866ce652283f9d0d\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-83f63da314f2477ab9c833e5e7151653\") {\n",
" outputDiv = document.getElementById(\"altair-viz-83f63da314f2477ab9c833e5e7151653\");\n",
" if (outputDiv.id !== \"altair-viz-06121d9483764597866ce652283f9d0d\") {\n",
" outputDiv = document.getElementById(\"altair-viz-06121d9483764597866ce652283f9d0d\");\n",
" }\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
Expand Down Expand Up @@ -361,7 +331,7 @@
"alt.HConcatChart(...)"
]
},
"execution_count": 15,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -372,7 +342,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 12,
"metadata": {
"id": "Ox3oHXadegHk"
},
Expand Down Expand Up @@ -416,7 +386,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 13,
"metadata": {
"id": "N7kXeT8Myofw"
},
Expand Down Expand Up @@ -568,7 +538,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 14,
"metadata": {
"id": "ypXIx1QQgVwY"
},
Expand Down Expand Up @@ -610,7 +580,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand Down Expand Up @@ -1138,7 +1108,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 16,
"metadata": {
"id": "bps79ltxx26m"
},
Expand All @@ -1147,141 +1117,25 @@
"final_df = pd.concat(top_rows)\n",
"final_df = final_df[['member_period','period','lenskit_predicted_item', 'lenskit_median_score','lenskit_coef_variation', 'memory_cf_predicted_item', 'memory_cf_median_score','memory_cf_coef_variation','popular (current)', 'popular scores (current)', 'popular (all time)', 'popular scores (all time)', 'member_id']]\n",
"final_df.loc[final_df.memory_cf_predicted_item.str.contains('Ezra'), 'memory_cf_predicted_item'] = \"*Instigations of Ezra Pound: Together with an Essay on the Chinese Written Character*,<br>Ezra Pound and Ernest Fenollosa\"\n",
"final_df.to_csv('./data/final_top_results.csv', index=False)"
"# final_df.to_csv('./data/final_top_results.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 21,
"metadata": {
"id": "9WzN-rBJA2lj"
},
"outputs": [],
"source": [
"top_results_lenskit = pd.read_csv('./data/top_scores_lenskit_model100.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "4OgP3mGW6zR2"
},
"outputs": [],
"source": [
"top_scores_memorycf = pd.read_csv('./data/memorycf_top_results.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "flkj42wL68eu"
},
"outputs": [],
"source": [
"subset_lenskit = top_results_lenskit[['member_id', 'subscription_start', 'subscription_end', 'item_id','score','formatted_title', 'period', 'model_run']]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"id": "t23JCKeq74ug"
},
"outputs": [],
"source": [
"subset_lenskit = subset_lenskit[subset_lenskit.member_id == 'hemingway-ernest']"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"id": "-Gnr7PZG7By9"
},
"outputs": [],
"source": [
"subset_mcf = top_scores_memorycf[['item_uri', 'formatted_chart_title', 'member_id', 'period', 'subscription_start', 'subscription_end', 'metric', 'score']]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "B91_ehYeP1RC",
"outputId": "ae83cb95-50d6-4488-a89c-ae20bde625c9"
},
"outputs": [
{
"data": {
"text/plain": [
"period\n",
" 1921-12-28/1922-11-08 34\n",
" 1924-03-28/1925-03-28 29\n",
"Name: item_id, dtype: int64"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subset_lenskit.groupby('period')['item_id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YLytNfMEP62D",
"outputId": "e606eda0-a559-487b-b28c-9c796873ee32"
},
"outputs": [
{
"data": {
"text/plain": [
"period\n",
"1921-12-28/1922-11-08 30\n",
"1924-03-28/1925-03-28 47\n",
"Name: item_uri, dtype: int64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subset_mcf.groupby('period')['item_uri'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"id": "J4-sQUmSfSlb"
},
"outputs": [],
"source": [
"subset_lenskit['period'] = subset_lenskit.period.str.split(' ').str[1]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"id": "RT82h23ST8-I"
},
"outputs": [],
"source": [
"top_results_lenskit = pd.read_csv('./data/sampled_scores_lenskit_model100_without_periodicals.csv')\n",
"subset_lenskit = top_results_lenskit[['member_id', 'subscription_start', 'subscription_end', 'item_id','score','formatted_title', 'period', 'model_run']]\n",
"subset_lenskit = subset_lenskit[subset_lenskit.member_id == 'hemingway-ernest']\n",
"print(subset_lenskit.groupby('period')['item_id'].nunique())\n",
"top_scores_memorycf = pd.read_csv('./data/full_scores_collaborative_filtering_without_periodicals_circulation_limited.csv')\n",
"subset_mcf = top_scores_memorycf[['item_uri', 'formatted_chart_title', 'member_id', 'period', 'subscription_start', 'subscription_end', 'metric', 'score']]\n",
"print(subset_mcf.groupby('period')['item_uri'].nunique())\n",
"subset_lenskit['period'] = subset_lenskit.period.str.split(' ').str[1]\n",
"subset_mcf.loc[subset_mcf.formatted_chart_title.str.contains('Instigations of Ezra Pound'), 'formatted_chart_title'] = 'Instigations of Ezra Pound: Together with an Essay on the Chinese Written Character by Ezra Pound and Ernest Fenollosa'"
]
},
Expand Down
Loading

0 comments on commit 35b1405

Please sign in to comment.