
Load existing model #17

Open
saurabhbh21 opened this issue Jul 2, 2018 · 3 comments

Comments

@saurabhbh21

I trained a model on a Wikipedia dump. I was trying to load the model in another code snippet for classification, but I'm unable to find the model-loading code. Any help would be much appreciated.

@woshiyyya

Hi! I encountered the same problem. Do you have any solution now? Thanks. @saurabhbh21

@userofgithub1

userofgithub1 commented Sep 4, 2018

@saurabhbh21 @woshiyyya Hey! Have either of you figured out how to load a trained model? Thanks a lot.

@saurabhbh21
Author

saurabhbh21 commented Sep 5, 2018

Please look at the piece of code below; it may help you figure out how to load and store the model.

import os
import findspark
findspark.init()
import pyspark
sc = pyspark.SparkContext()

import cPickle as pickle  # Python 2; on Python 3 use "import pickle"

from sift.corpora import wikipedia
from sift.models import text, links

from nel.model import data
from nel.model.store import file

from nel.doc import Doc

from nel.harness.format import from_sift

from nel.process.pipeline import Pipeline
from nel.process.candidates import NameCounts
from nel.features.probability import EntityProbability, NameProbability

from nel.learn import ranking
from nel.features import meta
from nel.model import resolution
from nel.model.disambiguation import EntityCounts, NameProbability
from nel.process import resolve

from nel.harness.format import inject_markdown_links

from nel.process import tag, coref

wikipedia_base_path = 'latest/'
wikidata_base_path = 'latest/'

wikipedia_corpus = wikipedia.WikipediaCorpus()(sc, wikipedia_base_path)
docs = wikipedia.WikipediaArticles()(wikipedia_corpus).cache()

docs = sc.parallelize(docs.take(3))
list_element = docs.collect()
for element in list_element:
    print('\nElement: \n', element)

wikipedia_pfx = 'en.wikipedia.org/wiki/'

ec_model = (links
    .EntityCounts(min_count=1, filter_target=wikipedia_pfx)
    .build(docs)
    .map(links.EntityCounts.format_item))

enc_model = (links
    .EntityNameCounts(lowercase=True, filter_target=wikipedia_pfx)
    .build(docs)
    .filter(lambda (name, counts): sum(counts.itervalues()) > 0)
    .map(links.EntityNameCounts.format_item))

print(ec_model.collect())

os.environ['NEL_DATASTORE_URI'] = 'file:///home/botminds/Desktop/nel'

# Save the models under the name 'wikipedia': ecounts followed by necounts
data.ObjectStore \
    .Get('models:ecounts[wikipedia]') \
    .save_many(ec_model.collect())

data.ObjectStore \
    .Get('models:necounts[wikipedia]') \
    .save_many(enc_model.collect())

# Load the model

candidate_generation = [
    NameCounts('wikipedia', 10)
]
feature_extraction = [
    EntityProbability('wikipedia'),
    # NameProbability('wikipedia')
]

# Training
training_pipeline = Pipeline(candidate_generation + feature_extraction)
training_docs = [from_sift(doc) for doc in docs.takeSample(False, 100)]

train = [training_pipeline(doc) for doc in training_docs]

t = training_docs[0]

ranker = ranking.TrainLinearRanker(name='ranker', features=[f.id for f in feature_extraction])(train)

# with open(file_ranker, 'wb') as f:
#     pickle.dump(ranker, f, pickle.HIGHEST_PROTOCOL)

# Loading the model for the ranker
ranker = None
file_ranker = 'models/ranker.pkl'

with open(file_ranker, 'rb') as f:
    ranker = pickle.load(f)

classifier_feature = meta.ClassifierScore(ranker)
linking = [
    classifier_feature,
    resolve.FeatureRankResolver(classifier_feature.id)
]

linking_pipeline = Pipeline(candidate_generation + feature_extraction + linking)
sample = [from_sift(doc) for doc in docs.takeSample(False, 10)]

for doc in sample:
    for chain in doc.chains:
        chain.resolution = None
        for mention in chain.mentions:
            mention.resolution = None
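Stripped of the nel-specific parts, the save/load step above is plain Python pickling of the trained ranker. A minimal self-contained sketch of that pattern (the dict here just stands in for the trained ranker object, and `ranker.pkl` is an illustrative filename):

```python
import pickle

# Any picklable trained object works; this dict stands in for the nel ranker.
model = {"name": "ranker", "weights": [0.1, 0.2, 0.3]}

# Save the model (same idea as the commented-out pickle.dump above)
with open("ranker.pkl", "wb") as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)

# Load it back, e.g. from a separate classification script
with open("ranker.pkl", "rb") as f:
    loaded = pickle.load(f)
```

After loading, `loaded` is a full independent copy of the saved object, so the classification script never needs the training code, only the pickle file and the classes it references on the import path.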
