Skip to content

Commit

Permalink
Changed strategy in nlp
Browse files Browse the repository at this point in the history
  • Loading branch information
rita-gama committed May 18, 2023
1 parent 5f9bab8 commit c121a52
Showing 1 changed file with 1 addition and 84 deletions.
85 changes: 1 addition & 84 deletions scripts/recognition/nlp.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,3 @@
import spacy

# ---------------- Pre-processing ---------------
#https://pyspellchecker.readthedocs.io/en/latest/

# from spellchecker import SpellChecker

# spell = SpellChecker()

# def correct_text(text):
# corrected_text = []
# phrase = text.split()
# for word in phrase:
# print(word)
# corrected_word = spell.correction(word)
# print(corrected_word)
# corrected_text.append(corrected_word)
# return " ".join(corrected_text)

# #testing
# phrase = "hell my neme is Rita. I am portgese"
# print(correct_text(phrase))

# ------------------ Correct words according to context --------

# import sparknlp

# spark = sparknlp.start()

# from sparknlp.base import *
# from sparknlp.annotator import *
# from sparknlp.common import *
# from pyspark.ml import Pipeline
# import pandas as pd

# documentAssembler = DocumentAssembler()\
# .setInputCol("text")\
# .setOutputCol("document")

# tokenizer = RecursiveTokenizer()\
# .setInputCols(["document"])\
# .setOutputCol("token")\
# .setPrefixes(["\"", "(", "[", "\n"])\
# .setSuffixes([".", ",", "?", ")","!", "'s"])

# spellModel = ContextSpellCheckerModel\
# .pretrained('spellcheck_dl')\
# .setInputCols("token")\
# .setOutputCol("checked")\
# .setErrorThreshold(4.0)\
# .setTradeoff(6.0)

# finisher = Finisher()\
# .setInputCols("checked")

# pipeline = Pipeline(stages = [
# documentAssembler,
# tokenizer,
# spellModel,
# finisher])

# empty_ds = spark.createDataFrame([[""]]).toDF("text")
# lp = LightPipeline(pipeline.fit(empty_ds))

# lp.annotate("Please alliow me tao introdduce myhell, I am a man of waelth und tiasted")




# ------------------ NLP ------------------------

# Reference: https://spacy.io/usage/spacy-101

# Load the "en_core_web_sm" spaCy pipeline once at import time; it is stored
# in the module-level name `nlp` that answer_generate() calls on its input.
nlp = spacy.load("en_core_web_sm")

def answer_generate(user_input):
    """Print a greeting for every "hello" token found in *user_input*.

    The text is processed with the module-level spaCy pipeline ``nlp`` and
    each resulting token is compared, case-insensitively, with the greeting
    keyword. Nothing is returned; the reply is written to stdout.

    Args:
        user_input: Raw text to analyse.
    """
    doc = nlp(user_input)

    for token in doc:
        # Debug aid: print(token.text, token.pos_, token.dep_)
        # Compare for equality. The previous ``in ("hello")`` was a substring
        # test against the string "hello" (parentheses alone do not build a
        # tuple), so tokens such as "he", "hell" or "lo" also matched.
        if token.text.lower() == "hello":
            print("Hi there!")


# Quick manual check: one sentence without a greeting and one with it.
for _sample_text in (
    "Apple is looking at buying U.K. startup for $1 billion",
    "Hello there! My name is Rita",
):
    answer_generate(_sample_text)
#TODO: Naïve Bayes Classifier

0 comments on commit c121a52

Please sign in to comment.