| Type | Description |
|---|---|
| PERSON | People, including fictional. |
| NORP | Nationalities or religious or political groups. |
| FAC | Buildings, airports, highways, bridges, etc. |
| ORG | Companies, agencies, institutions, etc. |
| GPE | Countries, cities, states. |
| LOC | Non-GPE locations, mountain ranges, bodies of water. |
| PRODUCT | Objects, vehicles, foods, etc. (Not services.) |
| EVENT | Named hurricanes, battles, wars, sports events, etc. |
| WORK_OF_ART | Titles of books, songs, etc. |
| LAW | Named documents made into laws. |
| LANGUAGE | Any named language. |
| DATE | Absolute or relative dates or periods. |
| TIME | Times smaller than a day. |
| PERCENT | Percentage, including "%". |
| MONEY | Monetary values, including unit. |
| QUANTITY | Measurements, as of weight or distance. |
| ORDINAL | “first”, “second”, etc. |
| CARDINAL | Numerals that do not fall under another type. |
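If you want these label descriptions programmatically rather than from a table, spaCy exposes them through `spacy.explain()`; a minimal sketch:

```python
import spacy

# spacy.explain() returns the human-readable description for a label,
# or None if the label is unknown.
for label in ["PERSON", "GPE", "MONEY", "DATE"]:
    print(f"{label}: {spacy.explain(label)}")
```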
pip install spacy
python -m spacy download en_core_web_sm
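A quick way to confirm the model downloaded correctly is to load it and inspect the pipeline components (the exact component names vary with the spaCy and model versions):

```python
import spacy

# raises OSError if en_core_web_sm is not installed
nlp = spacy.load("en_core_web_sm")
print(nlp.pipe_names)  # should include 'ner' among the pipeline components
```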
import spacy

# load the small English pipeline
spacy_nlp = spacy.load('en_core_web_sm')

text = "John bought a Toyota camry 2019 model in Toronto in January 2020 at a cost of $38000"
doc = spacy_nlp(text.strip())

# create sets to hold the extracted entities
named_entities = set()
money_entities = set()
organization_entities = set()
location_entities = set()
time_indicator_entities = set()

for ent in doc.ents:
    entry = ent.lemma_.lower()
    # remove the recognized entity span from the working copy of the text
    text = text.replace(ent.text, "")
    # time indicator entities
    if ent.label_ in ["TIME", "DATE"]:
        time_indicator_entities.add(entry)
    # monetary value entities
    elif ent.label_ in ["MONEY"]:
        money_entities.add(entry)
    # organization entities
    elif ent.label_ in ["ORG"]:
        organization_entities.add(entry)
    # geopolitical and geographical entities
    elif ent.label_ in ["GPE", "LOC"]:
        location_entities.add(entry)
    # works of art, events and people
    elif ent.label_ in ["WORK_OF_ART", "EVENT", "PERSON"]:
        named_entities.add(entry.title())

print(f"named entities - {named_entities}")
print(f"money entities - {money_entities}")
print(f"location entities - {location_entities}")
print(f"time indicator entities - {time_indicator_entities}")
print(f"organization entities - {organization_entities}")
Output:

named entities - {'John'}
money entities - {'38000'}
location entities - {'toronto'}
time indicator entities - {'2019', 'january 2020'}
organization entities - {'toyota'}
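For a quick visual check of the same results, spaCy's built-in displacy visualizer can highlight the detected entity spans; a minimal sketch, reusing the `doc` parsed above:

```python
from spacy import displacy

# In a Jupyter notebook this displays the highlighted entities inline;
# outside a notebook it returns the markup as an HTML string, and
# displacy.serve(doc, style="ent") can serve it in a browser instead.
displacy.render(doc, style="ent")
```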
import spacy


class NamedEntityExtractor:
    """
    Performs named entity recognition on texts.
    """

    def extract(self, text: str):
        """
        Performs named entity recognition on the given text.

        :param text: text to extract entities from
        """
        # load the small English pipeline
        spacy_nlp = spacy.load('en_core_web_sm')

        # parse the text into a spaCy document
        doc = spacy_nlp(text.strip())

        # create sets to hold the extracted entities
        named_entities = set()
        money_entities = set()
        organization_entities = set()
        location_entities = set()
        time_indicator_entities = set()

        for ent in doc.ents:
            entry = ent.lemma_.lower()
            # remove the recognized entity span from the working copy of the text
            text = text.replace(ent.text, "")
            # time indicator entities
            if ent.label_ in ["TIME", "DATE"]:
                time_indicator_entities.add(entry)
            # monetary value entities
            elif ent.label_ in ["MONEY"]:
                money_entities.add(entry)
            # organization entities
            elif ent.label_ in ["ORG"]:
                organization_entities.add(entry)
            # geopolitical and geographical entities
            elif ent.label_ in ["GPE", "LOC"]:
                location_entities.add(entry)
            # works of art, events and people
            elif ent.label_ in ["WORK_OF_ART", "EVENT", "PERSON"]:
                named_entities.add(entry.title())

        print(f"named entities - {named_entities}")
        print(f"money entities - {money_entities}")
        print(f"location entities - {location_entities}")
        print(f"time indicator entities - {time_indicator_entities}")
        print(f"organization entities - {organization_entities}")


if __name__ == '__main__':
    named_entity_extractor = NamedEntityExtractor()
    text = "John bought a Toyota camry 2019 model in Toronto in January 2020 at a cost of $38000"
    named_entity_extractor.extract(text)
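If you need to extract entities from many texts, it is worth loading the model once and streaming the texts through `nlp.pipe()` rather than reloading the pipeline on every call; a minimal sketch (the second sentence is only an illustrative, made-up example):

```python
import spacy

nlp = spacy.load("en_core_web_sm")

texts = [
    "John bought a Toyota camry 2019 model in Toronto in January 2020 at a cost of $38000",
    "Mary flew to Paris in March 2021 and spent $2,500 on the trip",  # hypothetical example
]

# nlp.pipe() processes documents in batches, which is considerably faster
# than calling the pipeline on each string individually.
for doc in nlp.pipe(texts):
    print([(ent.text, ent.label_) for ent in doc.ents])
```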