diff --git a/README.md b/README.md index 19a1734..16c4de0 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,10 @@ Built with ❤︎ and :coffee: by [Omkar Pathak](https://github.com/OmkarPathak - Extract mobile numbers - Extract skills - Extract total experience -- Extract education (not very accurate as of now) -- Extract experience (not very accurate as of now) +- Extract college name +- Extract degree +- Extract designation +- Extract company names # Installation @@ -61,13 +63,15 @@ data = ResumeParser('/path/to/resume/file').get_extracted_data() For running the resume extractor you can also use the `cli` provided ```bash -usage: pyresparser [-h] [-f FILE] [-d DIRECTORY] +usage: pyresparser [-h] [-f FILE] [-d DIRECTORY] [-r REMOTEFILE] + [-sf SKILLSFILE] optional arguments: - -h, --help show this help message and exit - -f FILE, --file FILE resume file to be extracted - -d DIRECTORY, --directory DIRECTORY directory containing all the resumes to be extracted - -r REMOTEFILE, --remotefile REMOTEFILE remote path for resume file to be extracted + -h, --help show this help message and exit + -f FILE, --file FILE resume file to be extracted + -d DIRECTORY, --directory DIRECTORY directory containing all the resumes to be extracted + -r REMOTEFILE, --remotefile REMOTEFILE remote path for resume file to be extracted + -sf SKILLSFILE, --skillsfile SKILLSFILE custom skills CSV file against which skills are searched for ``` For extracting data from a single resume file, use @@ -88,6 +92,12 @@ For extracting data from remote resumes, execute pyresparser -r ``` +For extracting data against your specified skills, create a CSV file with no headers. 
Sample file can be found [here](pyresparser/skills.csv) + +```bash +pyresparser -sf <path_to_custom_skills_csv_file> +``` + # Notes: - If you are running the app on windows, then you can only extract .docs and .pdf files @@ -136,6 +146,8 @@ The module would return a list of dictionary objects with result as follows: - [https://medium.com/@divalicious.priya/information-extraction-from-cv-acec216c3f48](https://medium.com/@divalicious.priya/information-extraction-from-cv-acec216c3f48) +- **Special thanks** to dataturks for their [annotated dataset](https://dataturks.com/blog/named-entity-recognition-in-resumes.php) + # Donation If you have found my softwares to be of any use to you, do consider helping me pay my internet bills. This would encourage me to create many such softwares :smile: diff --git a/pyresparser/command_line.py b/pyresparser/command_line.py index e048203..e399aea 100755 --- a/pyresparser/command_line.py +++ b/pyresparser/command_line.py @@ -6,6 +6,7 @@ import io import csv import multiprocessing as mp +import urllib from urllib.request import Request, urlopen from pyresparser import ResumeParser from itertools import product @@ -71,13 +72,16 @@ def __extract_from_directory(self, directory, skills_file=None): return 'Directory not found. Please provide a valid directory.' 
def __extract_from_remote_file(self, remote_file): - print_cyan('Extracting data from: {}'.format(remote_file)) - req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'}) - webpage = urlopen(req).read() - _file = io.BytesIO(webpage) - _file.name = remote_file.split('/')[-1] - resume_parser = ResumeParser(_file) - return [resume_parser.get_extracted_data()] + try: + print_cyan('Extracting data from: {}'.format(remote_file)) + req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'}) + webpage = urlopen(req).read() + _file = io.BytesIO(webpage) + _file.name = remote_file.split('/')[-1] + resume_parser = ResumeParser(_file) + return [resume_parser.get_extracted_data()] + except urllib.error.HTTPError: + return 'File not found. Please provide correct URL for resume file.' def resume_result_wrapper(args): if len(args) == 2: diff --git a/setup.py b/setup.py index 0e2220c..e4b0422 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='pyresparser', - version='1.0.4', + version='1.0.5', description='A simple resume parser used for extracting information from resumes', long_description=open('README.rst').read(), url='https://github.com/OmkarPathak/pyresparser',