Release new version 1.0.5

OmkarPathak · Oct 3, 2019 · 853c210 · 853c210
1 parent 0d32905
commit 853c210
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -20,8 +20,10 @@ Built with ❤︎ and :coffee: by  [Omkar Pathak](https://github.com/OmkarPathak
 - Extract mobile numbers
 - Extract skills
 - Extract total experience
-- Extract education (not very accurate as of now)
-- Extract experience (not very accurate as of now)
+- Extract college name
+- Extract degree
+- Extract designation
+- Extract company names
 
 # Installation
 
@@ -61,13 +63,15 @@ data = ResumeParser('/path/to/resume/file').get_extracted_data()
 For running the resume extractor you can also use the `cli` provided
 
 ```bash
-usage: pyresparser [-h] [-f FILE] [-d DIRECTORY]
+usage: pyresparser [-h] [-f FILE] [-d DIRECTORY] [-r REMOTEFILE]
+                   [-sf SKILLSFILE]
 
 optional arguments:
-  -h, --help                              show this help message and exit
-  -f FILE, --file FILE                    resume file to be extracted
-  -d DIRECTORY, --directory DIRECTORY     directory containing all the resumes to be extracted
-  -r REMOTEFILE, --remotefile REMOTEFILE  remote path for resume file to be extracted
+  -h, --help                                show this help message and exit
+  -f FILE, --file FILE                      resume file to be extracted
+  -d DIRECTORY, --directory DIRECTORY       directory containing all the resumes to be extracted
+  -r REMOTEFILE, --remotefile REMOTEFILE    remote path for resume file to be extracted
+  -sf SKILLSFILE, --skillsfile SKILLSFILE   custom skills CSV file against which skills are searched for
 ```
 
 For extracting data from a single resume file, use
@@ -88,6 +92,12 @@ For extracting data from remote resumes, execute
 pyresparser -r <path_to_remote_resume_file>
 ```
 
+To extract data against your own list of skills, create a CSV file with no headers. A sample file can be found [here](pyresparser/skills.csv).
+
+```bash
+pyresparser -sf <path_to_custom_skills_file>
+```
+
 # Notes:
 
 - If you are running the app on Windows, then you can only extract .docx and .pdf files
@@ -136,6 +146,8 @@ The module would return a list of dictionary objects with result as follows:
 
 - [https://medium.com/@divalicious.priya/information-extraction-from-cv-acec216c3f48](https://medium.com/@divalicious.priya/information-extraction-from-cv-acec216c3f48)
 
+- **Special thanks** to dataturks for their [annotated dataset](https://dataturks.com/blog/named-entity-recognition-in-resumes.php)
+
 # Donation
 
 If you have found my software to be of any use to you, do consider helping me pay my internet bills. This would encourage me to create more such software :smile:

diff --git a/pyresparser/command_line.py b/pyresparser/command_line.py
@@ -6,6 +6,7 @@
 import io
 import csv
 import multiprocessing as mp
+import urllib
 from urllib.request import Request, urlopen
 from pyresparser import ResumeParser
 from itertools import product
@@ -71,13 +72,16 @@ def __extract_from_directory(self, directory, skills_file=None):
             return 'Directory not found. Please provide a valid directory.'
 
     def __extract_from_remote_file(self, remote_file):
-        print_cyan('Extracting data from: {}'.format(remote_file))
-        req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
-        webpage = urlopen(req).read()
-        _file = io.BytesIO(webpage)
-        _file.name = remote_file.split('/')[-1]
-        resume_parser = ResumeParser(_file)
-        return [resume_parser.get_extracted_data()]
+        try:
+            print_cyan('Extracting data from: {}'.format(remote_file))
+            req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
+            webpage = urlopen(req).read()
+            _file = io.BytesIO(webpage)
+            _file.name = remote_file.split('/')[-1]
+            resume_parser = ResumeParser(_file)
+            return [resume_parser.get_extracted_data()]
+        except urllib.error.HTTPError:
+            return 'File not found. Please provide correct URL for resume file.'
 
 def resume_result_wrapper(args):
     if len(args) == 2:

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='pyresparser',
-    version='1.0.4',
+    version='1.0.5',
     description='A simple resume parser used for extracting information from resumes',
     long_description=open('README.rst').read(),
     url='https://github.com/OmkarPathak/pyresparser',