Parsing A (modified) Ris File With Python
I have a bunch of (modified) RIS files. The toy example looks like the following: Record #1 of 2 ID: CN-01160769 AU: Uedo N AU: Kasiser R TI: Development of an E-learning system SO
Solution 1:
You just need to add a check that skips the "Record #x of 2" header
line before the parser tries to read it as a field.
import re
class RIS:
    """Parser for a (modified) RIS bibliography export.

    The input is a sequence of text lines. Each field line has the form
    ``TAG: value`` where ``TAG`` is an upper-case letter followed by an
    upper-case letter or digit. An ``ID`` field opens a new record and a
    ``YR`` field closes it; the closed record is appended to ``records``.
    ``AU`` and ``AD`` may repeat and are collected into lists; every other
    tag may appear at most once per record.

    NOTE: the ``YR`` value itself is not stored, and a record that never
    reaches a ``YR`` field is never appended to ``records``.
    """

    # Field line: two-character tag, a colon and one space, then the value.
    _FIELD_RE = re.compile(r"^([A-Z][A-Z0-9]): (.*)")

    # Tags that may occur several times in one record (stored as lists).
    _REPEATABLE_TAGS = ("AU", "AD")

    def __init__(self, in_file=None):
        """Create the parser and, if *in_file* is given, parse it.

        Args:
            in_file: Optional iterable of text lines (for example an open
                file object). When omitted, ``records`` starts empty.
        """
        # Completed records, in input order; each is a dict keyed by tag.
        self.records = []
        # Kept for compatibility; reset by parse() and otherwise unused here.
        self.current_tag = None
        # Record being built, or None when no record is open.
        self.current_record = None
        if in_file is not None:
            self.parse(in_file)

    def parse(self, in_file):
        """Parse every line of *in_file* and extend ``records``.

        Blank lines are ignored. A line that is not a ``TAG: value`` field
        is skipped when it contains ``#`` (the ``Record #x of y`` header)
        and rejected otherwise.

        Args:
            in_file: Iterable of text lines.

        Raises:
            ValueError: If a non-blank line is neither a field nor a
                header line, or if ``process_field`` rejects a field.
        """
        self.current_tag = None
        self.current_record = None

        for raw_line in in_file:
            line = raw_line.strip()
            if not line:
                continue

            match = self._FIELD_RE.match(line)
            if match is None:
                # Only lines that are NOT fields may be skipped as headers;
                # testing for "#" first would silently drop real fields
                # whose value happens to contain "#".
                if "#" in line:
                    continue
                raise ValueError(line)

            tag, field = match.groups()
            self.process_field(tag, field)

    def process_field(self, tag, field):
        """Store one parsed field in the record currently being built.

        Args:
            tag: Two-character field tag, e.g. ``"AU"``.
            field: Field value (text after ``"TAG: "``).

        Raises:
            ValueError: If a field other than ``ID`` arrives while no
                record is open, or if a non-repeatable tag occurs twice
                in the same record.
        """
        if tag == "ID":
            # ID always starts a fresh record.
            self.current_record = {tag: field}
            return

        if self.current_record is None:
            # Report malformed input with the same exception type as the
            # other parse errors instead of failing on None.
            raise ValueError("Field outside of a record: %s" % tag)

        if tag == "YR":
            # YR marks the end of the record; its value is not kept.
            self.records.append(self.current_record)
            self.current_record = None
        elif tag in self._REPEATABLE_TAGS:
            self.current_record.setdefault(tag, []).append(field)
        elif tag in self.current_record:
            error_str = "Duplicate tag: %s" % tag
            raise ValueError(error_str)
        else:
            self.current_record[tag] = field
def main(path="test.ris"):
    """Parse the RIS file at *path* and pretty-print its records.

    Args:
        path: File to read; defaults to ``"test.ris"`` in the current
            working directory.
    """
    import pprint

    # All parsing happens inside the RIS constructor, so the file can be
    # closed before anything is printed.
    with open(path, "rt") as ris_file:
        ris = RIS(ris_file)

    pp = pprint.PrettyPrinter()
    pp.pprint(ris.records)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
The added code (inside the loop over lines in parse) is:
    if "#" in line:
        continue
The output is:
[{'AU': ['Uedo N', 'Kasiser R'],
'ID': 'CN-01160769',
'SO': 'United European Gastroenterology Journal',
'TI': 'Development of an E-learning system'},
{'AU': ['Krogh LQ'],
'ID': 'CN-01070265',
'SO': 'Resuscitation',
'TI': 'E-learning in pediatric basic life support'}]
Post a Comment for "Parsing A (modified) Ris File With Python"