Python - Error when trying to store results of datatxt.nex into dataframe

23 views
Skip to first unread message

Francesca Bottazzoli

unread,
Mar 23, 2022, 3:21:29 AM (3/23/22)
to Dandelion Support Forum
Hello, 
I have a Basic Account and I'm using the Python API (dandelion-eu 0.3.2) to annotate a text in Italian, and then I'm trying to store the results in a dataframe. Basically, I'm doing the following:

from dandelion import DataTXT
datatxt = DataTXT(token='mytoken')
mytext = 'Mario Draghi è il presidente del consiglio in Italia'
annotation = datatxt.nex(mytext,lang = 'it')
mylist =  annotation['annotations']
mydf = pd.DataFrame.from_dict({"mycol":[mylist]})

and I get the following error. Can anyone help me?
Thank you in advance,
Francesca 

--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~\AppData\Roaming\Python\Python37\site-packages\IPython\core\formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue() ~\AppData\Roaming\Python\Python37\site-packages\IPython\lib\pretty.py in pretty(self, obj) 392 if cls is not object \ 393 and callable(cls.__dict__.get('__repr__')): --> 394 return _repr_pprint(obj, self, cycle) 395 396 return _default_pprint(obj, self, cycle) ~\AppData\Roaming\Python\Python37\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle) 698 """A pprint that just redirects to the normal repr function.""" 699 # Find newlines and replace them with p.break_() --> 700 output = repr(obj) 701 lines = output.splitlines() 702 with p.group(): ~\anaconda3\envs\tesienv\lib\site-packages\pandas\core\frame.py in __repr__(self) 1000 line_width=width, 1001 max_colwidth=max_colwidth, -> 1002 show_dimensions=show_dimensions, 1003 ) 1004 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding) 1132 buf=buf, 1133 encoding=encoding, -> 1134 line_width=line_width, 1135 ) 1136 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in to_string(self, buf, encoding, line_width) 1051 1052 string_formatter = StringFormatter(self.fmt, line_width=line_width) -> 1053 string = string_formatter.to_string() 1054 return save_to_buffer(string, buf=buf, encoding=encoding) 1055 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\string.py in to_string(self) 23 24 def to_string(self) -> str: ---> 25 text = self._get_string_representation() 26 if 
self.fmt.should_show_dimensions: 27 text = "".join([text, self.fmt.dimensions_info]) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\string.py in _get_string_representation(self) 38 return self._empty_info_line 39 ---> 40 strcols = self._get_strcols() 41 42 if self.line_width is None: ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\string.py in _get_strcols(self) 29 30 def _get_strcols(self) -> list[list[str]]: ---> 31 strcols = self.fmt.get_strcols() 32 if self.fmt.is_truncated: 33 strcols = self._insert_dot_separators(strcols) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in get_strcols(self) 538 Render a DataFrame to a list of columns (as lists of strings). 539 """ --> 540 strcols = self._get_strcols_without_index() 541 542 if self.index: ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in _get_strcols_without_index(self) 802 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader) 803 ) --> 804 fmt_values = self.format_col(i) 805 fmt_values = _make_fixed_width( 806 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in format_col(self, i) 823 space=self.col_space.get(frame.columns[i]), 824 decimal=self.decimal, --> 825 leading_space=self.index, 826 ) 827 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting) 1238 ) 1239 -> 1240 return fmt_obj.get_result() 1241 1242 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in get_result(self) 1269 1270 def get_result(self) -> list[str]: -> 1271 fmt_values = self._format_strings() 1272 return _make_fixed_width(fmt_values, self.justify) 1273 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in _format_strings(self) 1332 for i, v in enumerate(vals): 1333 if not is_float_type[i] and 
leading_space: -> 1334 fmt_values.append(f" {_format(v)}") 1335 elif is_float_type[i]: 1336 fmt_values.append(float_format(v)) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in _format(x) 1312 else: 1313 # object dtype -> 1314 return str(formatter(x)) 1315 1316 vals = extract_array(self.values, extract_numpy=True) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items) 226 escape_chars=escape_chars, 227 quote_strings=quote_strings, --> 228 max_seq_items=max_seq_items, 229 ) 230 elif isinstance(thing, str) and quote_strings: ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds) 120 r = [ 121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) --> 122 for i in range(min(nitems, len(seq))) 123 ] 124 body = ", ".join(r) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in <listcomp>(.0) 120 r = [ 121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) --> 122 for i in range(min(nitems, len(seq))) 123 ] 124 body = ", ".join(r) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items) 212 return result 213 --> 214 if hasattr(thing, "__next__"): 215 return str(thing) 216 elif isinstance(thing, dict) and _nest_lvl < get_option( ~\anaconda3\envs\tesienv\lib\site-packages\dandelion\utils.py in __getattr__(self, name) 5 class AttributeDict(dict): 6 def __getattr__(self, name): ----> 7 return self[name] 8 9 def __setattr__(self, name, value): KeyError: '__next__'

Giacomo Berardi

unread,
Mar 25, 2022, 6:34:19 AM (3/25/22)
to Dandelion Support Forum, f.bott...@campus.unimib.it
Hi Francesca,
annotations are dictionaries. 
It depends on what you want to obtain, but you should probably put the "title" of each annotation in the list, for example: mylist = [a['title'] for a in annotation['annotations']].

Best

Giacomo Berardi
Dandelion team

Reply all
Reply to author
Forward
0 new messages