Python - Error when trying to store results of datatxt.nex into dataframe

23 views

Skip to first unread message

Francesca Bottazzoli

unread,

Mar 23, 2022, 3:21:29 AM3/23/22

to Dandelion Support Forum

Hello,
I have a Basic Account and I'm using the Python API (dandelion-eu 0.3.2) to annotate a text in Italian, and then I'm trying to store the results in a dataframe. Basically I'm doing the following:

from dandelion import DataTXT
datatxt = DataTXT(token='mytoken')

mytext = 'Mario Draghi è il presidente del consiglio in Italia'
annotation = datatxt.nex(mytext,lang = 'it')
mylist = annotation['annotations']
mydf = pd.DataFrame.from_dict({"mycol":[mylist]})

and I get the following error. Can anyone help me?

Thank you in advance,

Francesca

--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~\AppData\Roaming\Python\Python37\site-packages\IPython\core\formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue() ~\AppData\Roaming\Python\Python37\site-packages\IPython\lib\pretty.py in pretty(self, obj) 392 if cls is not object \ 393 and callable(cls.__dict__.get('__repr__')): --> 394 return _repr_pprint(obj, self, cycle) 395 396 return _default_pprint(obj, self, cycle) ~\AppData\Roaming\Python\Python37\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle) 698 """A pprint that just redirects to the normal repr function.""" 699 # Find newlines and replace them with p.break_() --> 700 output = repr(obj) 701 lines = output.splitlines() 702 with p.group(): ~\anaconda3\envs\tesienv\lib\site-packages\pandas\core\frame.py in __repr__(self) 1000 line_width=width, 1001 max_colwidth=max_colwidth, -> 1002 show_dimensions=show_dimensions, 1003 ) 1004 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding) 1132 buf=buf, 1133 encoding=encoding, -> 1134 line_width=line_width, 1135 ) 1136 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in to_string(self, buf, encoding, line_width) 1051 1052 string_formatter = StringFormatter(self.fmt, line_width=line_width) -> 1053 string = string_formatter.to_string() 1054 return save_to_buffer(string, buf=buf, encoding=encoding) 1055 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\string.py in to_string(self) 23 24 def to_string(self) -> str: ---> 25 text = self._get_string_representation() 26 if 
self.fmt.should_show_dimensions: 27 text = "".join([text, self.fmt.dimensions_info]) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\string.py in _get_string_representation(self) 38 return self._empty_info_line 39 ---> 40 strcols = self._get_strcols() 41 42 if self.line_width is None: ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\string.py in _get_strcols(self) 29 30 def _get_strcols(self) -> list[list[str]]: ---> 31 strcols = self.fmt.get_strcols() 32 if self.fmt.is_truncated: 33 strcols = self._insert_dot_separators(strcols) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in get_strcols(self) 538 Render a DataFrame to a list of columns (as lists of strings). 539 """ --> 540 strcols = self._get_strcols_without_index() 541 542 if self.index: ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in _get_strcols_without_index(self) 802 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader) 803 ) --> 804 fmt_values = self.format_col(i) 805 fmt_values = _make_fixed_width( 806 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in format_col(self, i) 823 space=self.col_space.get(frame.columns[i]), 824 decimal=self.decimal, --> 825 leading_space=self.index, 826 ) 827 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting) 1238 ) 1239 -> 1240 return fmt_obj.get_result() 1241 1242 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in get_result(self) 1269 1270 def get_result(self) -> list[str]: -> 1271 fmt_values = self._format_strings() 1272 return _make_fixed_width(fmt_values, self.justify) 1273 ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in _format_strings(self) 1332 for i, v in enumerate(vals): 1333 if not is_float_type[i] and 
leading_space: -> 1334 fmt_values.append(f" {_format(v)}") 1335 elif is_float_type[i]: 1336 fmt_values.append(float_format(v)) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\format.py in _format(x) 1312 else: 1313 # object dtype -> 1314 return str(formatter(x)) 1315 1316 vals = extract_array(self.values, extract_numpy=True) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items) 226 escape_chars=escape_chars, 227 quote_strings=quote_strings, --> 228 max_seq_items=max_seq_items, 229 ) 230 elif isinstance(thing, str) and quote_strings: ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds) 120 r = [ 121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) --> 122 for i in range(min(nitems, len(seq))) 123 ] 124 body = ", ".join(r) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in <listcomp>(.0) 120 r = [ 121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) --> 122 for i in range(min(nitems, len(seq))) 123 ] 124 body = ", ".join(r) ~\anaconda3\envs\tesienv\lib\site-packages\pandas\io\formats\printing.py in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items) 212 return result 213 --> 214 if hasattr(thing, "__next__"): 215 return str(thing) 216 elif isinstance(thing, dict) and _nest_lvl < get_option( ~\anaconda3\envs\tesienv\lib\site-packages\dandelion\utils.py in __getattr__(self, name) 5 class AttributeDict(dict): 6 def __getattr__(self, name): ----> 7 return self[name] 8 9 def __setattr__(self, name, value): KeyError: '__next__'

Giacomo Berardi

unread,

Mar 25, 2022, 6:34:19 AM3/25/22

to Dandelion Support Forum, f.bott...@campus.unimib.it

Hi Francesca,

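Annotations are dictionaries (instances of dandelion's AttributeDict), not plain values. When pandas tries to display the dataframe it calls hasattr(thing, "__next__") on each of them, and AttributeDict.__getattr__ raises a KeyError instead of an AttributeError for a missing key, which is the error you see.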

It depends on what you want to obtain, but you should probably put the "title" of each annotation in the list, for example: mylist = [a['title'] for a in annotation['annotations']]

Please read https://dandelion.eu/docs/api/datatxt/nex/v1/#response

Best

Giacomo Berardi

Dandelion team

Reply all

Reply to author

Forward

0 new messages