% Conference paper (Neural Information Processing Systems, 2022), OpenReview record.
% Authors are stored in "Last, First" form so the surname split is unambiguous.
% NOTE(review): the first author's surname is taken to be the compound
% "Ben Avraham"; written as "Elad Ben Avraham", BibTeX would treat "Ben" as a
% given name and sort/abbreviate under "Avraham". Confirm against the paper.
% The citation key is intentionally left unchanged so existing citations resolve.
@inproceedings{avraham2022bringing,
  author    = {Ben Avraham, Elad and Herzig, Roei and Mangalam, Karttikeya and Bar, Amir and Rohrbach, Anna and Karlinsky, Leonid and Darrell, Trevor and Globerson, Amir},
  title     = {Bringing Image Scene Structure to Video via Frame-Clip Consistency of Object Tokens},
  booktitle = {Thirty-Sixth Conference on Neural Information Processing Systems},
  year      = {2022},
  url       = {https://openreview.net/forum?id=0JV4VVBsK6a},
}