@conference {1506361, title = {Asynchronous Algorithms for Approximate Distributed Constraint Optimization with Quality Bounds}, booktitle = {International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, year = {2010}, abstract = {Distributed Constraint Optimization (DCOP) is a popular framework for cooperative multi-agent decision making. DCOP is NP-hard, so an important line of work focuses on developing fast incomplete solution algorithms for large-scale applications. One of the few incomplete algorithms to provide bounds on solution quality is k-size optimality, which defines a local optimality criterion based on the size of the group of deviating agents. Unfortunately, the lack of a general-purpose algorithm and the commitment to forming groups based solely on group size has limited the use of k-size optimality. This paper introduces t-distance optimality which departs from k-size optimality by using graph distance as an alternative criterion for selecting groups of deviating agents. This throws open a new research direction into the tradeoffs between different group selection and coordination mechanisms for incomplete DCOP algorithms. We derive theoretical quality bounds for t-distance optimality that improve known bounds for k-size optimality. In addition, we develop a new efficient asynchronous local search algorithm for finding both k-size and t-distance optimal solutions {\textemdash} allowing these concepts to be deployed in real applications. Indeed, empirical results show that this algorithm significantly outperforms the only existing algorithm for finding general k-size optimal solutions, which is also synchronous. Finally, we compare the algorithmic performance of k-size and t-distance optimality using this algorithm. 
We find that t-distance consistently converges to higher-quality solutions in the long run, but results are mixed on convergence speed; we identify cases where k-size and t-distance converge faster.}, author = {Kiekintveld, Christopher and Yin, Zhengyu and Kumar, Atul and Tambe, Milind} } @article {1506495, title = {Balancing Local Resources and Global Goals in Multiply-Constrained DCOP}, journal = {Journal of Multiagent and Grid Systems (MAGS)}, volume = {6}, number = {4}, year = {2010}, pages = {353--393}, abstract = {Distributed constraint optimization (DCOP) is a useful framework for cooperative multiagent coordination. DCOP focuses on optimizing a single team objective. However, in many domains, agents must satisfy constraints on resources consumed locally while optimizing the team goal. Yet, these resource constraints may need to be kept private. Designing DCOP algorithms for these domains requires managing complex trade-offs in completeness, scalability, privacy and efficiency. This article defines the multiply-constrained DCOP (MC-DCOP) framework and provides complete (globally optimal) and incomplete (locally optimal) algorithms for solving MC-DCOP problems. Complete algorithms find the best allocation of scarce resources while optimizing the team objective, while incomplete algorithms are more scalable. The algorithms use four main techniques: (i) transforming constraints to maintain privacy; (ii) dynamically setting upper bounds on resource consumption; (iii) identifying the extent to which the local graph structure allows agents to compute exact bounds; and (iv) using a virtual assignment to flag problems rendered unsatisfiable by resource constraints. Proofs of correctness are presented for all algorithms. 
Experimental results illustrate the strengths and weaknesses of both the complete and incomplete algorithms.}, author = {Bowring, Emma and Tambe, Milind and Yokoo, Makoto} } @article {1506358, title = {A Framework for Evaluating Deployed Security Systems: Is There a Chink in your ARMOR?}, journal = {Informatica}, number = {34}, year = {2010}, pages = {129--139}, abstract = {A growing number of security applications are being developed and deployed to explicitly reduce risk from adversaries{\textquoteright} actions. However, there are many challenges when attempting to evaluate such systems, both in the lab and in the real world. Traditional evaluations used by computer scientists, such as runtime analysis and optimality proofs, may be largely irrelevant. The primary contribution of this paper is to provide a preliminary framework which can guide the evaluation of such systems and to apply the framework to the evaluation of ARMOR (a system deployed at LAX since August 2007). This framework helps to determine what evaluations could, and should, be run in order to measure a system{\textquoteright}s overall utility. A secondary contribution of this paper is to help familiarize our community with some of the difficulties inherent in evaluating deployed applications, focusing on those in security domains.}, author = {Taylor, Matthew E. and Kiekintveld, Christopher and Western, Craig and Tambe, Milind} } @conference {1506498, title = {Game-Theoretic Allocation of Security Forces in a City}, booktitle = {AAMAS 2010 workshop on Optimization in Multiagent Systems}, year = {2010}, abstract = {Law enforcement agencies frequently must allocate limited resources to protect targets embedded in a network, such as important buildings in a city road network. Since intelligent attackers may observe and exploit patterns in the allocation, it is crucial that the allocations be randomized. 
We cast this problem as an attacker-defender Stackelberg game: the defender{\textquoteright}s goal is to obtain an optimal mixed strategy for allocating resources. The defender{\textquoteright}s strategy space is exponential in the number of resources, and the attacker{\textquoteright}s exponential in the network size. Existing algorithms are therefore useless for all but the smallest networks. We present a solution approach based on two key ideas: (i) a polynomial-sized game model obtained via an approximation of the strategy space, solved efficiently using a linear program; (ii) two efficient techniques that map solutions from the approximate game to the original, with proofs of correctness under certain assumptions. We present in-depth experimental results, including an evaluation on part of the Mumbai road network.}, author = {Tsai, Jason and Yin, Zhengyu and Kwak, Jun-young and David Kempe and Kiekintveld, Christopher and Tambe, Milind} } @conference {1506364, title = {How to Protect a City: Strategic Security Placement in Graph-Based Domains }, booktitle = {Extended Abstract for International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, year = {2010}, abstract = {Protecting targets against potential attacks is an important problem for security forces worldwide. The general setting we study is as follows: An attacker assigns different values to reaching (and damaging or destroying) one of multiple targets. A defender is able to allocate resources (such as patrol cars or canine units) to capture the attacker before he reaches a target. In many of these situations, the domain has structure that is naturally modeled as a graph. For example, city maps can be modeled with intersections as nodes and roads as edges, where nodes are targets for attackers. In order to prevent attacks, security forces can schedule checkpoints on edges (e.g., roads) to detect intruders. 
For instance, in response to the devastating terrorist attacks in 2008 [1], Mumbai police deploy randomized checkpoints as one countermeasure to prevent future attacks [2]. The strategy for placing these checkpoints must necessarily be decided in advance of attack attempts, should account for targets of differing importance, and should anticipate an intelligent adversary who can observe the strategy prior to attacking. In light of these requirements, game-theoretic approaches have been developed to assist in generating randomized security strategies in several real-world domains, including applications in use by the Los Angeles International Airport [12] and the Federal Air Marshals Service [13]. To account for the attacker{\textquoteright}s ability to observe deployment patterns, these methods model the problem as a Stackelberg game and solve for an optimal probability distribution over the possible deployments to ensure unpredictability. Novel solvers for classes of security games have recently been developed [3, 11, 4]. However, these solvers take time at least polynomial in the number of actions of both players. In our setting, every path from an entry point to a target is an attacker action, and every set of r or fewer edges is a defender action. (r is the maximum number of checkpoints.) Since the attacker{\textquoteright}s actions grow exponentially with the size of the network, and the defender{\textquoteright}s actions grow exponentially with r, existing methods quickly become too slow when applied to large real-world domains. 
Therefore, our goal is to develop faster methods for these settings and evaluate them theoretically and empirically.}, author = {Tsai, Jason and Yin, Zhengyu and Kwak, Jun-young and Kempe, David and Kiekintveld, Christopher and Tambe, Milind} } @article {1506496, title = {Introducing Communication in Dis-POMDPs with Locality of Interaction}, journal = {Journal of Web Intelligence and Agent Systems (WIAS)}, volume = {8}, number = {3}, year = {2010}, pages = {303--311}, abstract = {The Networked Distributed POMDPs (ND-POMDPs) can model multiagent systems in uncertain domains and has begun to scale-up the number of agents. However, prior work in ND-POMDPs has failed to address communication. Without communication, the size of a local policy at each agent within the ND-POMDPs grows exponentially in the time horizon. To overcome this problem, we extend existing algorithms so that agents periodically communicate their observation and action histories with each other. After communication, agents can start from new synchronized belief state. Thus, we can avoid the exponential growth in the size of local policies at agents. Furthermore, we introduce an idea that is similar to the Point-based Value Iteration algorithm to approximate the value function with a fixed number of representative points. 
Our experimental results show that we can obtain much longer policies than existing algorithms as long as the interval between communications is small.}, author = {Tasaki, Makoto and Yabu, Yuichi and Iwanari, Yuki and Yokoo, Makoto and Marecki, Janusz and Varakantham, Pradeep and Tambe, Milind} } @conference {1506494, title = {Introducing Multiagent Systems to Undergraduates Through Games and Chocolate}, booktitle = {Book Chapter in {\textquoteright}Multi-Agent Systems for Education and Interactive Entertainment: Design, Use and Experience{\textquoteright}}, year = {2010}, abstract = {The field of ``intelligent agents and multiagent systems'' is maturing; no longer is it a special topic to be introduced to graduate students after years of training in computer science and many introductory courses in artificial intelligence. Instead, the time is ripe to introduce agents and multiagents directly to undergraduate students, whether majoring in computer science or not. This chapter focuses on exactly this challenge, drawing on the co-authors{\textquoteright} experience of teaching several such undergraduate courses on agents and multiagents, over the last three years at two different universities. The chapter outlines three key issues that must be addressed. The first issue is facilitating students{\textquoteright} intuitive understanding of fundamental concepts of multiagent systems; we illustrate uses of science fiction materials and classroom games to not only provide students with the necessary intuitive understanding but with the excitement and motivation for studying multiagent systems. The second is in selecting the right material {\textemdash} either science-fiction material or games {\textemdash} for providing students the necessary motivation and intuition; we outline several criteria that have been useful in selecting such material. 
The third issue is in educating students about the fundamental philosophical, ethical and social issues surrounding agents and multiagent systems: we outline course materials and classroom activities that allow students to obtain this ``big picture'' futuristic vision of our science. We conclude with feedback received, lessons learned and impact on both the computer science students and non computer-science students.}, author = {Bowring, Emma and Tambe, Milind} } @conference {1506500, title = {Methods and Algorithms for Infinite Bayesian Stackelberg Security Games (Extended Abstract)}, booktitle = {Conference on Decision and Game Theory for Security}, year = {2010}, abstract = {Recently there has been significant interest in applications of game-theoretic analysis to analyze security resource allocation decisions. Two examples of deployed systems based on this line of research are the ARMOR system in use at the Los Angeles International Airport [20], and the IRIS system used by the Federal Air Marshals Service [25]. Game analysis always begins by developing a model of the domain, often based on inputs from domain experts or historical data. These models inevitably contain significant uncertainty{\textemdash}especially in security domains where intelligence about adversary capabilities and preferences is very difficult to gather. In this work we focus on developing new models and algorithms that capture this uncertainty using continuous payoff distributions. These models are richer and more powerful than previous approaches that are limited to small finite Bayesian game models. We present the first algorithms for approximating equilibrium solutions in these games, and study these algorithms empirically. 
Our results show dramatic improvements over existing techniques, even in cases where there is very limited uncertainty about an adversary{\textquoteright}s payoffs.}, author = {Kiekintveld, Christopher and Marecki, Janusz and Tambe, Milind} } @conference {1506502, title = {Optimal defender allocation for massive security games: A branch and price approach}, booktitle = {AAMAS 2010 Workshop on Optimisation in Multi-Agent Systems (OptMas)}, year = {2010}, abstract = {Algorithms to solve security games, an important class of Stackelberg games, have seen successful real-world deployment by LAX police and the Federal Air Marshal Service. These algorithms provide randomized schedules to optimally allocate limited security resources for infrastructure protection. Unfortunately, these state-of-the-art algorithms fail to scale-up or to provide a correct solution for massive security games with arbitrary scheduling constraints. This paper provides ASPEN, a branch-and-price algorithm to overcome this limitation based on two key contributions: (i) A column-generation approach that exploits an innovative compact network flow representation, avoiding a combinatorial explosion of schedule allocations; (ii) A branch-and-bound approach with novel upper-bound generation via a fast algorithm for solving under-constrained security games. ASPEN is the first known method for efficiently solving real-world-sized security games with arbitrary schedules. 
This work contributes to a very new area of work that applies techniques used in large-scale optimization to game-theoretic problems{\textemdash}an exciting new avenue with the potential to greatly expand the reach of game theory.}, author = {Jain, Manish and Erim Kardes and Kiekintveld, Christopher and Ordonez, Fernando and Tambe, Milind} } @conference {1506497, title = {Randomizing Security Activities with Attacker Circumvention Strategies }, booktitle = {AAMAS 2010 Workshop on Optimisation in Multi-Agent Systems (OptMas)}, year = {2010}, abstract = {Game theoretic methods for making resource allocation decision in security domains have attracted growing attention from both researchers and security practitioners, including deployed applications at both the LAX airport and the Federal Air Marshals Service. We develop a new class of security games designed to model decisions faced by the Transportation Security Administration and other agencies in protecting airports, ports, and other critical infrastructure. Our model allows for a more diverse set of security activities for the defensive resources than previous work, which has generally focused on interchangeable resources that can only defend against possible attacks in one way. Here, we are concerned in particular with the possibility that adversaries can circumvent specific security activities if they are aware of common security measures. The model we propose takes this capability into account and generates more unpredictable, diverse security policies as a result{\textemdash}without resorting to an external value for entropy or randomness. Solving these games is a significant computational challenge, and existing algorithms are not capable of solving realistic games. We introduce a new method that exploits common structure in these problems to reduce the size of the game representation and enable faster solution algorithm. 
These algorithms are able to scale to make larger games than existing solvers, as we show in our experimental results.}, author = {Pita, James and Kiekintveld, Christopher and Michael Scott and Tambe, Milind} } @conference {1506362, title = {Robust Bayesian Methods for Stackelberg Security Games (Extended Abstract) }, booktitle = {International Conference on Autonomous Agents and Multiagent Systems (AAMAS) [Short Paper]}, year = {2010}, abstract = {Recent work has applied game-theoretic models to real-world security problems at the Los Angeles International Airport (LAX) and Federal Air Marshals Service (FAMS). The analysis of these domains is based on input from domain experts intended to capture the best available intelligence information about potential terrorist activities and possible security countermeasures. Nevertheless, these models are subject to significant uncertainty{\textemdash}especially in security domains where intelligence about adversary capabilities and preferences is very difficult to gather. This uncertainty presents significant challenges for applying game-theoretic analysis in these domains. Our experimental results show that standard solution methods based on perfect information assumptions are very sensitive to payoff uncertainty, resulting in low payoffs for the defender. We describe a model of Bayesian Stackelberg games that allows for general distributional uncertainty over the attacker{\textquoteright}s payoffs. 
We conduct an experimental analysis of two algorithms for approximating equilibria of these games, and show that the resulting solutions give much better results than the standard approach when there is payoff uncertainty.}, author = {Kiekintveld, Christopher and Tambe, Milind and Marecki, Janusz} } @article {1506499, title = {Robust Solutions to Stackelberg Games: Addressing Bounded Rationality and Limited Observations in Human Cognition}, journal = {Artificial Intelligence Journal}, volume = {174}, number = {15}, year = {2010}, pages = {1142--1171}, abstract = {How do we build algorithms for agent interactions with human adversaries? Stackelberg games are natural models for many important applications that involve human interaction, such as oligopolistic markets and security domains. In Stackelberg games, one player, the leader, commits to a strategy and the follower makes her decision with knowledge of the leader{\textquoteright}s commitment. Existing algorithms for Stackelberg games efficiently find optimal solutions (leader strategy), but they critically assume that the follower plays optimally. Unfortunately, in many applications, agents face human followers (adversaries) who {\textemdash} because of their bounded rationality and limited observation of the leader strategy {\textemdash} may deviate from their expected optimal response. In other words, human adversaries{\textquoteright} decisions are biased due to their bounded rationality and limited observations. Not taking into account these likely deviations when dealing with human adversaries may cause an unacceptable degradation in the leader{\textquoteright}s reward, particularly in security applications where these algorithms have seen deployment. The objective of this paper therefore is to investigate how to build algorithms for agent interactions with human adversaries. 
To address this crucial problem, this paper introduces a new mixed-integer linear program (MILP) for Stackelberg games to consider human adversaries, incorporating: (i) novel anchoring theories on human perception of probability distributions and (ii) robustness approaches for MILPs to address human imprecision. Since this new approach considers human adversaries, traditional proofs of correctness or optimality are insufficient; instead, it is necessary to rely on empirical validation. To that end, this paper considers four settings based on real deployed security systems at Los Angeles International Airport [43], and compares 6 different approaches (three based on our new approach and three previous approaches), in 4 different observability conditions, involving 218 human subjects playing 2960 games in total. The final conclusion is that a model which incorporates both the ideas of robustness and anchoring achieves statistically significant higher rewards and also maintains equivalent or faster solution speeds compared to existing approaches.}, author = {Pita, James and Jain, Manish and Ordonez, Fernando and Tambe, Milind and Kraus, Sarit} } @conference {1506367, title = {Security Games with Arbitrary Schedules: A Branch and Price Approach}, booktitle = {National Conference on Artificial Intelligence (AAAI)}, year = {2010}, abstract = {Security games, an important class of Stackelberg games, are used in deployed decision-support tools in use by LAX police and the Federal Air Marshals Service. The algorithms used to solve these games find optimal randomized schedules to allocate security resources for infrastructure protection. Unfortunately, the state of the art algorithms either fail to scale or to provide a correct solution for large problems with arbitrary scheduling constraints. 
We introduce ASPEN, a branch-and-price approach that overcomes these limitations based on two key contributions: (i) A column-generation approach that exploits a novel network flow representation, avoiding a combinatorial explosion of schedule allocations; (ii) A branch-and-bound algorithm that generates bounds via a fast algorithm for solving security games with relaxed scheduling constraints. ASPEN is the first known method for efficiently solving massive security games with arbitrary schedules.}, author = {Jain, Manish and Kardes, Erim and Kiekintveld, Christopher and Tambe, Milind and Ordonez, Fernando} } @article {1506359, title = {Software Assistants for patrol planning at LAX and Federal Air Marshals Service}, journal = {Interfaces}, volume = {40}, number = {4}, year = {2010}, pages = {267--290}, abstract = {Security at major locations of economic or political importance is a key concern around the world, particularly given the increasing threat of terrorism. Limited security resources prevent full security coverage at all times, which allows adversaries to observe and exploit patterns in patrolling or monitoring, e.g. they can plan an attack that avoids existing patrols. An important method of countering the surveillance capabilities of an adversary is to use randomized security policies that are more difficult to predict and exploit. We describe two deployed applications that assist security forces in randomizing their operations based on fast algorithms for solving large instances of Bayesian Stackelberg games. The first is the ARMOR system (Assistant for Randomized Monitoring over Routes), which has been successfully deployed since August 2007 at the Los Angeles International Airport (LAX). This system is used by airport police to randomize the placement of checkpoints on roads entering the airport, and the routes of canine unit patrols in the airport terminals. 
The IRIS system (Intelligent Randomization in Scheduling) is designed to randomize flight schedules for the Federal Air Marshals Service (FAMS). IRIS has been deployed in a pilot program by FAMS since October 2009 to randomize schedules of air marshals on international flights. These assistants share several key features: (i) they are based on Stackelberg game models to intelligently weight the randomized schedules, (ii) they use efficient mixed-integer programming formulations of the game models to enable fast solutions for large games, and (iii) they allow for interactive manipulation of the domain constraints and parameters by the users. This paper examines the design choices, information, and evaluation that went into building these effective applications.}, author = {Jain, Manish and Pita, James and Tsai, Jason and Kiekintveld, Christopher and Shyamsunder Rathi and Ordonez, Fernando and Tambe, Milind} } @conference {1506363, title = {Stackelberg vs. Nash in Security Games: Interchangeability, Equivalence, and Uniqueness }, booktitle = {International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, year = {2010}, abstract = {There has been significant recent interest in game theoretic approaches to security, with much of the recent research focused on utilizing the leader-follower Stackelberg game model; for example, these games are at the heart of major applications such as the ARMOR program deployed for security at the LAX airport since 2007 and the IRIS program in use by the US Federal Air Marshals (FAMS). The foundational assumption for using Stackelberg games is that security forces (leaders), acting first, commit to a randomized strategy; while their adversaries (followers) choose their best response after surveillance of this randomized strategy. Yet, in many situations, the followers may act without observation of the leader{\textquoteright}s strategy, essentially converting the game into a simultaneous-move game model. 
Previous work fails to address how a leader should compute her strategy given this fundamental uncertainty about the type of game faced. Focusing on the complex games that are directly inspired by realworld security applications, the paper provides four contributions in the context of a general class of security games. First, exploiting the structure of these security games, the paper shows that the Nash equilibria in security games are interchangeable, thus alleviating the equilibrium selection problem. Second, resolving the leader{\textquoteright}s dilemma, it shows that under a natural restriction on security games, any Stackelberg strategy is also a Nash equilibrium strategy; and furthermore, the solution is unique in a class of realworld security games of which ARMOR is a key exemplar. Third, when faced with a follower that can attack multiple targets, many of these properties no longer hold. Fourth, our experimental results emphasize positive properties of games that do not fit our restrictions. Our contributions have major implications for the real-world applications.}, author = {Yin, Zhengyu and Dmytro Korzhyk and Kiekintveld, Christopher and Vincent Conitzer and Tambe, Milind} } @conference {1506501, title = {Teamwork and Coordination under Model Uncertainty in DEC-POMDPs }, booktitle = {AAAI 2010 Workshop on Interactive Decision Theory and Game Theory (IDTGT) }, year = {2010}, abstract = {Distributed Partially Observable Markov Decision Processes (DEC-POMDPs) are a popular planning framework for multiagent teamwork to compute (near-)optimal plans. However, these methods assume a complete and correct world model, which is often violated in real-world domains. We provide a new algorithm for DEC-POMDPs that is more robust to model uncertainty, with a focus on domains with sparse agent interactions. 
Our STC algorithm relies on the following key ideas: (1) reduce planning-time computation by shifting some of the burden to execution-time reasoning, (2) exploit sparse interactions between agents, and (3) maintain an approximate model of agents{\textquoteright} beliefs. We empirically show that STC is often substantially faster to existing DEC-POMDP methods without sacrificing reward performance.}, author = {Kwak, Jun-young and Yang, Rong and Yin, Zhengyu and Matthew E. Taylor and Tambe, Milind} } @conference {1506365, title = {Towards a Theoretic Understanding of DCEE }, booktitle = {DCR 10}, year = {2010}, abstract = {Common wisdom says that the greater the level of teamwork, the higher the performance of the team. In teams of cooperative autonomous agents, working together rather than independently can increase the team reward. However, recent results show that in uncertain environments, increasing the level of teamwork can actually decrease overall performance. Coined the team uncertainty penalty, this phenomenon has been shown empirically in simulation, but the underlying mathematics are not yet understood. By understanding the mathematics, we could develop algorithms that reduce or eliminate this penalty of increased teamwork. In this paper we investigate the team uncertainty penalty on two fronts. First, we provide results of robots exhibiting the same behavior seen in simulations. Second, we present a mathematical foundation by which to analyze the phenomenon. Using this model, we present findings indicating that the team uncertainty penalty is inherent to the level of teamwork allowed, rather than to specific algorithms.}, author = {Scott Alfeld and Matthew E. 
Taylor and Tandon, Prateek and Tambe, Milind} } @conference {1506366, title = {Urban Security: Game-Theoretic Resource Allocation in Networked Physical Domains }, booktitle = {National Conference on Artificial Intelligence (AAAI)}, year = {2010}, abstract = {Law enforcement agencies frequently must allocate limited resources to protect targets embedded in a network, such as important buildings in a city road network. Since intelligent attackers may observe and exploit patterns in the allocation, it is crucial that the allocations be randomized. We cast this problem as an attacker-defender Stackelberg game: the defender{\textquoteright}s goal is to obtain an optimal mixed strategy for allocating resources. The defender{\textquoteright}s strategy space is exponential in the number of resources, and the attacker{\textquoteright}s exponential in the network size. Existing algorithms are therefore useless for all but the smallest networks. We present a solution approach based on two key ideas: (i) A polynomial-sized game model obtained via an approximation of the strategy space, solved efficiently using a linear program; (ii) Two efficient techniques that map solutions from the approximate game to the original, with proofs of correctness under certain assumptions. We present in-depth experimental results, including an evaluation on part of the Mumbai road network.}, author = {Tsai, Jason and Yin, Zhengyu and Kwak, Jun-young and David Kempe and Kiekintveld, Christopher and Tambe, Milind} } @conference {1506360, title = {When Should There be a "Me" in "Team"? Distributed Multi-Agent Optimization Under Uncertainty }, booktitle = {International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, year = {2010}, abstract = {Increasing teamwork between agents typically increases the performance of a multi-agent system, at the cost of increased communication and higher computational complexity. 
This work examines joint actions in the context of a multi-agent optimization problem where agents must cooperate to balance exploration and exploitation. Surprisingly, results show that increased teamwork can hurt agent performance, even when communication and computation costs are ignored, which we term the team uncertainty penalty. This paper introduces the above phenomenon, analyzes it, and presents algorithms to reduce the effect of the penalty in our problem setting.}, author = {Taylor, Matthew E. and Jain, Manish and Jin, Yanquin and Yokoo, Makoto and Tambe, Milind} }